diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-05 17:47:31 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-05 17:47:31 -0400 |
| commit | 01227a889ed56ae53aeebb9f93be9d54dd8b2de8 (patch) | |
| tree | d5eba9359a9827e84d4112b84d48c54df5c5acde | |
| parent | 9e6879460c8edb0cd3c24c09b83d06541b5af0dc (diff) | |
| parent | db6ae6158186a17165ef990bda2895ae7594b039 (diff) | |
Merge tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Gleb Natapov:
"Highlights of the updates are:
general:
- new emulated device API
- legacy device assignment is now optional
- irqfd interface is more generic and can be shared between arches
x86:
- VMCS shadow support and other nested VMX improvements
- APIC virtualization and Posted Interrupt hardware support
- Optimize mmio spte zapping
ppc:
- BookE: in-kernel MPIC emulation with irqfd support
- Book3S: in-kernel XICS emulation (incomplete)
- Book3S: HV: migration fixes
- BookE: more debug support preparation
- BookE: e6500 support
ARM:
- reworking of Hyp idmaps
s390:
- ioeventfd for virtio-ccw
And many other bug fixes, cleanups and improvements"
* tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
kvm: Add compat_ioctl for device control API
KVM: x86: Account for failing enable_irq_window for NMI window request
KVM: PPC: Book3S: Add API for in-kernel XICS emulation
kvm/ppc/mpic: fix missing unlock in set_base_addr()
kvm/ppc: Hold srcu lock when calling kvm_io_bus_read/write
kvm/ppc/mpic: remove users
kvm/ppc/mpic: fix mmio region lists when multiple guests used
kvm/ppc/mpic: remove default routes from documentation
kvm: KVM_CAP_IOMMU only available with device assignment
ARM: KVM: iterate over all CPUs for CPU compatibility check
KVM: ARM: Fix spelling in error message
ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally
KVM: ARM: Fix API documentation for ONE_REG encoding
ARM: KVM: promote vfp_host pointer to generic host cpu context
ARM: KVM: add architecture specific hook for capabilities
ARM: KVM: perform HYP initialization for hotplugged CPUs
ARM: KVM: switch to a dual-step HYP init code
ARM: KVM: rework HYP page table freeing
ARM: KVM: enforce maximum size for identity mapped code
ARM: KVM: move to a KVM provided HYP idmap
...
110 files changed, 8522 insertions, 2270 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 119358dfb742..5f91eda91647 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
| @@ -1486,15 +1486,23 @@ struct kvm_ioeventfd { | |||
| 1486 | __u8 pad[36]; | 1486 | __u8 pad[36]; |
| 1487 | }; | 1487 | }; |
| 1488 | 1488 | ||
| 1489 | For the special case of virtio-ccw devices on s390, the ioevent is matched | ||
| 1490 | to a subchannel/virtqueue tuple instead. | ||
| 1491 | |||
| 1489 | The following flags are defined: | 1492 | The following flags are defined: |
| 1490 | 1493 | ||
| 1491 | #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) | 1494 | #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) |
| 1492 | #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) | 1495 | #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) |
| 1493 | #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) | 1496 | #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) |
| 1497 | #define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ | ||
| 1498 | (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) | ||
| 1494 | 1499 | ||
| 1495 | If datamatch flag is set, the event will be signaled only if the written value | 1500 | If datamatch flag is set, the event will be signaled only if the written value |
| 1496 | to the registered address is equal to datamatch in struct kvm_ioeventfd. | 1501 | to the registered address is equal to datamatch in struct kvm_ioeventfd. |
| 1497 | 1502 | ||
| 1503 | For virtio-ccw devices, addr contains the subchannel id and datamatch the | ||
| 1504 | virtqueue index. | ||
| 1505 | |||
| 1498 | 1506 | ||
| 1499 | 4.60 KVM_DIRTY_TLB | 1507 | 4.60 KVM_DIRTY_TLB |
| 1500 | 1508 | ||
| @@ -1780,27 +1788,48 @@ registers, find a list below: | |||
| 1780 | PPC | KVM_REG_PPC_VPA_DTL | 128 | 1788 | PPC | KVM_REG_PPC_VPA_DTL | 128 |
| 1781 | PPC | KVM_REG_PPC_EPCR | 32 | 1789 | PPC | KVM_REG_PPC_EPCR | 32 |
| 1782 | PPC | KVM_REG_PPC_EPR | 32 | 1790 | PPC | KVM_REG_PPC_EPR | 32 |
| 1791 | PPC | KVM_REG_PPC_TCR | 32 | ||
| 1792 | PPC | KVM_REG_PPC_TSR | 32 | ||
| 1793 | PPC | KVM_REG_PPC_OR_TSR | 32 | ||
| 1794 | PPC | KVM_REG_PPC_CLEAR_TSR | 32 | ||
| 1795 | PPC | KVM_REG_PPC_MAS0 | 32 | ||
| 1796 | PPC | KVM_REG_PPC_MAS1 | 32 | ||
| 1797 | PPC | KVM_REG_PPC_MAS2 | 64 | ||
| 1798 | PPC | KVM_REG_PPC_MAS7_3 | 64 | ||
| 1799 | PPC | KVM_REG_PPC_MAS4 | 32 | ||
| 1800 | PPC | KVM_REG_PPC_MAS6 | 32 | ||
| 1801 | PPC | KVM_REG_PPC_MMUCFG | 32 | ||
| 1802 | PPC | KVM_REG_PPC_TLB0CFG | 32 | ||
| 1803 | PPC | KVM_REG_PPC_TLB1CFG | 32 | ||
| 1804 | PPC | KVM_REG_PPC_TLB2CFG | 32 | ||
| 1805 | PPC | KVM_REG_PPC_TLB3CFG | 32 | ||
| 1806 | PPC | KVM_REG_PPC_TLB0PS | 32 | ||
| 1807 | PPC | KVM_REG_PPC_TLB1PS | 32 | ||
| 1808 | PPC | KVM_REG_PPC_TLB2PS | 32 | ||
| 1809 | PPC | KVM_REG_PPC_TLB3PS | 32 | ||
| 1810 | PPC | KVM_REG_PPC_EPTCFG | 32 | ||
| 1811 | PPC | KVM_REG_PPC_ICP_STATE | 64 | ||
| 1783 | 1812 | ||
| 1784 | ARM registers are mapped using the lower 32 bits. The upper 16 of that | 1813 | ARM registers are mapped using the lower 32 bits. The upper 16 of that |
| 1785 | is the register group type, or coprocessor number: | 1814 | is the register group type, or coprocessor number: |
| 1786 | 1815 | ||
| 1787 | ARM core registers have the following id bit patterns: | 1816 | ARM core registers have the following id bit patterns: |
| 1788 | 0x4002 0000 0010 <index into the kvm_regs struct:16> | 1817 | 0x4020 0000 0010 <index into the kvm_regs struct:16> |
| 1789 | 1818 | ||
| 1790 | ARM 32-bit CP15 registers have the following id bit patterns: | 1819 | ARM 32-bit CP15 registers have the following id bit patterns: |
| 1791 | 0x4002 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3> | 1820 | 0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3> |
| 1792 | 1821 | ||
| 1793 | ARM 64-bit CP15 registers have the following id bit patterns: | 1822 | ARM 64-bit CP15 registers have the following id bit patterns: |
| 1794 | 0x4003 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3> | 1823 | 0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3> |
| 1795 | 1824 | ||
| 1796 | ARM CCSIDR registers are demultiplexed by CSSELR value: | 1825 | ARM CCSIDR registers are demultiplexed by CSSELR value: |
| 1797 | 0x4002 0000 0011 00 <csselr:8> | 1826 | 0x4020 0000 0011 00 <csselr:8> |
| 1798 | 1827 | ||
| 1799 | ARM 32-bit VFP control registers have the following id bit patterns: | 1828 | ARM 32-bit VFP control registers have the following id bit patterns: |
| 1800 | 0x4002 0000 0012 1 <regno:12> | 1829 | 0x4020 0000 0012 1 <regno:12> |
| 1801 | 1830 | ||
| 1802 | ARM 64-bit FP registers have the following id bit patterns: | 1831 | ARM 64-bit FP registers have the following id bit patterns: |
| 1803 | 0x4002 0000 0012 0 <regno:12> | 1832 | 0x4030 0000 0012 0 <regno:12> |
| 1804 | 1833 | ||
| 1805 | 4.69 KVM_GET_ONE_REG | 1834 | 4.69 KVM_GET_ONE_REG |
| 1806 | 1835 | ||
| @@ -2161,6 +2190,76 @@ header; first `n_valid' valid entries with contents from the data | |||
| 2161 | written, then `n_invalid' invalid entries, invalidating any previously | 2190 | written, then `n_invalid' invalid entries, invalidating any previously |
| 2162 | valid entries found. | 2191 | valid entries found. |
| 2163 | 2192 | ||
| 2193 | 4.79 KVM_CREATE_DEVICE | ||
| 2194 | |||
| 2195 | Capability: KVM_CAP_DEVICE_CTRL | ||
| 2196 | Type: vm ioctl | ||
| 2197 | Parameters: struct kvm_create_device (in/out) | ||
| 2198 | Returns: 0 on success, -1 on error | ||
| 2199 | Errors: | ||
| 2200 | ENODEV: The device type is unknown or unsupported | ||
| 2201 | EEXIST: Device already created, and this type of device may not | ||
| 2202 | be instantiated multiple times | ||
| 2203 | |||
| 2204 | Other error conditions may be defined by individual device types or | ||
| 2205 | have their standard meanings. | ||
| 2206 | |||
| 2207 | Creates an emulated device in the kernel. The file descriptor returned | ||
| 2208 | in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR. | ||
| 2209 | |||
| 2210 | If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the | ||
| 2211 | device type is supported (not necessarily whether it can be created | ||
| 2212 | in the current vm). | ||
| 2213 | |||
| 2214 | Individual devices should not define flags. Attributes should be used | ||
| 2215 | for specifying any behavior that is not implied by the device type | ||
| 2216 | number. | ||
| 2217 | |||
| 2218 | struct kvm_create_device { | ||
| 2219 | __u32 type; /* in: KVM_DEV_TYPE_xxx */ | ||
| 2220 | __u32 fd; /* out: device handle */ | ||
| 2221 | __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ | ||
| 2222 | }; | ||
| 2223 | |||
| 2224 | 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR | ||
| 2225 | |||
| 2226 | Capability: KVM_CAP_DEVICE_CTRL | ||
| 2227 | Type: device ioctl | ||
| 2228 | Parameters: struct kvm_device_attr | ||
| 2229 | Returns: 0 on success, -1 on error | ||
| 2230 | Errors: | ||
| 2231 | ENXIO: The group or attribute is unknown/unsupported for this device | ||
| 2232 | EPERM: The attribute cannot (currently) be accessed this way | ||
| 2233 | (e.g. read-only attribute, or attribute that only makes | ||
| 2234 | sense when the device is in a different state) | ||
| 2235 | |||
| 2236 | Other error conditions may be defined by individual device types. | ||
| 2237 | |||
| 2238 | Gets/sets a specified piece of device configuration and/or state. The | ||
| 2239 | semantics are device-specific. See individual device documentation in | ||
| 2240 | the "devices" directory. As with ONE_REG, the size of the data | ||
| 2241 | transferred is defined by the particular attribute. | ||
| 2242 | |||
| 2243 | struct kvm_device_attr { | ||
| 2244 | __u32 flags; /* no flags currently defined */ | ||
| 2245 | __u32 group; /* device-defined */ | ||
| 2246 | __u64 attr; /* group-defined */ | ||
| 2247 | __u64 addr; /* userspace address of attr data */ | ||
| 2248 | }; | ||
| 2249 | |||
| 2250 | 4.81 KVM_HAS_DEVICE_ATTR | ||
| 2251 | |||
| 2252 | Capability: KVM_CAP_DEVICE_CTRL | ||
| 2253 | Type: device ioctl | ||
| 2254 | Parameters: struct kvm_device_attr | ||
| 2255 | Returns: 0 on success, -1 on error | ||
| 2256 | Errors: | ||
| 2257 | ENXIO: The group or attribute is unknown/unsupported for this device | ||
| 2258 | |||
| 2259 | Tests whether a device supports a particular attribute. A successful | ||
| 2260 | return indicates the attribute is implemented. It does not necessarily | ||
| 2261 | indicate that the attribute can be read or written in the device's | ||
| 2262 | current state. "addr" is ignored. | ||
| 2164 | 2263 | ||
| 2165 | 4.77 KVM_ARM_VCPU_INIT | 2264 | 4.77 KVM_ARM_VCPU_INIT |
| 2166 | 2265 | ||
| @@ -2243,6 +2342,25 @@ and distributor interface, the ioctl must be called after calling | |||
| 2243 | KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling | 2342 | KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling |
| 2244 | this ioctl twice for any of the base addresses will return -EEXIST. | 2343 | this ioctl twice for any of the base addresses will return -EEXIST. |
| 2245 | 2344 | ||
| 2345 | 4.82 KVM_PPC_RTAS_DEFINE_TOKEN | ||
| 2346 | |||
| 2347 | Capability: KVM_CAP_PPC_RTAS | ||
| 2348 | Architectures: ppc | ||
| 2349 | Type: vm ioctl | ||
| 2350 | Parameters: struct kvm_rtas_token_args | ||
| 2351 | Returns: 0 on success, -1 on error | ||
| 2352 | |||
| 2353 | Defines a token value for an RTAS (Run Time Abstraction Services) | ||
| 2354 | service in order to allow it to be handled in the kernel. The | ||
| 2355 | argument struct gives the name of the service, which must be the name | ||
| 2356 | of a service that has a kernel-side implementation. If the token | ||
| 2357 | value is non-zero, it will be associated with that service, and | ||
| 2358 | subsequent RTAS calls by the guest specifying that token will be | ||
| 2359 | handled by the kernel. If the token value is 0, then any token | ||
| 2360 | associated with the service will be forgotten, and subsequent RTAS | ||
| 2361 | calls by the guest for that service will be passed to userspace to be | ||
| 2362 | handled. | ||
| 2363 | |||
| 2246 | 2364 | ||
| 2247 | 5. The kvm_run structure | 2365 | 5. The kvm_run structure |
| 2248 | ------------------------ | 2366 | ------------------------ |
| @@ -2646,3 +2764,19 @@ to receive the topmost interrupt vector. | |||
| 2646 | When disabled (args[0] == 0), behavior is as if this facility is unsupported. | 2764 | When disabled (args[0] == 0), behavior is as if this facility is unsupported. |
| 2647 | 2765 | ||
| 2648 | When this capability is enabled, KVM_EXIT_EPR can occur. | 2766 | When this capability is enabled, KVM_EXIT_EPR can occur. |
| 2767 | |||
| 2768 | 6.6 KVM_CAP_IRQ_MPIC | ||
| 2769 | |||
| 2770 | Architectures: ppc | ||
| 2771 | Parameters: args[0] is the MPIC device fd | ||
| 2772 | args[1] is the MPIC CPU number for this vcpu | ||
| 2773 | |||
| 2774 | This capability connects the vcpu to an in-kernel MPIC device. | ||
| 2775 | |||
| 2776 | 6.7 KVM_CAP_IRQ_XICS | ||
| 2777 | |||
| 2778 | Architectures: ppc | ||
| 2779 | Parameters: args[0] is the XICS device fd | ||
| 2780 | args[1] is the XICS CPU number (server ID) for this vcpu | ||
| 2781 | |||
| 2782 | This capability connects the vcpu to an in-kernel XICS device. | ||
diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README new file mode 100644 index 000000000000..34a69834124a --- /dev/null +++ b/Documentation/virtual/kvm/devices/README | |||
| @@ -0,0 +1 @@ | |||
| This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL. | |||
diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt new file mode 100644 index 000000000000..8257397adc3c --- /dev/null +++ b/Documentation/virtual/kvm/devices/mpic.txt | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | MPIC interrupt controller | ||
| 2 | ========================= | ||
| 3 | |||
| 4 | Device types supported: | ||
| 5 | KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0 | ||
| 6 | KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2 | ||
| 7 | |||
| 8 | Only one MPIC instance, of any type, may be instantiated. The created | ||
| 9 | MPIC will act as the system interrupt controller, connecting to each | ||
| 10 | vcpu's interrupt inputs. | ||
| 11 | |||
| 12 | Groups: | ||
| 13 | KVM_DEV_MPIC_GRP_MISC | ||
| 14 | Attributes: | ||
| 15 | KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit) | ||
| 16 | Base address of the 256 KiB MPIC register space. Must be | ||
| 17 | naturally aligned. A value of zero disables the mapping. | ||
| 18 | Reset value is zero. | ||
| 19 | |||
| 20 | KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit) | ||
| 21 | Access an MPIC register, as if the access were made from the guest. | ||
| 22 | "attr" is the byte offset into the MPIC register space. Accesses | ||
| 23 | must be 4-byte aligned. | ||
| 24 | |||
| 25 | MSIs may be signaled by using this attribute group to write | ||
| 26 | to the relevant MSIIR. | ||
| 27 | |||
| 28 | KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit) | ||
| 29 | IRQ input line for each standard openpic source. 0 is inactive and 1 | ||
| 30 | is active, regardless of interrupt sense. | ||
| 31 | |||
| 32 | For edge-triggered interrupts: Writing 1 is considered an activating | ||
| 33 | edge, and writing 0 is ignored. Reading returns 1 if a previously | ||
| 34 | signaled edge has not been acknowledged, and 0 otherwise. | ||
| 35 | |||
| 36 | "attr" is the IRQ number. IRQ numbers for standard sources are the | ||
| 37 | byte offset of the relevant IVPR from EIVPR0, divided by 32. | ||
| 38 | |||
| 39 | IRQ Routing: | ||
| 40 | |||
| 41 | The MPIC emulation supports IRQ routing. Only a single MPIC device can | ||
| 42 | be instantiated. Once that device has been created, it's available as | ||
| 43 | irqchip id 0. | ||
| 44 | |||
| 45 | This irqchip 0 has 256 interrupt pins, which expose the interrupts in | ||
| 46 | the main array of interrupt sources (a.k.a. "SRC" interrupts). | ||
| 47 | |||
| 48 | The numbering is the same as the MPIC device tree binding -- based on | ||
| 49 | the register offset from the beginning of the sources array, without | ||
| 50 | regard to any subdivisions in chip documentation such as "internal" | ||
| 51 | or "external" interrupts. | ||
| 52 | |||
| 53 | Access to non-SRC interrupts is not implemented through IRQ routing mechanisms. | ||
diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt new file mode 100644 index 000000000000..42864935ac5d --- /dev/null +++ b/Documentation/virtual/kvm/devices/xics.txt | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | XICS interrupt controller | ||
| 2 | |||
| 3 | Device type supported: KVM_DEV_TYPE_XICS | ||
| 4 | |||
| 5 | Groups: | ||
| 6 | KVM_DEV_XICS_SOURCES | ||
| 7 | Attributes: One per interrupt source, indexed by the source number. | ||
| 8 | |||
| 9 | This device emulates the XICS (eXternal Interrupt Controller | ||
| 10 | Specification) defined in PAPR. The XICS has a set of interrupt | ||
| 11 | sources, each identified by a 20-bit source number, and a set of | ||
| 12 | Interrupt Control Presentation (ICP) entities, also called "servers", | ||
| 13 | each associated with a virtual CPU. | ||
| 14 | |||
| 15 | The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH | ||
| 16 | capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and | ||
| 17 | the interrupt server number (i.e. the vcpu number from the XICS's | ||
| 18 | point of view) in args[1] of the kvm_enable_cap struct. Each ICP has | ||
| 19 | 64 bits of state which can be read and written using the | ||
| 20 | KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. The 64 bit | ||
| 21 | state word has the following bitfields, starting at the | ||
| 22 | least-significant end of the word: | ||
| 23 | |||
| 24 | * Unused, 16 bits | ||
| 25 | |||
| 26 | * Pending interrupt priority, 8 bits | ||
| 27 | Zero is the highest priority, 255 means no interrupt is pending. | ||
| 28 | |||
| 29 | * Pending IPI (inter-processor interrupt) priority, 8 bits | ||
| 30 | Zero is the highest priority, 255 means no IPI is pending. | ||
| 31 | |||
| 32 | * Pending interrupt source number, 24 bits | ||
| 33 | Zero means no interrupt pending, 2 means an IPI is pending | ||
| 34 | |||
| 35 | * Current processor priority, 8 bits | ||
| 36 | Zero is the highest priority, meaning no interrupts can be | ||
| 37 | delivered, and 255 is the lowest priority. | ||
| 38 | |||
| 39 | Each source has 64 bits of state that can be read and written using | ||
| 40 | the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the | ||
| 41 | KVM_DEV_XICS_SOURCES attribute group, with the attribute number being | ||
| 42 | the interrupt source number. The 64 bit state word has the following | ||
| 43 | bitfields, starting from the least-significant end of the word: | ||
| 44 | |||
| 45 | * Destination (server number), 32 bits | ||
| 46 | This specifies where the interrupt should be sent, and is the | ||
| 47 | interrupt server number specified for the destination vcpu. | ||
| 48 | |||
| 49 | * Priority, 8 bits | ||
| 50 | This is the priority specified for this interrupt source, where 0 is | ||
| 51 | the highest priority and 255 is the lowest. An interrupt with a | ||
| 52 | priority of 255 will never be delivered. | ||
| 53 | |||
| 54 | * Level sensitive flag, 1 bit | ||
| 55 | This bit is 1 for a level-sensitive interrupt source, or 0 for | ||
| 56 | edge-sensitive (or MSI). | ||
| 57 | |||
| 58 | * Masked flag, 1 bit | ||
| 59 | This bit is set to 1 if the interrupt is masked (cannot be delivered | ||
| 60 | regardless of its priority), for example by the ibm,int-off RTAS | ||
| 61 | call, or 0 if it is not masked. | ||
| 62 | |||
| 63 | * Pending flag, 1 bit | ||
| 64 | This bit is 1 if the source has a pending interrupt, otherwise 0. | ||
| 65 | |||
| 66 | Only one XICS instance may be created per VM. | ||
diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h index 1a66f907e5cc..bf863edb517d 100644 --- a/arch/arm/include/asm/idmap.h +++ b/arch/arm/include/asm/idmap.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #define __idmap __section(.idmap.text) noinline notrace | 8 | #define __idmap __section(.idmap.text) noinline notrace |
| 9 | 9 | ||
| 10 | extern pgd_t *idmap_pgd; | 10 | extern pgd_t *idmap_pgd; |
| 11 | extern pgd_t *hyp_pgd; | ||
| 12 | 11 | ||
| 13 | void setup_mm_for_reboot(void); | 12 | void setup_mm_for_reboot(void); |
| 14 | 13 | ||
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0c4e643d939e..57cb786a6203 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h | |||
| @@ -87,7 +87,7 @@ struct kvm_vcpu_fault_info { | |||
| 87 | u32 hyp_pc; /* PC when exception was taken from Hyp mode */ | 87 | u32 hyp_pc; /* PC when exception was taken from Hyp mode */ |
| 88 | }; | 88 | }; |
| 89 | 89 | ||
| 90 | typedef struct vfp_hard_struct kvm_kernel_vfp_t; | 90 | typedef struct vfp_hard_struct kvm_cpu_context_t; |
| 91 | 91 | ||
| 92 | struct kvm_vcpu_arch { | 92 | struct kvm_vcpu_arch { |
| 93 | struct kvm_regs regs; | 93 | struct kvm_regs regs; |
| @@ -105,8 +105,10 @@ struct kvm_vcpu_arch { | |||
| 105 | struct kvm_vcpu_fault_info fault; | 105 | struct kvm_vcpu_fault_info fault; |
| 106 | 106 | ||
| 107 | /* Floating point registers (VFP and Advanced SIMD/NEON) */ | 107 | /* Floating point registers (VFP and Advanced SIMD/NEON) */ |
| 108 | kvm_kernel_vfp_t vfp_guest; | 108 | struct vfp_hard_struct vfp_guest; |
| 109 | kvm_kernel_vfp_t *vfp_host; | 109 | |
| 110 | /* Host FP context */ | ||
| 111 | kvm_cpu_context_t *host_cpu_context; | ||
| 110 | 112 | ||
| 111 | /* VGIC state */ | 113 | /* VGIC state */ |
| 112 | struct vgic_cpu vgic_cpu; | 114 | struct vgic_cpu vgic_cpu; |
| @@ -188,23 +190,38 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); | |||
| 188 | int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, | 190 | int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, |
| 189 | int exception_index); | 191 | int exception_index); |
| 190 | 192 | ||
| 191 | static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr, | 193 | static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, |
| 194 | unsigned long long pgd_ptr, | ||
| 192 | unsigned long hyp_stack_ptr, | 195 | unsigned long hyp_stack_ptr, |
| 193 | unsigned long vector_ptr) | 196 | unsigned long vector_ptr) |
| 194 | { | 197 | { |
| 195 | unsigned long pgd_low, pgd_high; | ||
| 196 | |||
| 197 | pgd_low = (pgd_ptr & ((1ULL << 32) - 1)); | ||
| 198 | pgd_high = (pgd_ptr >> 32ULL); | ||
| 199 | |||
| 200 | /* | 198 | /* |
| 201 | * Call initialization code, and switch to the full blown | 199 | * Call initialization code, and switch to the full blown HYP |
| 202 | * HYP code. The init code doesn't need to preserve these registers as | 200 | * code. The init code doesn't need to preserve these |
| 203 | * r1-r3 and r12 are already callee save according to the AAPCS. | 201 | * registers as r0-r3 are already callee saved according to |
| 204 | * Note that we slightly misuse the prototype by casing the pgd_low to | 202 | * the AAPCS. |
| 205 | * a void *. | 203 | * Note that we slightly misuse the prototype by casing the |
| 204 | * stack pointer to a void *. | ||
| 205 | * | ||
| 206 | * We don't have enough registers to perform the full init in | ||
| 207 | * one go. Install the boot PGD first, and then install the | ||
| 208 | * runtime PGD, stack pointer and vectors. The PGDs are always | ||
| 209 | * passed as the third argument, in order to be passed into | ||
| 210 | * r2-r3 to the init code (yes, this is compliant with the | ||
| 211 | * PCS!). | ||
| 206 | */ | 212 | */ |
| 207 | kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); | 213 | |
| 214 | kvm_call_hyp(NULL, 0, boot_pgd_ptr); | ||
| 215 | |||
| 216 | kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); | ||
| 208 | } | 217 | } |
| 209 | 218 | ||
| 219 | static inline int kvm_arch_dev_ioctl_check_extension(long ext) | ||
| 220 | { | ||
| 221 | return 0; | ||
| 222 | } | ||
| 223 | |||
| 224 | int kvm_perf_init(void); | ||
| 225 | int kvm_perf_teardown(void); | ||
| 226 | |||
| 210 | #endif /* __ARM_KVM_HOST_H__ */ | 227 | #endif /* __ARM_KVM_HOST_H__ */ |
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 970f3b5fa109..472ac7091003 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h | |||
| @@ -19,21 +19,33 @@ | |||
| 19 | #ifndef __ARM_KVM_MMU_H__ | 19 | #ifndef __ARM_KVM_MMU_H__ |
| 20 | #define __ARM_KVM_MMU_H__ | 20 | #define __ARM_KVM_MMU_H__ |
| 21 | 21 | ||
| 22 | #include <asm/cacheflush.h> | 22 | #include <asm/memory.h> |
| 23 | #include <asm/pgalloc.h> | 23 | #include <asm/page.h> |
| 24 | #include <asm/idmap.h> | ||
| 25 | 24 | ||
| 26 | /* | 25 | /* |
| 27 | * We directly use the kernel VA for the HYP, as we can directly share | 26 | * We directly use the kernel VA for the HYP, as we can directly share |
| 28 | * the mapping (HTTBR "covers" TTBR1). | 27 | * the mapping (HTTBR "covers" TTBR1). |
| 29 | */ | 28 | */ |
| 30 | #define HYP_PAGE_OFFSET_MASK (~0UL) | 29 | #define HYP_PAGE_OFFSET_MASK UL(~0) |
| 31 | #define HYP_PAGE_OFFSET PAGE_OFFSET | 30 | #define HYP_PAGE_OFFSET PAGE_OFFSET |
| 32 | #define KERN_TO_HYP(kva) (kva) | 31 | #define KERN_TO_HYP(kva) (kva) |
| 33 | 32 | ||
| 33 | /* | ||
| 34 | * Our virtual mapping for the boot-time MMU-enable code. Must be | ||
| 35 | * shared across all the page-tables. Conveniently, we use the vectors | ||
| 36 | * page, where no kernel data will ever be shared with HYP. | ||
| 37 | */ | ||
| 38 | #define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE) | ||
| 39 | |||
| 40 | #ifndef __ASSEMBLY__ | ||
| 41 | |||
| 42 | #include <asm/cacheflush.h> | ||
| 43 | #include <asm/pgalloc.h> | ||
| 44 | |||
| 34 | int create_hyp_mappings(void *from, void *to); | 45 | int create_hyp_mappings(void *from, void *to); |
| 35 | int create_hyp_io_mappings(void *from, void *to, phys_addr_t); | 46 | int create_hyp_io_mappings(void *from, void *to, phys_addr_t); |
| 36 | void free_hyp_pmds(void); | 47 | void free_boot_hyp_pgd(void); |
| 48 | void free_hyp_pgds(void); | ||
| 37 | 49 | ||
| 38 | int kvm_alloc_stage2_pgd(struct kvm *kvm); | 50 | int kvm_alloc_stage2_pgd(struct kvm *kvm); |
| 39 | void kvm_free_stage2_pgd(struct kvm *kvm); | 51 | void kvm_free_stage2_pgd(struct kvm *kvm); |
| @@ -45,6 +57,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); | |||
| 45 | void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); | 57 | void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); |
| 46 | 58 | ||
| 47 | phys_addr_t kvm_mmu_get_httbr(void); | 59 | phys_addr_t kvm_mmu_get_httbr(void); |
| 60 | phys_addr_t kvm_mmu_get_boot_httbr(void); | ||
| 61 | phys_addr_t kvm_get_idmap_vector(void); | ||
| 48 | int kvm_mmu_init(void); | 62 | int kvm_mmu_init(void); |
| 49 | void kvm_clear_hyp_idmap(void); | 63 | void kvm_clear_hyp_idmap(void); |
| 50 | 64 | ||
| @@ -114,4 +128,8 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) | |||
| 114 | } | 128 | } |
| 115 | } | 129 | } |
| 116 | 130 | ||
| 131 | #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) | ||
| 132 | |||
| 133 | #endif /* !__ASSEMBLY__ */ | ||
| 134 | |||
| 117 | #endif /* __ARM_KVM_MMU_H__ */ | 135 | #endif /* __ARM_KVM_MMU_H__ */ |
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index a53efa993690..ee68cce6b48e 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c | |||
| @@ -158,7 +158,7 @@ int main(void) | |||
| 158 | DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); | 158 | DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); |
| 159 | DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); | 159 | DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); |
| 160 | DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); | 160 | DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); |
| 161 | DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host)); | 161 | DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context)); |
| 162 | DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); | 162 | DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); |
| 163 | DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); | 163 | DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); |
| 164 | DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); | 164 | DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); |
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b571484e9f03..a871b8e00fca 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | VMLINUX_SYMBOL(__idmap_text_start) = .; \ | 20 | VMLINUX_SYMBOL(__idmap_text_start) = .; \ |
| 21 | *(.idmap.text) \ | 21 | *(.idmap.text) \ |
| 22 | VMLINUX_SYMBOL(__idmap_text_end) = .; \ | 22 | VMLINUX_SYMBOL(__idmap_text_end) = .; \ |
| 23 | ALIGN_FUNCTION(); \ | 23 | . = ALIGN(32); \ |
| 24 | VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ | 24 | VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ |
| 25 | *(.hyp.idmap.text) \ | 25 | *(.hyp.idmap.text) \ |
| 26 | VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; | 26 | VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; |
| @@ -315,3 +315,8 @@ SECTIONS | |||
| 315 | */ | 315 | */ |
| 316 | ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") | 316 | ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") |
| 317 | ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") | 317 | ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") |
| 318 | /* | ||
| 319 | * The HYP init code can't be more than a page long. | ||
| 320 | * The above comment applies as well. | ||
| 321 | */ | ||
| 322 | ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big") | ||
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 49dd64e579c2..370e1a8af6ac 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig | |||
| @@ -41,9 +41,9 @@ config KVM_ARM_HOST | |||
| 41 | Provides host support for ARM processors. | 41 | Provides host support for ARM processors. |
| 42 | 42 | ||
| 43 | config KVM_ARM_MAX_VCPUS | 43 | config KVM_ARM_MAX_VCPUS |
| 44 | int "Number maximum supported virtual CPUs per VM" | 44 | int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST |
| 45 | depends on KVM_ARM_HOST | 45 | default 4 if KVM_ARM_HOST |
| 46 | default 4 | 46 | default 0 |
| 47 | help | 47 | help |
| 48 | Static number of max supported virtual CPUs per VM. | 48 | Static number of max supported virtual CPUs per VM. |
| 49 | 49 | ||
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 8dc5e76cb789..53c5ed83d16f 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile | |||
| @@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) | |||
| 18 | 18 | ||
| 19 | obj-y += kvm-arm.o init.o interrupts.o | 19 | obj-y += kvm-arm.o init.o interrupts.o |
| 20 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o | 20 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o |
| 21 | obj-y += coproc.o coproc_a15.o mmio.o psci.o | 21 | obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o |
| 22 | obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o | 22 | obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o |
| 23 | obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o | 23 | obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o |
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c index 6ac938d46297..c55b6089e923 100644 --- a/arch/arm/kvm/arch_timer.c +++ b/arch/arm/kvm/arch_timer.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
| 23 | #include <linux/interrupt.h> | 23 | #include <linux/interrupt.h> |
| 24 | 24 | ||
| 25 | #include <clocksource/arm_arch_timer.h> | ||
| 25 | #include <asm/arch_timer.h> | 26 | #include <asm/arch_timer.h> |
| 26 | 27 | ||
| 27 | #include <asm/kvm_vgic.h> | 28 | #include <asm/kvm_vgic.h> |
| @@ -64,7 +65,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) | |||
| 64 | { | 65 | { |
| 65 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 66 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
| 66 | 67 | ||
| 67 | timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */ | 68 | timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; |
| 68 | kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, | 69 | kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, |
| 69 | vcpu->arch.timer_cpu.irq->irq, | 70 | vcpu->arch.timer_cpu.irq->irq, |
| 70 | vcpu->arch.timer_cpu.irq->level); | 71 | vcpu->arch.timer_cpu.irq->level); |
| @@ -133,8 +134,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | |||
| 133 | cycle_t cval, now; | 134 | cycle_t cval, now; |
| 134 | u64 ns; | 135 | u64 ns; |
| 135 | 136 | ||
| 136 | /* Check if the timer is enabled and unmasked first */ | 137 | if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || |
| 137 | if ((timer->cntv_ctl & 3) != 1) | 138 | !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) |
| 138 | return; | 139 | return; |
| 139 | 140 | ||
| 140 | cval = timer->cntv_cval; | 141 | cval = timer->cntv_cval; |
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a0dfc2a53f91..37d216d814cd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 16 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/cpu.h> | ||
| 19 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
| 20 | #include <linux/err.h> | 21 | #include <linux/err.h> |
| 21 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
| @@ -48,7 +49,7 @@ __asm__(".arch_extension virt"); | |||
| 48 | #endif | 49 | #endif |
| 49 | 50 | ||
| 50 | static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); | 51 | static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); |
| 51 | static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state; | 52 | static kvm_cpu_context_t __percpu *kvm_host_cpu_state; |
| 52 | static unsigned long hyp_default_vectors; | 53 | static unsigned long hyp_default_vectors; |
| 53 | 54 | ||
| 54 | /* Per-CPU variable containing the currently running vcpu. */ | 55 | /* Per-CPU variable containing the currently running vcpu. */ |
| @@ -206,7 +207,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 206 | r = KVM_MAX_VCPUS; | 207 | r = KVM_MAX_VCPUS; |
| 207 | break; | 208 | break; |
| 208 | default: | 209 | default: |
| 209 | r = 0; | 210 | r = kvm_arch_dev_ioctl_check_extension(ext); |
| 210 | break; | 211 | break; |
| 211 | } | 212 | } |
| 212 | return r; | 213 | return r; |
| @@ -218,27 +219,18 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
| 218 | return -EINVAL; | 219 | return -EINVAL; |
| 219 | } | 220 | } |
| 220 | 221 | ||
| 221 | int kvm_arch_set_memory_region(struct kvm *kvm, | ||
| 222 | struct kvm_userspace_memory_region *mem, | ||
| 223 | struct kvm_memory_slot old, | ||
| 224 | int user_alloc) | ||
| 225 | { | ||
| 226 | return 0; | ||
| 227 | } | ||
| 228 | |||
| 229 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 222 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
| 230 | struct kvm_memory_slot *memslot, | 223 | struct kvm_memory_slot *memslot, |
| 231 | struct kvm_memory_slot old, | ||
| 232 | struct kvm_userspace_memory_region *mem, | 224 | struct kvm_userspace_memory_region *mem, |
| 233 | bool user_alloc) | 225 | enum kvm_mr_change change) |
| 234 | { | 226 | { |
| 235 | return 0; | 227 | return 0; |
| 236 | } | 228 | } |
| 237 | 229 | ||
| 238 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 230 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
| 239 | struct kvm_userspace_memory_region *mem, | 231 | struct kvm_userspace_memory_region *mem, |
| 240 | struct kvm_memory_slot old, | 232 | const struct kvm_memory_slot *old, |
| 241 | bool user_alloc) | 233 | enum kvm_mr_change change) |
| 242 | { | 234 | { |
| 243 | } | 235 | } |
| 244 | 236 | ||
| @@ -326,7 +318,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
| 326 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 318 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
| 327 | { | 319 | { |
| 328 | vcpu->cpu = cpu; | 320 | vcpu->cpu = cpu; |
| 329 | vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state); | 321 | vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); |
| 330 | 322 | ||
| 331 | /* | 323 | /* |
| 332 | * Check whether this vcpu requires the cache to be flushed on | 324 | * Check whether this vcpu requires the cache to be flushed on |
| @@ -639,7 +631,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) | |||
| 639 | return 0; | 631 | return 0; |
| 640 | } | 632 | } |
| 641 | 633 | ||
| 642 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) | 634 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, |
| 635 | bool line_status) | ||
| 643 | { | 636 | { |
| 644 | u32 irq = irq_level->irq; | 637 | u32 irq = irq_level->irq; |
| 645 | unsigned int irq_type, vcpu_idx, irq_num; | 638 | unsigned int irq_type, vcpu_idx, irq_num; |
| @@ -794,30 +787,48 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 794 | } | 787 | } |
| 795 | } | 788 | } |
| 796 | 789 | ||
| 797 | static void cpu_init_hyp_mode(void *vector) | 790 | static void cpu_init_hyp_mode(void *dummy) |
| 798 | { | 791 | { |
| 792 | unsigned long long boot_pgd_ptr; | ||
| 799 | unsigned long long pgd_ptr; | 793 | unsigned long long pgd_ptr; |
| 800 | unsigned long hyp_stack_ptr; | 794 | unsigned long hyp_stack_ptr; |
| 801 | unsigned long stack_page; | 795 | unsigned long stack_page; |
| 802 | unsigned long vector_ptr; | 796 | unsigned long vector_ptr; |
| 803 | 797 | ||
| 804 | /* Switch from the HYP stub to our own HYP init vector */ | 798 | /* Switch from the HYP stub to our own HYP init vector */ |
| 805 | __hyp_set_vectors((unsigned long)vector); | 799 | __hyp_set_vectors(kvm_get_idmap_vector()); |
| 806 | 800 | ||
| 801 | boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); | ||
| 807 | pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); | 802 | pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); |
| 808 | stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); | 803 | stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); |
| 809 | hyp_stack_ptr = stack_page + PAGE_SIZE; | 804 | hyp_stack_ptr = stack_page + PAGE_SIZE; |
| 810 | vector_ptr = (unsigned long)__kvm_hyp_vector; | 805 | vector_ptr = (unsigned long)__kvm_hyp_vector; |
| 811 | 806 | ||
| 812 | __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); | 807 | __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); |
| 808 | } | ||
| 809 | |||
| 810 | static int hyp_init_cpu_notify(struct notifier_block *self, | ||
| 811 | unsigned long action, void *cpu) | ||
| 812 | { | ||
| 813 | switch (action) { | ||
| 814 | case CPU_STARTING: | ||
| 815 | case CPU_STARTING_FROZEN: | ||
| 816 | cpu_init_hyp_mode(NULL); | ||
| 817 | break; | ||
| 818 | } | ||
| 819 | |||
| 820 | return NOTIFY_OK; | ||
| 813 | } | 821 | } |
| 814 | 822 | ||
| 823 | static struct notifier_block hyp_init_cpu_nb = { | ||
| 824 | .notifier_call = hyp_init_cpu_notify, | ||
| 825 | }; | ||
| 826 | |||
| 815 | /** | 827 | /** |
| 816 | * Inits Hyp-mode on all online CPUs | 828 | * Inits Hyp-mode on all online CPUs |
| 817 | */ | 829 | */ |
| 818 | static int init_hyp_mode(void) | 830 | static int init_hyp_mode(void) |
| 819 | { | 831 | { |
| 820 | phys_addr_t init_phys_addr; | ||
| 821 | int cpu; | 832 | int cpu; |
| 822 | int err = 0; | 833 | int err = 0; |
| 823 | 834 | ||
| @@ -850,24 +861,6 @@ static int init_hyp_mode(void) | |||
| 850 | } | 861 | } |
| 851 | 862 | ||
| 852 | /* | 863 | /* |
| 853 | * Execute the init code on each CPU. | ||
| 854 | * | ||
| 855 | * Note: The stack is not mapped yet, so don't do anything else than | ||
| 856 | * initializing the hypervisor mode on each CPU using a local stack | ||
| 857 | * space for temporary storage. | ||
| 858 | */ | ||
| 859 | init_phys_addr = virt_to_phys(__kvm_hyp_init); | ||
| 860 | for_each_online_cpu(cpu) { | ||
| 861 | smp_call_function_single(cpu, cpu_init_hyp_mode, | ||
| 862 | (void *)(long)init_phys_addr, 1); | ||
| 863 | } | ||
| 864 | |||
| 865 | /* | ||
| 866 | * Unmap the identity mapping | ||
| 867 | */ | ||
| 868 | kvm_clear_hyp_idmap(); | ||
| 869 | |||
| 870 | /* | ||
| 871 | * Map the Hyp-code called directly from the host | 864 | * Map the Hyp-code called directly from the host |
| 872 | */ | 865 | */ |
| 873 | err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); | 866 | err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); |
| @@ -890,33 +883,38 @@ static int init_hyp_mode(void) | |||
| 890 | } | 883 | } |
| 891 | 884 | ||
| 892 | /* | 885 | /* |
| 893 | * Map the host VFP structures | 886 | * Map the host CPU structures |
| 894 | */ | 887 | */ |
| 895 | kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t); | 888 | kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); |
| 896 | if (!kvm_host_vfp_state) { | 889 | if (!kvm_host_cpu_state) { |
| 897 | err = -ENOMEM; | 890 | err = -ENOMEM; |
| 898 | kvm_err("Cannot allocate host VFP state\n"); | 891 | kvm_err("Cannot allocate host CPU state\n"); |
| 899 | goto out_free_mappings; | 892 | goto out_free_mappings; |
| 900 | } | 893 | } |
| 901 | 894 | ||
| 902 | for_each_possible_cpu(cpu) { | 895 | for_each_possible_cpu(cpu) { |
| 903 | kvm_kernel_vfp_t *vfp; | 896 | kvm_cpu_context_t *cpu_ctxt; |
| 904 | 897 | ||
| 905 | vfp = per_cpu_ptr(kvm_host_vfp_state, cpu); | 898 | cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); |
| 906 | err = create_hyp_mappings(vfp, vfp + 1); | 899 | err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1); |
| 907 | 900 | ||
| 908 | if (err) { | 901 | if (err) { |
| 909 | kvm_err("Cannot map host VFP state: %d\n", err); | 902 | kvm_err("Cannot map host CPU state: %d\n", err); |
| 910 | goto out_free_vfp; | 903 | goto out_free_context; |
| 911 | } | 904 | } |
| 912 | } | 905 | } |
| 913 | 906 | ||
| 914 | /* | 907 | /* |
| 908 | * Execute the init code on each CPU. | ||
| 909 | */ | ||
| 910 | on_each_cpu(cpu_init_hyp_mode, NULL, 1); | ||
| 911 | |||
| 912 | /* | ||
| 915 | * Init HYP view of VGIC | 913 | * Init HYP view of VGIC |
| 916 | */ | 914 | */ |
| 917 | err = kvm_vgic_hyp_init(); | 915 | err = kvm_vgic_hyp_init(); |
| 918 | if (err) | 916 | if (err) |
| 919 | goto out_free_vfp; | 917 | goto out_free_context; |
| 920 | 918 | ||
| 921 | #ifdef CONFIG_KVM_ARM_VGIC | 919 | #ifdef CONFIG_KVM_ARM_VGIC |
| 922 | vgic_present = true; | 920 | vgic_present = true; |
| @@ -929,12 +927,19 @@ static int init_hyp_mode(void) | |||
| 929 | if (err) | 927 | if (err) |
| 930 | goto out_free_mappings; | 928 | goto out_free_mappings; |
| 931 | 929 | ||
| 930 | #ifndef CONFIG_HOTPLUG_CPU | ||
| 931 | free_boot_hyp_pgd(); | ||
| 932 | #endif | ||
| 933 | |||
| 934 | kvm_perf_init(); | ||
| 935 | |||
| 932 | kvm_info("Hyp mode initialized successfully\n"); | 936 | kvm_info("Hyp mode initialized successfully\n"); |
| 937 | |||
| 933 | return 0; | 938 | return 0; |
| 934 | out_free_vfp: | 939 | out_free_context: |
| 935 | free_percpu(kvm_host_vfp_state); | 940 | free_percpu(kvm_host_cpu_state); |
| 936 | out_free_mappings: | 941 | out_free_mappings: |
| 937 | free_hyp_pmds(); | 942 | free_hyp_pgds(); |
| 938 | out_free_stack_pages: | 943 | out_free_stack_pages: |
| 939 | for_each_possible_cpu(cpu) | 944 | for_each_possible_cpu(cpu) |
| 940 | free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); | 945 | free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); |
| @@ -943,27 +948,42 @@ out_err: | |||
| 943 | return err; | 948 | return err; |
| 944 | } | 949 | } |
| 945 | 950 | ||
| 951 | static void check_kvm_target_cpu(void *ret) | ||
| 952 | { | ||
| 953 | *(int *)ret = kvm_target_cpu(); | ||
| 954 | } | ||
| 955 | |||
| 946 | /** | 956 | /** |
| 947 | * Initialize Hyp-mode and memory mappings on all CPUs. | 957 | * Initialize Hyp-mode and memory mappings on all CPUs. |
| 948 | */ | 958 | */ |
| 949 | int kvm_arch_init(void *opaque) | 959 | int kvm_arch_init(void *opaque) |
| 950 | { | 960 | { |
| 951 | int err; | 961 | int err; |
| 962 | int ret, cpu; | ||
| 952 | 963 | ||
| 953 | if (!is_hyp_mode_available()) { | 964 | if (!is_hyp_mode_available()) { |
| 954 | kvm_err("HYP mode not available\n"); | 965 | kvm_err("HYP mode not available\n"); |
| 955 | return -ENODEV; | 966 | return -ENODEV; |
| 956 | } | 967 | } |
| 957 | 968 | ||
| 958 | if (kvm_target_cpu() < 0) { | 969 | for_each_online_cpu(cpu) { |
| 959 | kvm_err("Target CPU not supported!\n"); | 970 | smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); |
| 960 | return -ENODEV; | 971 | if (ret < 0) { |
| 972 | kvm_err("Error, CPU %d not supported!\n", cpu); | ||
| 973 | return -ENODEV; | ||
| 974 | } | ||
| 961 | } | 975 | } |
| 962 | 976 | ||
| 963 | err = init_hyp_mode(); | 977 | err = init_hyp_mode(); |
| 964 | if (err) | 978 | if (err) |
| 965 | goto out_err; | 979 | goto out_err; |
| 966 | 980 | ||
| 981 | err = register_cpu_notifier(&hyp_init_cpu_nb); | ||
| 982 | if (err) { | ||
| 983 | kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); | ||
| 984 | goto out_err; | ||
| 985 | } | ||
| 986 | |||
| 967 | kvm_coproc_table_init(); | 987 | kvm_coproc_table_init(); |
| 968 | return 0; | 988 | return 0; |
| 969 | out_err: | 989 | out_err: |
| @@ -973,6 +993,7 @@ out_err: | |||
| 973 | /* NOP: Compiling as a module not supported */ | 993 | /* NOP: Compiling as a module not supported */ |
| 974 | void kvm_arch_exit(void) | 994 | void kvm_arch_exit(void) |
| 975 | { | 995 | { |
| 996 | kvm_perf_teardown(); | ||
| 976 | } | 997 | } |
| 977 | 998 | ||
| 978 | static int arm_init(void) | 999 | static int arm_init(void) |
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 9f37a79b880b..f048338135f7 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S | |||
| @@ -21,13 +21,33 @@ | |||
| 21 | #include <asm/asm-offsets.h> | 21 | #include <asm/asm-offsets.h> |
| 22 | #include <asm/kvm_asm.h> | 22 | #include <asm/kvm_asm.h> |
| 23 | #include <asm/kvm_arm.h> | 23 | #include <asm/kvm_arm.h> |
| 24 | #include <asm/kvm_mmu.h> | ||
| 24 | 25 | ||
| 25 | /******************************************************************** | 26 | /******************************************************************** |
| 26 | * Hypervisor initialization | 27 | * Hypervisor initialization |
| 27 | * - should be called with: | 28 | * - should be called with: |
| 28 | * r0,r1 = Hypervisor pgd pointer | 29 | * r0 = top of Hyp stack (kernel VA) |
| 29 | * r2 = top of Hyp stack (kernel VA) | 30 | * r1 = pointer to hyp vectors |
| 30 | * r3 = pointer to hyp vectors | 31 | * r2,r3 = Hypervisor pgd pointer |
| 32 | * | ||
| 33 | * The init scenario is: | ||
| 34 | * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd, | ||
| 35 | * runtime stack, runtime vectors | ||
| 36 | * - Enable the MMU with the boot pgd | ||
| 37 | * - Jump to a target into the trampoline page (remember, this is the same | ||
| 38 | * physical page!) | ||
| 39 | * - Now switch to the runtime pgd (same VA, and still the same physical | ||
| 40 | * page!) | ||
| 41 | * - Invalidate TLBs | ||
| 42 | * - Set stack and vectors | ||
| 43 | * - Profit! (or eret, if you only care about the code). | ||
| 44 | * | ||
| 45 | * As we only have four registers available to pass parameters (and we | ||
| 46 | * need six), we split the init in two phases: | ||
| 47 | * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD. | ||
| 48 | * Provides the basic HYP init, and enable the MMU. | ||
| 49 | * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD. | ||
| 50 | * Switches to the runtime PGD, set stack and vectors. | ||
| 31 | */ | 51 | */ |
| 32 | 52 | ||
| 33 | .text | 53 | .text |
| @@ -47,22 +67,25 @@ __kvm_hyp_init: | |||
| 47 | W(b) . | 67 | W(b) . |
| 48 | 68 | ||
| 49 | __do_hyp_init: | 69 | __do_hyp_init: |
| 70 | cmp r0, #0 @ We have a SP? | ||
| 71 | bne phase2 @ Yes, second stage init | ||
| 72 | |||
| 50 | @ Set the HTTBR to point to the hypervisor PGD pointer passed | 73 | @ Set the HTTBR to point to the hypervisor PGD pointer passed |
| 51 | mcrr p15, 4, r0, r1, c2 | 74 | mcrr p15, 4, r2, r3, c2 |
| 52 | 75 | ||
| 53 | @ Set the HTCR and VTCR to the same shareability and cacheability | 76 | @ Set the HTCR and VTCR to the same shareability and cacheability |
| 54 | @ settings as the non-secure TTBCR and with T0SZ == 0. | 77 | @ settings as the non-secure TTBCR and with T0SZ == 0. |
| 55 | mrc p15, 4, r0, c2, c0, 2 @ HTCR | 78 | mrc p15, 4, r0, c2, c0, 2 @ HTCR |
| 56 | ldr r12, =HTCR_MASK | 79 | ldr r2, =HTCR_MASK |
| 57 | bic r0, r0, r12 | 80 | bic r0, r0, r2 |
| 58 | mrc p15, 0, r1, c2, c0, 2 @ TTBCR | 81 | mrc p15, 0, r1, c2, c0, 2 @ TTBCR |
| 59 | and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ) | 82 | and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ) |
| 60 | orr r0, r0, r1 | 83 | orr r0, r0, r1 |
| 61 | mcr p15, 4, r0, c2, c0, 2 @ HTCR | 84 | mcr p15, 4, r0, c2, c0, 2 @ HTCR |
| 62 | 85 | ||
| 63 | mrc p15, 4, r1, c2, c1, 2 @ VTCR | 86 | mrc p15, 4, r1, c2, c1, 2 @ VTCR |
| 64 | ldr r12, =VTCR_MASK | 87 | ldr r2, =VTCR_MASK |
| 65 | bic r1, r1, r12 | 88 | bic r1, r1, r2 |
| 66 | bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits | 89 | bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits |
| 67 | orr r1, r0, r1 | 90 | orr r1, r0, r1 |
| 68 | orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S) | 91 | orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S) |
| @@ -85,24 +108,41 @@ __do_hyp_init: | |||
| 85 | @ - Memory alignment checks: enabled | 108 | @ - Memory alignment checks: enabled |
| 86 | @ - MMU: enabled (this code must be run from an identity mapping) | 109 | @ - MMU: enabled (this code must be run from an identity mapping) |
| 87 | mrc p15, 4, r0, c1, c0, 0 @ HSCR | 110 | mrc p15, 4, r0, c1, c0, 0 @ HSCR |
| 88 | ldr r12, =HSCTLR_MASK | 111 | ldr r2, =HSCTLR_MASK |
| 89 | bic r0, r0, r12 | 112 | bic r0, r0, r2 |
| 90 | mrc p15, 0, r1, c1, c0, 0 @ SCTLR | 113 | mrc p15, 0, r1, c1, c0, 0 @ SCTLR |
| 91 | ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) | 114 | ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) |
| 92 | and r1, r1, r12 | 115 | and r1, r1, r2 |
| 93 | ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) ) | 116 | ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) ) |
| 94 | THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) | 117 | THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) |
| 95 | orr r1, r1, r12 | 118 | orr r1, r1, r2 |
| 96 | orr r0, r0, r1 | 119 | orr r0, r0, r1 |
| 97 | isb | 120 | isb |
| 98 | mcr p15, 4, r0, c1, c0, 0 @ HSCR | 121 | mcr p15, 4, r0, c1, c0, 0 @ HSCR |
| 99 | isb | ||
| 100 | 122 | ||
| 101 | @ Set stack pointer and return to the kernel | 123 | @ End of init phase-1 |
| 102 | mov sp, r2 | 124 | eret |
| 125 | |||
| 126 | phase2: | ||
| 127 | @ Set stack pointer | ||
| 128 | mov sp, r0 | ||
| 103 | 129 | ||
| 104 | @ Set HVBAR to point to the HYP vectors | 130 | @ Set HVBAR to point to the HYP vectors |
| 105 | mcr p15, 4, r3, c12, c0, 0 @ HVBAR | 131 | mcr p15, 4, r1, c12, c0, 0 @ HVBAR |
| 132 | |||
| 133 | @ Jump to the trampoline page | ||
| 134 | ldr r0, =TRAMPOLINE_VA | ||
| 135 | adr r1, target | ||
| 136 | bfi r0, r1, #0, #PAGE_SHIFT | ||
| 137 | mov pc, r0 | ||
| 138 | |||
| 139 | target: @ We're now in the trampoline code, switch page tables | ||
| 140 | mcrr p15, 4, r2, r3, c2 | ||
| 141 | isb | ||
| 142 | |||
| 143 | @ Invalidate the old TLBs | ||
| 144 | mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH | ||
| 145 | dsb | ||
| 106 | 146 | ||
| 107 | eret | 147 | eret |
| 108 | 148 | ||
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2f12e4056408..965706578f13 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c | |||
| @@ -32,8 +32,15 @@ | |||
| 32 | 32 | ||
| 33 | extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; | 33 | extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; |
| 34 | 34 | ||
| 35 | static pgd_t *boot_hyp_pgd; | ||
| 36 | static pgd_t *hyp_pgd; | ||
| 35 | static DEFINE_MUTEX(kvm_hyp_pgd_mutex); | 37 | static DEFINE_MUTEX(kvm_hyp_pgd_mutex); |
| 36 | 38 | ||
| 39 | static void *init_bounce_page; | ||
| 40 | static unsigned long hyp_idmap_start; | ||
| 41 | static unsigned long hyp_idmap_end; | ||
| 42 | static phys_addr_t hyp_idmap_vector; | ||
| 43 | |||
| 37 | static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) | 44 | static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) |
| 38 | { | 45 | { |
| 39 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); | 46 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); |
| @@ -71,172 +78,224 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) | |||
| 71 | return p; | 78 | return p; |
| 72 | } | 79 | } |
| 73 | 80 | ||
| 74 | static void free_ptes(pmd_t *pmd, unsigned long addr) | 81 | static void clear_pud_entry(pud_t *pud) |
| 75 | { | 82 | { |
| 76 | pte_t *pte; | 83 | pmd_t *pmd_table = pmd_offset(pud, 0); |
| 77 | unsigned int i; | 84 | pud_clear(pud); |
| 85 | pmd_free(NULL, pmd_table); | ||
| 86 | put_page(virt_to_page(pud)); | ||
| 87 | } | ||
| 78 | 88 | ||
| 79 | for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) { | 89 | static void clear_pmd_entry(pmd_t *pmd) |
| 80 | if (!pmd_none(*pmd) && pmd_table(*pmd)) { | 90 | { |
| 81 | pte = pte_offset_kernel(pmd, addr); | 91 | pte_t *pte_table = pte_offset_kernel(pmd, 0); |
| 82 | pte_free_kernel(NULL, pte); | 92 | pmd_clear(pmd); |
| 83 | } | 93 | pte_free_kernel(NULL, pte_table); |
| 84 | pmd++; | 94 | put_page(virt_to_page(pmd)); |
| 95 | } | ||
| 96 | |||
| 97 | static bool pmd_empty(pmd_t *pmd) | ||
| 98 | { | ||
| 99 | struct page *pmd_page = virt_to_page(pmd); | ||
| 100 | return page_count(pmd_page) == 1; | ||
| 101 | } | ||
| 102 | |||
| 103 | static void clear_pte_entry(pte_t *pte) | ||
| 104 | { | ||
| 105 | if (pte_present(*pte)) { | ||
| 106 | kvm_set_pte(pte, __pte(0)); | ||
| 107 | put_page(virt_to_page(pte)); | ||
| 85 | } | 108 | } |
| 86 | } | 109 | } |
| 87 | 110 | ||
| 88 | static void free_hyp_pgd_entry(unsigned long addr) | 111 | static bool pte_empty(pte_t *pte) |
| 112 | { | ||
| 113 | struct page *pte_page = virt_to_page(pte); | ||
| 114 | return page_count(pte_page) == 1; | ||
| 115 | } | ||
| 116 | |||
| 117 | static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size) | ||
| 89 | { | 118 | { |
| 90 | pgd_t *pgd; | 119 | pgd_t *pgd; |
| 91 | pud_t *pud; | 120 | pud_t *pud; |
| 92 | pmd_t *pmd; | 121 | pmd_t *pmd; |
| 93 | unsigned long hyp_addr = KERN_TO_HYP(addr); | 122 | pte_t *pte; |
| 123 | unsigned long long addr = start, end = start + size; | ||
| 124 | u64 range; | ||
| 125 | |||
| 126 | while (addr < end) { | ||
| 127 | pgd = pgdp + pgd_index(addr); | ||
| 128 | pud = pud_offset(pgd, addr); | ||
| 129 | if (pud_none(*pud)) { | ||
| 130 | addr += PUD_SIZE; | ||
| 131 | continue; | ||
| 132 | } | ||
| 94 | 133 | ||
| 95 | pgd = hyp_pgd + pgd_index(hyp_addr); | 134 | pmd = pmd_offset(pud, addr); |
| 96 | pud = pud_offset(pgd, hyp_addr); | 135 | if (pmd_none(*pmd)) { |
| 136 | addr += PMD_SIZE; | ||
| 137 | continue; | ||
| 138 | } | ||
| 97 | 139 | ||
| 98 | if (pud_none(*pud)) | 140 | pte = pte_offset_kernel(pmd, addr); |
| 99 | return; | 141 | clear_pte_entry(pte); |
| 100 | BUG_ON(pud_bad(*pud)); | 142 | range = PAGE_SIZE; |
| 101 | 143 | ||
| 102 | pmd = pmd_offset(pud, hyp_addr); | 144 | /* If we emptied the pte, walk back up the ladder */ |
| 103 | free_ptes(pmd, addr); | 145 | if (pte_empty(pte)) { |
| 104 | pmd_free(NULL, pmd); | 146 | clear_pmd_entry(pmd); |
| 105 | pud_clear(pud); | 147 | range = PMD_SIZE; |
| 148 | if (pmd_empty(pmd)) { | ||
| 149 | clear_pud_entry(pud); | ||
| 150 | range = PUD_SIZE; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | addr += range; | ||
| 155 | } | ||
| 106 | } | 156 | } |
| 107 | 157 | ||
| 108 | /** | 158 | /** |
| 109 | * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables | 159 | * free_boot_hyp_pgd - free HYP boot page tables |
| 110 | * | 160 | * |
| 111 | * Assumes this is a page table used strictly in Hyp-mode and therefore contains | 161 | * Free the HYP boot page tables. The bounce page is also freed. |
| 112 | * either mappings in the kernel memory area (above PAGE_OFFSET), or | ||
| 113 | * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). | ||
| 114 | */ | 162 | */ |
| 115 | void free_hyp_pmds(void) | 163 | void free_boot_hyp_pgd(void) |
| 116 | { | 164 | { |
| 117 | unsigned long addr; | ||
| 118 | |||
| 119 | mutex_lock(&kvm_hyp_pgd_mutex); | 165 | mutex_lock(&kvm_hyp_pgd_mutex); |
| 120 | for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) | 166 | |
| 121 | free_hyp_pgd_entry(addr); | 167 | if (boot_hyp_pgd) { |
| 122 | for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) | 168 | unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); |
| 123 | free_hyp_pgd_entry(addr); | 169 | unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); |
| 170 | kfree(boot_hyp_pgd); | ||
| 171 | boot_hyp_pgd = NULL; | ||
| 172 | } | ||
| 173 | |||
| 174 | if (hyp_pgd) | ||
| 175 | unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); | ||
| 176 | |||
| 177 | kfree(init_bounce_page); | ||
| 178 | init_bounce_page = NULL; | ||
| 179 | |||
| 124 | mutex_unlock(&kvm_hyp_pgd_mutex); | 180 | mutex_unlock(&kvm_hyp_pgd_mutex); |
| 125 | } | 181 | } |
| 126 | 182 | ||
| 127 | static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, | 183 | /** |
| 128 | unsigned long end) | 184 | * free_hyp_pgds - free Hyp-mode page tables |
| 185 | * | ||
| 186 | * Assumes hyp_pgd is a page table used strictly in Hyp-mode and | ||
| 187 | * therefore contains either mappings in the kernel memory area (above | ||
| 188 | * PAGE_OFFSET), or device mappings in the vmalloc range (from | ||
| 189 | * VMALLOC_START to VMALLOC_END). | ||
| 190 | * | ||
| 191 | * boot_hyp_pgd should only map two pages for the init code. | ||
| 192 | */ | ||
| 193 | void free_hyp_pgds(void) | ||
| 129 | { | 194 | { |
| 130 | pte_t *pte; | ||
| 131 | unsigned long addr; | 195 | unsigned long addr; |
| 132 | struct page *page; | ||
| 133 | 196 | ||
| 134 | for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { | 197 | free_boot_hyp_pgd(); |
| 135 | unsigned long hyp_addr = KERN_TO_HYP(addr); | 198 | |
| 199 | mutex_lock(&kvm_hyp_pgd_mutex); | ||
| 136 | 200 | ||
| 137 | pte = pte_offset_kernel(pmd, hyp_addr); | 201 | if (hyp_pgd) { |
| 138 | BUG_ON(!virt_addr_valid(addr)); | 202 | for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) |
| 139 | page = virt_to_page(addr); | 203 | unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); |
| 140 | kvm_set_pte(pte, mk_pte(page, PAGE_HYP)); | 204 | for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) |
| 205 | unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); | ||
| 206 | kfree(hyp_pgd); | ||
| 207 | hyp_pgd = NULL; | ||
| 141 | } | 208 | } |
| 209 | |||
| 210 | mutex_unlock(&kvm_hyp_pgd_mutex); | ||
| 142 | } | 211 | } |
| 143 | 212 | ||
| 144 | static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start, | 213 | static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, |
| 145 | unsigned long end, | 214 | unsigned long end, unsigned long pfn, |
| 146 | unsigned long *pfn_base) | 215 | pgprot_t prot) |
| 147 | { | 216 | { |
| 148 | pte_t *pte; | 217 | pte_t *pte; |
| 149 | unsigned long addr; | 218 | unsigned long addr; |
| 150 | 219 | ||
| 151 | for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { | 220 | addr = start; |
| 152 | unsigned long hyp_addr = KERN_TO_HYP(addr); | 221 | do { |
| 153 | 222 | pte = pte_offset_kernel(pmd, addr); | |
| 154 | pte = pte_offset_kernel(pmd, hyp_addr); | 223 | kvm_set_pte(pte, pfn_pte(pfn, prot)); |
| 155 | BUG_ON(pfn_valid(*pfn_base)); | 224 | get_page(virt_to_page(pte)); |
| 156 | kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE)); | 225 | kvm_flush_dcache_to_poc(pte, sizeof(*pte)); |
| 157 | (*pfn_base)++; | 226 | pfn++; |
| 158 | } | 227 | } while (addr += PAGE_SIZE, addr != end); |
| 159 | } | 228 | } |
| 160 | 229 | ||
| 161 | static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, | 230 | static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, |
| 162 | unsigned long end, unsigned long *pfn_base) | 231 | unsigned long end, unsigned long pfn, |
| 232 | pgprot_t prot) | ||
| 163 | { | 233 | { |
| 164 | pmd_t *pmd; | 234 | pmd_t *pmd; |
| 165 | pte_t *pte; | 235 | pte_t *pte; |
| 166 | unsigned long addr, next; | 236 | unsigned long addr, next; |
| 167 | 237 | ||
| 168 | for (addr = start; addr < end; addr = next) { | 238 | addr = start; |
| 169 | unsigned long hyp_addr = KERN_TO_HYP(addr); | 239 | do { |
| 170 | pmd = pmd_offset(pud, hyp_addr); | 240 | pmd = pmd_offset(pud, addr); |
| 171 | 241 | ||
| 172 | BUG_ON(pmd_sect(*pmd)); | 242 | BUG_ON(pmd_sect(*pmd)); |
| 173 | 243 | ||
| 174 | if (pmd_none(*pmd)) { | 244 | if (pmd_none(*pmd)) { |
| 175 | pte = pte_alloc_one_kernel(NULL, hyp_addr); | 245 | pte = pte_alloc_one_kernel(NULL, addr); |
| 176 | if (!pte) { | 246 | if (!pte) { |
| 177 | kvm_err("Cannot allocate Hyp pte\n"); | 247 | kvm_err("Cannot allocate Hyp pte\n"); |
| 178 | return -ENOMEM; | 248 | return -ENOMEM; |
| 179 | } | 249 | } |
| 180 | pmd_populate_kernel(NULL, pmd, pte); | 250 | pmd_populate_kernel(NULL, pmd, pte); |
| 251 | get_page(virt_to_page(pmd)); | ||
| 252 | kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); | ||
| 181 | } | 253 | } |
| 182 | 254 | ||
| 183 | next = pmd_addr_end(addr, end); | 255 | next = pmd_addr_end(addr, end); |
| 184 | 256 | ||
| 185 | /* | 257 | create_hyp_pte_mappings(pmd, addr, next, pfn, prot); |
| 186 | * If pfn_base is NULL, we map kernel pages into HYP with the | 258 | pfn += (next - addr) >> PAGE_SHIFT; |
| 187 | * virtual address. Otherwise, this is considered an I/O | 259 | } while (addr = next, addr != end); |
| 188 | * mapping and we map the physical region starting at | ||
| 189 | * *pfn_base to [start, end[. | ||
| 190 | */ | ||
| 191 | if (!pfn_base) | ||
| 192 | create_hyp_pte_mappings(pmd, addr, next); | ||
| 193 | else | ||
| 194 | create_hyp_io_pte_mappings(pmd, addr, next, pfn_base); | ||
| 195 | } | ||
| 196 | 260 | ||
| 197 | return 0; | 261 | return 0; |
| 198 | } | 262 | } |
| 199 | 263 | ||
| 200 | static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) | 264 | static int __create_hyp_mappings(pgd_t *pgdp, |
| 265 | unsigned long start, unsigned long end, | ||
| 266 | unsigned long pfn, pgprot_t prot) | ||
| 201 | { | 267 | { |
| 202 | unsigned long start = (unsigned long)from; | ||
| 203 | unsigned long end = (unsigned long)to; | ||
| 204 | pgd_t *pgd; | 268 | pgd_t *pgd; |
| 205 | pud_t *pud; | 269 | pud_t *pud; |
| 206 | pmd_t *pmd; | 270 | pmd_t *pmd; |
| 207 | unsigned long addr, next; | 271 | unsigned long addr, next; |
| 208 | int err = 0; | 272 | int err = 0; |
| 209 | 273 | ||
| 210 | if (start >= end) | ||
| 211 | return -EINVAL; | ||
| 212 | /* Check for a valid kernel memory mapping */ | ||
| 213 | if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1))) | ||
| 214 | return -EINVAL; | ||
| 215 | /* Check for a valid kernel IO mapping */ | ||
| 216 | if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))) | ||
| 217 | return -EINVAL; | ||
| 218 | |||
| 219 | mutex_lock(&kvm_hyp_pgd_mutex); | 274 | mutex_lock(&kvm_hyp_pgd_mutex); |
| 220 | for (addr = start; addr < end; addr = next) { | 275 | addr = start & PAGE_MASK; |
| 221 | unsigned long hyp_addr = KERN_TO_HYP(addr); | 276 | end = PAGE_ALIGN(end); |
| 222 | pgd = hyp_pgd + pgd_index(hyp_addr); | 277 | do { |
| 223 | pud = pud_offset(pgd, hyp_addr); | 278 | pgd = pgdp + pgd_index(addr); |
| 279 | pud = pud_offset(pgd, addr); | ||
| 224 | 280 | ||
| 225 | if (pud_none_or_clear_bad(pud)) { | 281 | if (pud_none_or_clear_bad(pud)) { |
| 226 | pmd = pmd_alloc_one(NULL, hyp_addr); | 282 | pmd = pmd_alloc_one(NULL, addr); |
| 227 | if (!pmd) { | 283 | if (!pmd) { |
| 228 | kvm_err("Cannot allocate Hyp pmd\n"); | 284 | kvm_err("Cannot allocate Hyp pmd\n"); |
| 229 | err = -ENOMEM; | 285 | err = -ENOMEM; |
| 230 | goto out; | 286 | goto out; |
| 231 | } | 287 | } |
| 232 | pud_populate(NULL, pud, pmd); | 288 | pud_populate(NULL, pud, pmd); |
| 289 | get_page(virt_to_page(pud)); | ||
| 290 | kvm_flush_dcache_to_poc(pud, sizeof(*pud)); | ||
| 233 | } | 291 | } |
| 234 | 292 | ||
| 235 | next = pgd_addr_end(addr, end); | 293 | next = pgd_addr_end(addr, end); |
| 236 | err = create_hyp_pmd_mappings(pud, addr, next, pfn_base); | 294 | err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); |
| 237 | if (err) | 295 | if (err) |
| 238 | goto out; | 296 | goto out; |
| 239 | } | 297 | pfn += (next - addr) >> PAGE_SHIFT; |
| 298 | } while (addr = next, addr != end); | ||
| 240 | out: | 299 | out: |
| 241 | mutex_unlock(&kvm_hyp_pgd_mutex); | 300 | mutex_unlock(&kvm_hyp_pgd_mutex); |
| 242 | return err; | 301 | return err; |
| @@ -250,27 +309,41 @@ out: | |||
| 250 | * The same virtual address as the kernel virtual address is also used | 309 | * The same virtual address as the kernel virtual address is also used |
| 251 | * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying | 310 | * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying |
| 252 | * physical pages. | 311 | * physical pages. |
| 253 | * | ||
| 254 | * Note: Wrapping around zero in the "to" address is not supported. | ||
| 255 | */ | 312 | */ |
| 256 | int create_hyp_mappings(void *from, void *to) | 313 | int create_hyp_mappings(void *from, void *to) |
| 257 | { | 314 | { |
| 258 | return __create_hyp_mappings(from, to, NULL); | 315 | unsigned long phys_addr = virt_to_phys(from); |
| 316 | unsigned long start = KERN_TO_HYP((unsigned long)from); | ||
| 317 | unsigned long end = KERN_TO_HYP((unsigned long)to); | ||
| 318 | |||
| 319 | /* Check for a valid kernel memory mapping */ | ||
| 320 | if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) | ||
| 321 | return -EINVAL; | ||
| 322 | |||
| 323 | return __create_hyp_mappings(hyp_pgd, start, end, | ||
| 324 | __phys_to_pfn(phys_addr), PAGE_HYP); | ||
| 259 | } | 325 | } |
| 260 | 326 | ||
| 261 | /** | 327 | /** |
| 262 | * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode | 328 | * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode |
| 263 | * @from: The kernel start VA of the range | 329 | * @from: The kernel start VA of the range |
| 264 | * @to: The kernel end VA of the range (exclusive) | 330 | * @to: The kernel end VA of the range (exclusive) |
| 265 | * @addr: The physical start address which gets mapped | 331 | * @phys_addr: The physical start address which gets mapped |
| 266 | * | 332 | * |
| 267 | * The resulting HYP VA is the same as the kernel VA, modulo | 333 | * The resulting HYP VA is the same as the kernel VA, modulo |
| 268 | * HYP_PAGE_OFFSET. | 334 | * HYP_PAGE_OFFSET. |
| 269 | */ | 335 | */ |
| 270 | int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr) | 336 | int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) |
| 271 | { | 337 | { |
| 272 | unsigned long pfn = __phys_to_pfn(addr); | 338 | unsigned long start = KERN_TO_HYP((unsigned long)from); |
| 273 | return __create_hyp_mappings(from, to, &pfn); | 339 | unsigned long end = KERN_TO_HYP((unsigned long)to); |
| 340 | |||
| 341 | /* Check for a valid kernel IO mapping */ | ||
| 342 | if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) | ||
| 343 | return -EINVAL; | ||
| 344 | |||
| 345 | return __create_hyp_mappings(hyp_pgd, start, end, | ||
| 346 | __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); | ||
| 274 | } | 347 | } |
| 275 | 348 | ||
| 276 | /** | 349 | /** |
| @@ -307,42 +380,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) | |||
| 307 | return 0; | 380 | return 0; |
| 308 | } | 381 | } |
| 309 | 382 | ||
| 310 | static void clear_pud_entry(pud_t *pud) | ||
| 311 | { | ||
| 312 | pmd_t *pmd_table = pmd_offset(pud, 0); | ||
| 313 | pud_clear(pud); | ||
| 314 | pmd_free(NULL, pmd_table); | ||
| 315 | put_page(virt_to_page(pud)); | ||
| 316 | } | ||
| 317 | |||
| 318 | static void clear_pmd_entry(pmd_t *pmd) | ||
| 319 | { | ||
| 320 | pte_t *pte_table = pte_offset_kernel(pmd, 0); | ||
| 321 | pmd_clear(pmd); | ||
| 322 | pte_free_kernel(NULL, pte_table); | ||
| 323 | put_page(virt_to_page(pmd)); | ||
| 324 | } | ||
| 325 | |||
| 326 | static bool pmd_empty(pmd_t *pmd) | ||
| 327 | { | ||
| 328 | struct page *pmd_page = virt_to_page(pmd); | ||
| 329 | return page_count(pmd_page) == 1; | ||
| 330 | } | ||
| 331 | |||
| 332 | static void clear_pte_entry(pte_t *pte) | ||
| 333 | { | ||
| 334 | if (pte_present(*pte)) { | ||
| 335 | kvm_set_pte(pte, __pte(0)); | ||
| 336 | put_page(virt_to_page(pte)); | ||
| 337 | } | ||
| 338 | } | ||
| 339 | |||
| 340 | static bool pte_empty(pte_t *pte) | ||
| 341 | { | ||
| 342 | struct page *pte_page = virt_to_page(pte); | ||
| 343 | return page_count(pte_page) == 1; | ||
| 344 | } | ||
| 345 | |||
| 346 | /** | 383 | /** |
| 347 | * unmap_stage2_range -- Clear stage2 page table entries to unmap a range | 384 | * unmap_stage2_range -- Clear stage2 page table entries to unmap a range |
| 348 | * @kvm: The VM pointer | 385 | * @kvm: The VM pointer |
| @@ -356,43 +393,7 @@ static bool pte_empty(pte_t *pte) | |||
| 356 | */ | 393 | */ |
| 357 | static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) | 394 | static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) |
| 358 | { | 395 | { |
| 359 | pgd_t *pgd; | 396 | unmap_range(kvm->arch.pgd, start, size); |
| 360 | pud_t *pud; | ||
| 361 | pmd_t *pmd; | ||
| 362 | pte_t *pte; | ||
| 363 | phys_addr_t addr = start, end = start + size; | ||
| 364 | u64 range; | ||
| 365 | |||
| 366 | while (addr < end) { | ||
| 367 | pgd = kvm->arch.pgd + pgd_index(addr); | ||
| 368 | pud = pud_offset(pgd, addr); | ||
| 369 | if (pud_none(*pud)) { | ||
| 370 | addr += PUD_SIZE; | ||
| 371 | continue; | ||
| 372 | } | ||
| 373 | |||
| 374 | pmd = pmd_offset(pud, addr); | ||
| 375 | if (pmd_none(*pmd)) { | ||
| 376 | addr += PMD_SIZE; | ||
| 377 | continue; | ||
| 378 | } | ||
| 379 | |||
| 380 | pte = pte_offset_kernel(pmd, addr); | ||
| 381 | clear_pte_entry(pte); | ||
| 382 | range = PAGE_SIZE; | ||
| 383 | |||
| 384 | /* If we emptied the pte, walk back up the ladder */ | ||
| 385 | if (pte_empty(pte)) { | ||
| 386 | clear_pmd_entry(pmd); | ||
| 387 | range = PMD_SIZE; | ||
| 388 | if (pmd_empty(pmd)) { | ||
| 389 | clear_pud_entry(pud); | ||
| 390 | range = PUD_SIZE; | ||
| 391 | } | ||
| 392 | } | ||
| 393 | |||
| 394 | addr += range; | ||
| 395 | } | ||
| 396 | } | 397 | } |
| 397 | 398 | ||
| 398 | /** | 399 | /** |
| @@ -728,47 +729,105 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) | |||
| 728 | 729 | ||
| 729 | phys_addr_t kvm_mmu_get_httbr(void) | 730 | phys_addr_t kvm_mmu_get_httbr(void) |
| 730 | { | 731 | { |
| 731 | VM_BUG_ON(!virt_addr_valid(hyp_pgd)); | ||
| 732 | return virt_to_phys(hyp_pgd); | 732 | return virt_to_phys(hyp_pgd); |
| 733 | } | 733 | } |
| 734 | 734 | ||
| 735 | phys_addr_t kvm_mmu_get_boot_httbr(void) | ||
| 736 | { | ||
| 737 | return virt_to_phys(boot_hyp_pgd); | ||
| 738 | } | ||
| 739 | |||
| 740 | phys_addr_t kvm_get_idmap_vector(void) | ||
| 741 | { | ||
| 742 | return hyp_idmap_vector; | ||
| 743 | } | ||
| 744 | |||
| 735 | int kvm_mmu_init(void) | 745 | int kvm_mmu_init(void) |
| 736 | { | 746 | { |
| 737 | if (!hyp_pgd) { | 747 | int err; |
| 748 | |||
| 749 | hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); | ||
| 750 | hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); | ||
| 751 | hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); | ||
| 752 | |||
| 753 | if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { | ||
| 754 | /* | ||
| 755 | * Our init code is crossing a page boundary. Allocate | ||
| 756 | * a bounce page, copy the code over and use that. | ||
| 757 | */ | ||
| 758 | size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; | ||
| 759 | phys_addr_t phys_base; | ||
| 760 | |||
| 761 | init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
| 762 | if (!init_bounce_page) { | ||
| 763 | kvm_err("Couldn't allocate HYP init bounce page\n"); | ||
| 764 | err = -ENOMEM; | ||
| 765 | goto out; | ||
| 766 | } | ||
| 767 | |||
| 768 | memcpy(init_bounce_page, __hyp_idmap_text_start, len); | ||
| 769 | /* | ||
| 770 | * Warning: the code we just copied to the bounce page | ||
| 771 | * must be flushed to the point of coherency. | ||
| 772 | * Otherwise, the data may be sitting in L2, and HYP | ||
| 773 | * mode won't be able to observe it as it runs with | ||
| 774 | * caches off at that point. | ||
| 775 | */ | ||
| 776 | kvm_flush_dcache_to_poc(init_bounce_page, len); | ||
| 777 | |||
| 778 | phys_base = virt_to_phys(init_bounce_page); | ||
| 779 | hyp_idmap_vector += phys_base - hyp_idmap_start; | ||
| 780 | hyp_idmap_start = phys_base; | ||
| 781 | hyp_idmap_end = phys_base + len; | ||
| 782 | |||
| 783 | kvm_info("Using HYP init bounce page @%lx\n", | ||
| 784 | (unsigned long)phys_base); | ||
| 785 | } | ||
| 786 | |||
| 787 | hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); | ||
| 788 | boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); | ||
| 789 | if (!hyp_pgd || !boot_hyp_pgd) { | ||
| 738 | kvm_err("Hyp mode PGD not allocated\n"); | 790 | kvm_err("Hyp mode PGD not allocated\n"); |
| 739 | return -ENOMEM; | 791 | err = -ENOMEM; |
| 792 | goto out; | ||
| 740 | } | 793 | } |
| 741 | 794 | ||
| 742 | return 0; | 795 | /* Create the idmap in the boot page tables */ |
| 743 | } | 796 | err = __create_hyp_mappings(boot_hyp_pgd, |
| 797 | hyp_idmap_start, hyp_idmap_end, | ||
| 798 | __phys_to_pfn(hyp_idmap_start), | ||
| 799 | PAGE_HYP); | ||
| 744 | 800 | ||
| 745 | /** | 801 | if (err) { |
| 746 | * kvm_clear_idmap - remove all idmaps from the hyp pgd | 802 | kvm_err("Failed to idmap %lx-%lx\n", |
| 747 | * | 803 | hyp_idmap_start, hyp_idmap_end); |
| 748 | * Free the underlying pmds for all pgds in range and clear the pgds (but | 804 | goto out; |
| 749 | * don't free them) afterwards. | 805 | } |
| 750 | */ | ||
| 751 | void kvm_clear_hyp_idmap(void) | ||
| 752 | { | ||
| 753 | unsigned long addr, end; | ||
| 754 | unsigned long next; | ||
| 755 | pgd_t *pgd = hyp_pgd; | ||
| 756 | pud_t *pud; | ||
| 757 | pmd_t *pmd; | ||
| 758 | 806 | ||
| 759 | addr = virt_to_phys(__hyp_idmap_text_start); | 807 | /* Map the very same page at the trampoline VA */ |
| 760 | end = virt_to_phys(__hyp_idmap_text_end); | 808 | err = __create_hyp_mappings(boot_hyp_pgd, |
| 809 | TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, | ||
| 810 | __phys_to_pfn(hyp_idmap_start), | ||
| 811 | PAGE_HYP); | ||
| 812 | if (err) { | ||
| 813 | kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n", | ||
| 814 | TRAMPOLINE_VA); | ||
| 815 | goto out; | ||
| 816 | } | ||
| 761 | 817 | ||
| 762 | pgd += pgd_index(addr); | 818 | /* Map the same page again into the runtime page tables */ |
| 763 | do { | 819 | err = __create_hyp_mappings(hyp_pgd, |
| 764 | next = pgd_addr_end(addr, end); | 820 | TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, |
| 765 | if (pgd_none_or_clear_bad(pgd)) | 821 | __phys_to_pfn(hyp_idmap_start), |
| 766 | continue; | 822 | PAGE_HYP); |
| 767 | pud = pud_offset(pgd, addr); | 823 | if (err) { |
| 768 | pmd = pmd_offset(pud, addr); | 824 | kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n", |
| 825 | TRAMPOLINE_VA); | ||
| 826 | goto out; | ||
| 827 | } | ||
| 769 | 828 | ||
| 770 | pud_clear(pud); | 829 | return 0; |
| 771 | kvm_clean_pmd_entry(pmd); | 830 | out: |
| 772 | pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK)); | 831 | free_hyp_pgds(); |
| 773 | } while (pgd++, addr = next, addr < end); | 832 | return err; |
| 774 | } | 833 | } |
diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c new file mode 100644 index 000000000000..1a3849da0b4b --- /dev/null +++ b/arch/arm/kvm/perf.c | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | /* | ||
| 2 | * Based on the x86 implementation. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2012 ARM Ltd. | ||
| 5 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License version 2 as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | * GNU General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License | ||
| 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <linux/perf_event.h> | ||
| 21 | #include <linux/kvm_host.h> | ||
| 22 | |||
| 23 | #include <asm/kvm_emulate.h> | ||
| 24 | |||
| 25 | static int kvm_is_in_guest(void) | ||
| 26 | { | ||
| 27 | return kvm_arm_get_running_vcpu() != NULL; | ||
| 28 | } | ||
| 29 | |||
| 30 | static int kvm_is_user_mode(void) | ||
| 31 | { | ||
| 32 | struct kvm_vcpu *vcpu; | ||
| 33 | |||
| 34 | vcpu = kvm_arm_get_running_vcpu(); | ||
| 35 | |||
| 36 | if (vcpu) | ||
| 37 | return !vcpu_mode_priv(vcpu); | ||
| 38 | |||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | |||
| 42 | static unsigned long kvm_get_guest_ip(void) | ||
| 43 | { | ||
| 44 | struct kvm_vcpu *vcpu; | ||
| 45 | |||
| 46 | vcpu = kvm_arm_get_running_vcpu(); | ||
| 47 | |||
| 48 | if (vcpu) | ||
| 49 | return *vcpu_pc(vcpu); | ||
| 50 | |||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | |||
| 54 | static struct perf_guest_info_callbacks kvm_guest_cbs = { | ||
| 55 | .is_in_guest = kvm_is_in_guest, | ||
| 56 | .is_user_mode = kvm_is_user_mode, | ||
| 57 | .get_guest_ip = kvm_get_guest_ip, | ||
| 58 | }; | ||
| 59 | |||
| 60 | int kvm_perf_init(void) | ||
| 61 | { | ||
| 62 | return perf_register_guest_info_callbacks(&kvm_guest_cbs); | ||
| 63 | } | ||
| 64 | |||
| 65 | int kvm_perf_teardown(void) | ||
| 66 | { | ||
| 67 | return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); | ||
| 68 | } | ||
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 5ee505c937d1..83cb3ac27095 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <asm/pgtable.h> | 8 | #include <asm/pgtable.h> |
| 9 | #include <asm/sections.h> | 9 | #include <asm/sections.h> |
| 10 | #include <asm/system_info.h> | 10 | #include <asm/system_info.h> |
| 11 | #include <asm/virt.h> | ||
| 12 | 11 | ||
| 13 | pgd_t *idmap_pgd; | 12 | pgd_t *idmap_pgd; |
| 14 | 13 | ||
| @@ -83,37 +82,10 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, | |||
| 83 | } while (pgd++, addr = next, addr != end); | 82 | } while (pgd++, addr = next, addr != end); |
| 84 | } | 83 | } |
| 85 | 84 | ||
| 86 | #if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE) | ||
| 87 | pgd_t *hyp_pgd; | ||
| 88 | |||
| 89 | extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; | ||
| 90 | |||
| 91 | static int __init init_static_idmap_hyp(void) | ||
| 92 | { | ||
| 93 | hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); | ||
| 94 | if (!hyp_pgd) | ||
| 95 | return -ENOMEM; | ||
| 96 | |||
| 97 | pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n", | ||
| 98 | __hyp_idmap_text_start, __hyp_idmap_text_end); | ||
| 99 | identity_mapping_add(hyp_pgd, __hyp_idmap_text_start, | ||
| 100 | __hyp_idmap_text_end, PMD_SECT_AP1); | ||
| 101 | |||
| 102 | return 0; | ||
| 103 | } | ||
| 104 | #else | ||
| 105 | static int __init init_static_idmap_hyp(void) | ||
| 106 | { | ||
| 107 | return 0; | ||
| 108 | } | ||
| 109 | #endif | ||
| 110 | |||
| 111 | extern char __idmap_text_start[], __idmap_text_end[]; | 85 | extern char __idmap_text_start[], __idmap_text_end[]; |
| 112 | 86 | ||
| 113 | static int __init init_static_idmap(void) | 87 | static int __init init_static_idmap(void) |
| 114 | { | 88 | { |
| 115 | int ret; | ||
| 116 | |||
| 117 | idmap_pgd = pgd_alloc(&init_mm); | 89 | idmap_pgd = pgd_alloc(&init_mm); |
| 118 | if (!idmap_pgd) | 90 | if (!idmap_pgd) |
| 119 | return -ENOMEM; | 91 | return -ENOMEM; |
| @@ -123,12 +95,10 @@ static int __init init_static_idmap(void) | |||
| 123 | identity_mapping_add(idmap_pgd, __idmap_text_start, | 95 | identity_mapping_add(idmap_pgd, __idmap_text_start, |
| 124 | __idmap_text_end, 0); | 96 | __idmap_text_end, 0); |
| 125 | 97 | ||
| 126 | ret = init_static_idmap_hyp(); | ||
| 127 | |||
| 128 | /* Flush L1 for the hardware to see this page table content */ | 98 | /* Flush L1 for the hardware to see this page table content */ |
| 129 | flush_cache_louis(); | 99 | flush_cache_louis(); |
| 130 | 100 | ||
| 131 | return ret; | 101 | return 0; |
| 132 | } | 102 | } |
| 133 | early_initcall(init_static_idmap); | 103 | early_initcall(init_static_idmap); |
| 134 | 104 | ||
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index cfa74983c675..989dd3fe8de1 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #define KVM_USER_MEM_SLOTS 32 | 26 | #define KVM_USER_MEM_SLOTS 32 |
| 27 | 27 | ||
| 28 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 28 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
| 29 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS | ||
| 29 | 30 | ||
| 30 | /* define exit reasons from vmm to kvm*/ | 31 | /* define exit reasons from vmm to kvm*/ |
| 31 | #define EXIT_REASON_VM_PANIC 0 | 32 | #define EXIT_REASON_VM_PANIC 0 |
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h index ec6c6b301238..99503c284400 100644 --- a/arch/ia64/include/uapi/asm/kvm.h +++ b/arch/ia64/include/uapi/asm/kvm.h | |||
| @@ -27,7 +27,6 @@ | |||
| 27 | /* Select x86 specific features in <linux/kvm.h> */ | 27 | /* Select x86 specific features in <linux/kvm.h> */ |
| 28 | #define __KVM_HAVE_IOAPIC | 28 | #define __KVM_HAVE_IOAPIC |
| 29 | #define __KVM_HAVE_IRQ_LINE | 29 | #define __KVM_HAVE_IRQ_LINE |
| 30 | #define __KVM_HAVE_DEVICE_ASSIGNMENT | ||
| 31 | 30 | ||
| 32 | /* Architectural interrupt line count. */ | 31 | /* Architectural interrupt line count. */ |
| 33 | #define KVM_NR_INTERRUPTS 256 | 32 | #define KVM_NR_INTERRUPTS 256 |
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 2cd225f8c68d..990b86420cc6 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig | |||
| @@ -21,12 +21,11 @@ config KVM | |||
| 21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
| 22 | depends on BROKEN | 22 | depends on BROKEN |
| 23 | depends on HAVE_KVM && MODULES | 23 | depends on HAVE_KVM && MODULES |
| 24 | # for device assignment: | ||
| 25 | depends on PCI | ||
| 26 | depends on BROKEN | 24 | depends on BROKEN |
| 27 | select PREEMPT_NOTIFIERS | 25 | select PREEMPT_NOTIFIERS |
| 28 | select ANON_INODES | 26 | select ANON_INODES |
| 29 | select HAVE_KVM_IRQCHIP | 27 | select HAVE_KVM_IRQCHIP |
| 28 | select HAVE_KVM_IRQ_ROUTING | ||
| 30 | select KVM_APIC_ARCHITECTURE | 29 | select KVM_APIC_ARCHITECTURE |
| 31 | select KVM_MMIO | 30 | select KVM_MMIO |
| 32 | ---help--- | 31 | ---help--- |
| @@ -50,6 +49,17 @@ config KVM_INTEL | |||
| 50 | Provides support for KVM on Itanium 2 processors equipped with the VT | 49 | Provides support for KVM on Itanium 2 processors equipped with the VT |
| 51 | extensions. | 50 | extensions. |
| 52 | 51 | ||
| 52 | config KVM_DEVICE_ASSIGNMENT | ||
| 53 | bool "KVM legacy PCI device assignment support" | ||
| 54 | depends on KVM && PCI && IOMMU_API | ||
| 55 | default y | ||
| 56 | ---help--- | ||
| 57 | Provide support for legacy PCI device assignment through KVM. The | ||
| 58 | kernel now also supports a full featured userspace device driver | ||
| 59 | framework through VFIO, which supersedes much of this support. | ||
| 60 | |||
| 61 | If unsure, say Y. | ||
| 62 | |||
| 53 | source drivers/vhost/Kconfig | 63 | source drivers/vhost/Kconfig |
| 54 | 64 | ||
| 55 | endif # VIRTUALIZATION | 65 | endif # VIRTUALIZATION |
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index db3d7c5d1071..1a4053789d01 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile | |||
| @@ -49,10 +49,10 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ | |||
| 49 | asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ | 49 | asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ |
| 50 | 50 | ||
| 51 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 51 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
| 52 | coalesced_mmio.o irq_comm.o assigned-dev.o) | 52 | coalesced_mmio.o irq_comm.o) |
| 53 | 53 | ||
| 54 | ifeq ($(CONFIG_IOMMU_API),y) | 54 | ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) |
| 55 | common-objs += $(addprefix ../../../virt/kvm/, iommu.o) | 55 | common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) |
| 56 | endif | 56 | endif |
| 57 | 57 | ||
| 58 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o | 58 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index ad3126a58644..5b2dc0d10c8f 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
| @@ -204,9 +204,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 204 | case KVM_CAP_COALESCED_MMIO: | 204 | case KVM_CAP_COALESCED_MMIO: |
| 205 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | 205 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; |
| 206 | break; | 206 | break; |
| 207 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
| 207 | case KVM_CAP_IOMMU: | 208 | case KVM_CAP_IOMMU: |
| 208 | r = iommu_present(&pci_bus_type); | 209 | r = iommu_present(&pci_bus_type); |
| 209 | break; | 210 | break; |
| 211 | #endif | ||
| 210 | default: | 212 | default: |
| 211 | r = 0; | 213 | r = 0; |
| 212 | } | 214 | } |
| @@ -924,13 +926,15 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 924 | return 0; | 926 | return 0; |
| 925 | } | 927 | } |
| 926 | 928 | ||
| 927 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) | 929 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, |
| 930 | bool line_status) | ||
| 928 | { | 931 | { |
| 929 | if (!irqchip_in_kernel(kvm)) | 932 | if (!irqchip_in_kernel(kvm)) |
| 930 | return -ENXIO; | 933 | return -ENXIO; |
| 931 | 934 | ||
| 932 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 935 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
| 933 | irq_event->irq, irq_event->level); | 936 | irq_event->irq, irq_event->level, |
| 937 | line_status); | ||
| 934 | return 0; | 938 | return 0; |
| 935 | } | 939 | } |
| 936 | 940 | ||
| @@ -942,24 +946,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 942 | int r = -ENOTTY; | 946 | int r = -ENOTTY; |
| 943 | 947 | ||
| 944 | switch (ioctl) { | 948 | switch (ioctl) { |
| 945 | case KVM_SET_MEMORY_REGION: { | ||
| 946 | struct kvm_memory_region kvm_mem; | ||
| 947 | struct kvm_userspace_memory_region kvm_userspace_mem; | ||
| 948 | |||
| 949 | r = -EFAULT; | ||
| 950 | if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) | ||
| 951 | goto out; | ||
| 952 | kvm_userspace_mem.slot = kvm_mem.slot; | ||
| 953 | kvm_userspace_mem.flags = kvm_mem.flags; | ||
| 954 | kvm_userspace_mem.guest_phys_addr = | ||
| 955 | kvm_mem.guest_phys_addr; | ||
| 956 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; | ||
| 957 | r = kvm_vm_ioctl_set_memory_region(kvm, | ||
| 958 | &kvm_userspace_mem, false); | ||
| 959 | if (r) | ||
| 960 | goto out; | ||
| 961 | break; | ||
| 962 | } | ||
| 963 | case KVM_CREATE_IRQCHIP: | 949 | case KVM_CREATE_IRQCHIP: |
| 964 | r = -EFAULT; | 950 | r = -EFAULT; |
| 965 | r = kvm_ioapic_init(kvm); | 951 | r = kvm_ioapic_init(kvm); |
| @@ -1384,9 +1370,7 @@ void kvm_arch_sync_events(struct kvm *kvm) | |||
| 1384 | void kvm_arch_destroy_vm(struct kvm *kvm) | 1370 | void kvm_arch_destroy_vm(struct kvm *kvm) |
| 1385 | { | 1371 | { |
| 1386 | kvm_iommu_unmap_guest(kvm); | 1372 | kvm_iommu_unmap_guest(kvm); |
| 1387 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
| 1388 | kvm_free_all_assigned_devices(kvm); | 1373 | kvm_free_all_assigned_devices(kvm); |
| 1389 | #endif | ||
| 1390 | kfree(kvm->arch.vioapic); | 1374 | kfree(kvm->arch.vioapic); |
| 1391 | kvm_release_vm_pages(kvm); | 1375 | kvm_release_vm_pages(kvm); |
| 1392 | } | 1376 | } |
| @@ -1578,9 +1562,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
| 1578 | 1562 | ||
| 1579 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1563 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
| 1580 | struct kvm_memory_slot *memslot, | 1564 | struct kvm_memory_slot *memslot, |
| 1581 | struct kvm_memory_slot old, | ||
| 1582 | struct kvm_userspace_memory_region *mem, | 1565 | struct kvm_userspace_memory_region *mem, |
| 1583 | bool user_alloc) | 1566 | enum kvm_mr_change change) |
| 1584 | { | 1567 | { |
| 1585 | unsigned long i; | 1568 | unsigned long i; |
| 1586 | unsigned long pfn; | 1569 | unsigned long pfn; |
| @@ -1610,8 +1593,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
| 1610 | 1593 | ||
| 1611 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 1594 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
| 1612 | struct kvm_userspace_memory_region *mem, | 1595 | struct kvm_userspace_memory_region *mem, |
| 1613 | struct kvm_memory_slot old, | 1596 | const struct kvm_memory_slot *old, |
| 1614 | bool user_alloc) | 1597 | enum kvm_mr_change change) |
| 1615 | { | 1598 | { |
| 1616 | return; | 1599 | return; |
| 1617 | } | 1600 | } |
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index c3e2935b6db4..c5f92a926a9a 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h | |||
| @@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | |||
| 27 | #define kvm_apic_present(x) (true) | 27 | #define kvm_apic_present(x) (true) |
| 28 | #define kvm_lapic_enabled(x) (true) | 28 | #define kvm_lapic_enabled(x) (true) |
| 29 | 29 | ||
| 30 | static inline bool kvm_apic_vid_enabled(void) | ||
| 31 | { | ||
| 32 | /* IA64 has no apicv supporting, do nothing here */ | ||
| 33 | return false; | ||
| 34 | } | ||
| 35 | |||
| 36 | #endif | 30 | #endif |
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 4bc2c3dad6ad..cf4df8e2139a 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h | |||
| @@ -270,6 +270,9 @@ | |||
| 270 | #define H_SET_MODE 0x31C | 270 | #define H_SET_MODE 0x31C |
| 271 | #define MAX_HCALL_OPCODE H_SET_MODE | 271 | #define MAX_HCALL_OPCODE H_SET_MODE |
| 272 | 272 | ||
| 273 | /* Platform specific hcalls, used by KVM */ | ||
| 274 | #define H_RTAS 0xf000 | ||
| 275 | |||
| 273 | #ifndef __ASSEMBLY__ | 276 | #ifndef __ASSEMBLY__ |
| 274 | 277 | ||
| 275 | /** | 278 | /** |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 5a56e1c5f851..349ed85c7d61 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
| @@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void); | |||
| 142 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); | 142 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
| 143 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); | 143 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
| 144 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); | 144 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); |
| 145 | extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, | ||
| 146 | unsigned int vec); | ||
| 145 | extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); | 147 | extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); |
| 146 | extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, | 148 | extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, |
| 147 | bool upper, u32 val); | 149 | bool upper, u32 val); |
| @@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, | |||
| 156 | unsigned long pte_index); | 158 | unsigned long pte_index); |
| 157 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, | 159 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, |
| 158 | unsigned long *nb_ret); | 160 | unsigned long *nb_ret); |
| 159 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); | 161 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, |
| 162 | unsigned long gpa, bool dirty); | ||
| 160 | extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | 163 | extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, |
| 161 | long pte_index, unsigned long pteh, unsigned long ptel); | 164 | long pte_index, unsigned long pteh, unsigned long ptel); |
| 162 | extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | 165 | extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, |
| @@ -458,6 +461,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) | |||
| 458 | #define OSI_SC_MAGIC_R4 0x77810F9B | 461 | #define OSI_SC_MAGIC_R4 0x77810F9B |
| 459 | 462 | ||
| 460 | #define INS_DCBZ 0x7c0007ec | 463 | #define INS_DCBZ 0x7c0007ec |
| 464 | /* TO = 31 for unconditional trap */ | ||
| 465 | #define INS_TW 0x7fe00008 | ||
| 461 | 466 | ||
| 462 | /* LPIDs we support with this build -- runtime limit may be lower */ | 467 | /* LPIDs we support with this build -- runtime limit may be lower */ |
| 463 | #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) | 468 | #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) |
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 38bec1dc9928..9c1ff330c805 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h | |||
| @@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v) | |||
| 268 | (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); | 268 | (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); |
| 269 | } | 269 | } |
| 270 | 270 | ||
| 271 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
| 272 | /* | ||
| 273 | * Note modification of an HPTE; set the HPTE modified bit | ||
| 274 | * if anyone is interested. | ||
| 275 | */ | ||
| 276 | static inline void note_hpte_modification(struct kvm *kvm, | ||
| 277 | struct revmap_entry *rev) | ||
| 278 | { | ||
| 279 | if (atomic_read(&kvm->arch.hpte_mod_interest)) | ||
| 280 | rev->guest_rpte |= HPTE_GR_MODIFIED; | ||
| 281 | } | ||
| 282 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | ||
| 283 | |||
| 271 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ | 284 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index cdc3d2717cc6..9039d3c97eec 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h | |||
| @@ -20,6 +20,11 @@ | |||
| 20 | #ifndef __ASM_KVM_BOOK3S_ASM_H__ | 20 | #ifndef __ASM_KVM_BOOK3S_ASM_H__ |
| 21 | #define __ASM_KVM_BOOK3S_ASM_H__ | 21 | #define __ASM_KVM_BOOK3S_ASM_H__ |
| 22 | 22 | ||
| 23 | /* XICS ICP register offsets */ | ||
| 24 | #define XICS_XIRR 4 | ||
| 25 | #define XICS_MFRR 0xc | ||
| 26 | #define XICS_IPI 2 /* interrupt source # for IPIs */ | ||
| 27 | |||
| 23 | #ifdef __ASSEMBLY__ | 28 | #ifdef __ASSEMBLY__ |
| 24 | 29 | ||
| 25 | #ifdef CONFIG_KVM_BOOK3S_HANDLER | 30 | #ifdef CONFIG_KVM_BOOK3S_HANDLER |
| @@ -81,10 +86,11 @@ struct kvmppc_host_state { | |||
| 81 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 86 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
| 82 | u8 hwthread_req; | 87 | u8 hwthread_req; |
| 83 | u8 hwthread_state; | 88 | u8 hwthread_state; |
| 84 | 89 | u8 host_ipi; | |
| 85 | struct kvm_vcpu *kvm_vcpu; | 90 | struct kvm_vcpu *kvm_vcpu; |
| 86 | struct kvmppc_vcore *kvm_vcore; | 91 | struct kvmppc_vcore *kvm_vcore; |
| 87 | unsigned long xics_phys; | 92 | unsigned long xics_phys; |
| 93 | u32 saved_xirr; | ||
| 88 | u64 dabr; | 94 | u64 dabr; |
| 89 | u64 host_mmcr[3]; | 95 | u64 host_mmcr[3]; |
| 90 | u32 host_pmc[8]; | 96 | u32 host_pmc[8]; |
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index b7cd3356a532..d3c1eb34c986 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h | |||
| @@ -26,6 +26,8 @@ | |||
| 26 | /* LPIDs we support with this build -- runtime limit may be lower */ | 26 | /* LPIDs we support with this build -- runtime limit may be lower */ |
| 27 | #define KVMPPC_NR_LPIDS 64 | 27 | #define KVMPPC_NR_LPIDS 64 |
| 28 | 28 | ||
| 29 | #define KVMPPC_INST_EHPRIV 0x7c00021c | ||
| 30 | |||
| 29 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | 31 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) |
| 30 | { | 32 | { |
| 31 | vcpu->arch.gpr[num] = val; | 33 | vcpu->arch.gpr[num] = val; |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d1bb86074721..af326cde7cb6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
| @@ -44,6 +44,10 @@ | |||
| 44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
| 45 | #endif | 45 | #endif |
| 46 | 46 | ||
| 47 | /* These values are internal and can be increased later */ | ||
| 48 | #define KVM_NR_IRQCHIPS 1 | ||
| 49 | #define KVM_IRQCHIP_NUM_PINS 256 | ||
| 50 | |||
| 47 | #if !defined(CONFIG_KVM_440) | 51 | #if !defined(CONFIG_KVM_440) |
| 48 | #include <linux/mmu_notifier.h> | 52 | #include <linux/mmu_notifier.h> |
| 49 | 53 | ||
| @@ -188,6 +192,10 @@ struct kvmppc_linear_info { | |||
| 188 | int type; | 192 | int type; |
| 189 | }; | 193 | }; |
| 190 | 194 | ||
| 195 | /* XICS components, defined in book3s_xics.c */ | ||
| 196 | struct kvmppc_xics; | ||
| 197 | struct kvmppc_icp; | ||
| 198 | |||
| 191 | /* | 199 | /* |
| 192 | * The reverse mapping array has one entry for each HPTE, | 200 | * The reverse mapping array has one entry for each HPTE, |
| 193 | * which stores the guest's view of the second word of the HPTE | 201 | * which stores the guest's view of the second word of the HPTE |
| @@ -255,6 +263,13 @@ struct kvm_arch { | |||
| 255 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | 263 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ |
| 256 | #ifdef CONFIG_PPC_BOOK3S_64 | 264 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 257 | struct list_head spapr_tce_tables; | 265 | struct list_head spapr_tce_tables; |
| 266 | struct list_head rtas_tokens; | ||
| 267 | #endif | ||
| 268 | #ifdef CONFIG_KVM_MPIC | ||
| 269 | struct openpic *mpic; | ||
| 270 | #endif | ||
| 271 | #ifdef CONFIG_KVM_XICS | ||
| 272 | struct kvmppc_xics *xics; | ||
| 258 | #endif | 273 | #endif |
| 259 | }; | 274 | }; |
| 260 | 275 | ||
| @@ -301,11 +316,13 @@ struct kvmppc_vcore { | |||
| 301 | * that a guest can register. | 316 | * that a guest can register. |
| 302 | */ | 317 | */ |
| 303 | struct kvmppc_vpa { | 318 | struct kvmppc_vpa { |
| 319 | unsigned long gpa; /* Current guest phys addr */ | ||
| 304 | void *pinned_addr; /* Address in kernel linear mapping */ | 320 | void *pinned_addr; /* Address in kernel linear mapping */ |
| 305 | void *pinned_end; /* End of region */ | 321 | void *pinned_end; /* End of region */ |
| 306 | unsigned long next_gpa; /* Guest phys addr for update */ | 322 | unsigned long next_gpa; /* Guest phys addr for update */ |
| 307 | unsigned long len; /* Number of bytes required */ | 323 | unsigned long len; /* Number of bytes required */ |
| 308 | u8 update_pending; /* 1 => update pinned_addr from next_gpa */ | 324 | u8 update_pending; /* 1 => update pinned_addr from next_gpa */ |
| 325 | bool dirty; /* true => area has been modified by kernel */ | ||
| 309 | }; | 326 | }; |
| 310 | 327 | ||
| 311 | struct kvmppc_pte { | 328 | struct kvmppc_pte { |
| @@ -359,6 +376,11 @@ struct kvmppc_slb { | |||
| 359 | #define KVMPPC_BOOKE_MAX_IAC 4 | 376 | #define KVMPPC_BOOKE_MAX_IAC 4 |
| 360 | #define KVMPPC_BOOKE_MAX_DAC 2 | 377 | #define KVMPPC_BOOKE_MAX_DAC 2 |
| 361 | 378 | ||
| 379 | /* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */ | ||
| 380 | #define KVMPPC_EPR_NONE 0 /* EPR not supported */ | ||
| 381 | #define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ | ||
| 382 | #define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ | ||
| 383 | |||
| 362 | struct kvmppc_booke_debug_reg { | 384 | struct kvmppc_booke_debug_reg { |
| 363 | u32 dbcr0; | 385 | u32 dbcr0; |
| 364 | u32 dbcr1; | 386 | u32 dbcr1; |
| @@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg { | |||
| 370 | u64 dac[KVMPPC_BOOKE_MAX_DAC]; | 392 | u64 dac[KVMPPC_BOOKE_MAX_DAC]; |
| 371 | }; | 393 | }; |
| 372 | 394 | ||
| 395 | #define KVMPPC_IRQ_DEFAULT 0 | ||
| 396 | #define KVMPPC_IRQ_MPIC 1 | ||
| 397 | #define KVMPPC_IRQ_XICS 2 | ||
| 398 | |||
| 399 | struct openpic; | ||
| 400 | |||
| 373 | struct kvm_vcpu_arch { | 401 | struct kvm_vcpu_arch { |
| 374 | ulong host_stack; | 402 | ulong host_stack; |
| 375 | u32 host_pid; | 403 | u32 host_pid; |
| @@ -502,8 +530,11 @@ struct kvm_vcpu_arch { | |||
| 502 | spinlock_t wdt_lock; | 530 | spinlock_t wdt_lock; |
| 503 | struct timer_list wdt_timer; | 531 | struct timer_list wdt_timer; |
| 504 | u32 tlbcfg[4]; | 532 | u32 tlbcfg[4]; |
| 533 | u32 tlbps[4]; | ||
| 505 | u32 mmucfg; | 534 | u32 mmucfg; |
| 535 | u32 eptcfg; | ||
| 506 | u32 epr; | 536 | u32 epr; |
| 537 | u32 crit_save; | ||
| 507 | struct kvmppc_booke_debug_reg dbg_reg; | 538 | struct kvmppc_booke_debug_reg dbg_reg; |
| 508 | #endif | 539 | #endif |
| 509 | gpa_t paddr_accessed; | 540 | gpa_t paddr_accessed; |
| @@ -521,7 +552,7 @@ struct kvm_vcpu_arch { | |||
| 521 | u8 sane; | 552 | u8 sane; |
| 522 | u8 cpu_type; | 553 | u8 cpu_type; |
| 523 | u8 hcall_needed; | 554 | u8 hcall_needed; |
| 524 | u8 epr_enabled; | 555 | u8 epr_flags; /* KVMPPC_EPR_xxx */ |
| 525 | u8 epr_needed; | 556 | u8 epr_needed; |
| 526 | 557 | ||
| 527 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ | 558 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ |
| @@ -548,6 +579,13 @@ struct kvm_vcpu_arch { | |||
| 548 | unsigned long magic_page_pa; /* phys addr to map the magic page to */ | 579 | unsigned long magic_page_pa; /* phys addr to map the magic page to */ |
| 549 | unsigned long magic_page_ea; /* effect. addr to map the magic page to */ | 580 | unsigned long magic_page_ea; /* effect. addr to map the magic page to */ |
| 550 | 581 | ||
| 582 | int irq_type; /* one of KVM_IRQ_* */ | ||
| 583 | int irq_cpu_id; | ||
| 584 | struct openpic *mpic; /* KVM_IRQ_MPIC */ | ||
| 585 | #ifdef CONFIG_KVM_XICS | ||
| 586 | struct kvmppc_icp *icp; /* XICS presentation controller */ | ||
| 587 | #endif | ||
| 588 | |||
| 551 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 589 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
| 552 | struct kvm_vcpu_arch_shared shregs; | 590 | struct kvm_vcpu_arch_shared shregs; |
| 553 | 591 | ||
| @@ -588,5 +626,6 @@ struct kvm_vcpu_arch { | |||
| 588 | #define KVM_MMIO_REG_FQPR 0x0060 | 626 | #define KVM_MMIO_REG_FQPR 0x0060 |
| 589 | 627 | ||
| 590 | #define __KVM_HAVE_ARCH_WQP | 628 | #define __KVM_HAVE_ARCH_WQP |
| 629 | #define __KVM_HAVE_CREATE_DEVICE | ||
| 591 | 630 | ||
| 592 | #endif /* __POWERPC_KVM_HOST_H__ */ | 631 | #endif /* __POWERPC_KVM_HOST_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44a657adf416..a5287fe03d77 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
| @@ -44,7 +44,7 @@ enum emulation_result { | |||
| 44 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ | 44 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ |
| 45 | EMULATE_FAIL, /* can't emulate this instruction */ | 45 | EMULATE_FAIL, /* can't emulate this instruction */ |
| 46 | EMULATE_AGAIN, /* something went wrong. go again */ | 46 | EMULATE_AGAIN, /* something went wrong. go again */ |
| 47 | EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */ | 47 | EMULATE_EXIT_USER, /* emulation requires exit to user-space */ |
| 48 | }; | 48 | }; |
| 49 | 49 | ||
| 50 | extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 50 | extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
| @@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); | |||
| 104 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); | 104 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); |
| 105 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | 105 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, |
| 106 | struct kvm_interrupt *irq); | 106 | struct kvm_interrupt *irq); |
| 107 | extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | 107 | extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); |
| 108 | struct kvm_interrupt *irq); | ||
| 109 | extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); | 108 | extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); |
| 110 | 109 | ||
| 111 | extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 110 | extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| @@ -131,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, | |||
| 131 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, | 130 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, |
| 132 | struct kvm_memory_slot *memslot, unsigned long porder); | 131 | struct kvm_memory_slot *memslot, unsigned long porder); |
| 133 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); | 132 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); |
| 133 | |||
| 134 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | 134 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, |
| 135 | struct kvm_create_spapr_tce *args); | 135 | struct kvm_create_spapr_tce *args); |
| 136 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 136 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
| @@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
| 152 | struct kvm_userspace_memory_region *mem); | 152 | struct kvm_userspace_memory_region *mem); |
| 153 | extern void kvmppc_core_commit_memory_region(struct kvm *kvm, | 153 | extern void kvmppc_core_commit_memory_region(struct kvm *kvm, |
| 154 | struct kvm_userspace_memory_region *mem, | 154 | struct kvm_userspace_memory_region *mem, |
| 155 | struct kvm_memory_slot old); | 155 | const struct kvm_memory_slot *old); |
| 156 | extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, | 156 | extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, |
| 157 | struct kvm_ppc_smmu_info *info); | 157 | struct kvm_ppc_smmu_info *info); |
| 158 | extern void kvmppc_core_flush_memslot(struct kvm *kvm, | 158 | extern void kvmppc_core_flush_memslot(struct kvm *kvm, |
| @@ -165,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); | |||
| 165 | 165 | ||
| 166 | extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); | 166 | extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); |
| 167 | 167 | ||
| 168 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); | ||
| 169 | |||
| 170 | extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); | ||
| 171 | extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); | ||
| 172 | extern void kvmppc_rtas_tokens_free(struct kvm *kvm); | ||
| 173 | extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, | ||
| 174 | u32 priority); | ||
| 175 | extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, | ||
| 176 | u32 *priority); | ||
| 177 | extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); | ||
| 178 | extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); | ||
| 179 | |||
| 168 | /* | 180 | /* |
| 169 | * Cuts out inst bits with ordering according to spec. | 181 | * Cuts out inst bits with ordering according to spec. |
| 170 | * That means the leftmost bit is zero. All given bits are included. | 182 | * That means the leftmost bit is zero. All given bits are included. |
| @@ -246,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); | |||
| 246 | 258 | ||
| 247 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); | 259 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); |
| 248 | 260 | ||
| 261 | struct openpic; | ||
| 262 | |||
| 249 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 263 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
| 250 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) | 264 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) |
| 251 | { | 265 | { |
| 252 | paca[cpu].kvm_hstate.xics_phys = addr; | 266 | paca[cpu].kvm_hstate.xics_phys = addr; |
| 253 | } | 267 | } |
| 254 | 268 | ||
| 269 | static inline u32 kvmppc_get_xics_latch(void) | ||
| 270 | { | ||
| 271 | u32 xirr = get_paca()->kvm_hstate.saved_xirr; | ||
| 272 | |||
| 273 | get_paca()->kvm_hstate.saved_xirr = 0; | ||
| 274 | |||
| 275 | return xirr; | ||
| 276 | } | ||
| 277 | |||
| 278 | static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) | ||
| 279 | { | ||
| 280 | paca[cpu].kvm_hstate.host_ipi = host_ipi; | ||
| 281 | } | ||
| 282 | |||
| 283 | extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); | ||
| 255 | extern void kvm_linear_init(void); | 284 | extern void kvm_linear_init(void); |
| 256 | 285 | ||
| 257 | #else | 286 | #else |
| @@ -260,6 +289,46 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) | |||
| 260 | 289 | ||
| 261 | static inline void kvm_linear_init(void) | 290 | static inline void kvm_linear_init(void) |
| 262 | {} | 291 | {} |
| 292 | |||
| 293 | static inline u32 kvmppc_get_xics_latch(void) | ||
| 294 | { | ||
| 295 | return 0; | ||
| 296 | } | ||
| 297 | |||
| 298 | static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) | ||
| 299 | {} | ||
| 300 | |||
| 301 | static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) | ||
| 302 | { | ||
| 303 | kvm_vcpu_kick(vcpu); | ||
| 304 | } | ||
| 305 | #endif | ||
| 306 | |||
| 307 | #ifdef CONFIG_KVM_XICS | ||
| 308 | static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) | ||
| 309 | { | ||
| 310 | return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; | ||
| 311 | } | ||
| 312 | extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); | ||
| 313 | extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); | ||
| 314 | extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); | ||
| 315 | extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); | ||
| 316 | extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); | ||
| 317 | extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); | ||
| 318 | extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, | ||
| 319 | struct kvm_vcpu *vcpu, u32 cpu); | ||
| 320 | #else | ||
| 321 | static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) | ||
| 322 | { return 0; } | ||
| 323 | static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } | ||
| 324 | static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, | ||
| 325 | unsigned long server) | ||
| 326 | { return -EINVAL; } | ||
| 327 | static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm, | ||
| 328 | struct kvm_irq_level *args) | ||
| 329 | { return -ENOTTY; } | ||
| 330 | static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) | ||
| 331 | { return 0; } | ||
| 263 | #endif | 332 | #endif |
| 264 | 333 | ||
| 265 | static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) | 334 | static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) |
| @@ -271,6 +340,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) | |||
| 271 | #endif | 340 | #endif |
| 272 | } | 341 | } |
| 273 | 342 | ||
| 343 | #ifdef CONFIG_KVM_MPIC | ||
| 344 | |||
| 345 | void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); | ||
| 346 | int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, | ||
| 347 | u32 cpu); | ||
| 348 | void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu); | ||
| 349 | |||
| 350 | #else | ||
| 351 | |||
| 352 | static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) | ||
| 353 | { | ||
| 354 | } | ||
| 355 | |||
| 356 | static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, | ||
| 357 | struct kvm_vcpu *vcpu, u32 cpu) | ||
| 358 | { | ||
| 359 | return -EINVAL; | ||
| 360 | } | ||
| 361 | |||
| 362 | static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, | ||
| 363 | struct kvm_vcpu *vcpu) | ||
| 364 | { | ||
| 365 | } | ||
| 366 | |||
| 367 | #endif /* CONFIG_KVM_MPIC */ | ||
| 368 | |||
| 274 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | 369 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, |
| 275 | struct kvm_config_tlb *cfg); | 370 | struct kvm_config_tlb *cfg); |
| 276 | int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, | 371 | int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, |
| @@ -283,8 +378,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids); | |||
| 283 | 378 | ||
| 284 | static inline void kvmppc_mmu_flush_icache(pfn_t pfn) | 379 | static inline void kvmppc_mmu_flush_icache(pfn_t pfn) |
| 285 | { | 380 | { |
| 286 | /* Clear i-cache for new pages */ | ||
| 287 | struct page *page; | 381 | struct page *page; |
| 382 | /* | ||
| 383 | * We can only access pages that the kernel maps | ||
| 384 | * as memory. Bail out for unmapped ones. | ||
| 385 | */ | ||
| 386 | if (!pfn_valid(pfn)) | ||
| 387 | return; | ||
| 388 | |||
| 389 | /* Clear i-cache for new pages */ | ||
| 288 | page = pfn_to_page(pfn); | 390 | page = pfn_to_page(pfn); |
| 289 | if (!test_bit(PG_arch_1, &page->flags)) { | 391 | if (!test_bit(PG_arch_1, &page->flags)) { |
| 290 | flush_dcache_icache_page(page); | 392 | flush_dcache_icache_page(page); |
| @@ -324,4 +426,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) | |||
| 324 | return ea; | 426 | return ea; |
| 325 | } | 427 | } |
| 326 | 428 | ||
| 429 | extern void xics_wake_cpu(int cpu); | ||
| 430 | |||
| 327 | #endif /* __POWERPC_KVM_PPC_H__ */ | 431 | #endif /* __POWERPC_KVM_PPC_H__ */ |
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 3d17427e4fd7..a6136515c7f2 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
| @@ -300,6 +300,7 @@ | |||
| 300 | #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ | 300 | #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ |
| 301 | #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ | 301 | #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ |
| 302 | #define LPCR_MER 0x00000800 /* Mediated External Exception */ | 302 | #define LPCR_MER 0x00000800 /* Mediated External Exception */ |
| 303 | #define LPCR_MER_SH 11 | ||
| 303 | #define LPCR_LPES 0x0000000c | 304 | #define LPCR_LPES 0x0000000c |
| 304 | #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ | 305 | #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ |
| 305 | #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ | 306 | #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 16064d00adb9..0fb1a6e9ff90 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | /* Select powerpc specific features in <linux/kvm.h> */ | 25 | /* Select powerpc specific features in <linux/kvm.h> */ |
| 26 | #define __KVM_HAVE_SPAPR_TCE | 26 | #define __KVM_HAVE_SPAPR_TCE |
| 27 | #define __KVM_HAVE_PPC_SMT | 27 | #define __KVM_HAVE_PPC_SMT |
| 28 | #define __KVM_HAVE_IRQCHIP | ||
| 29 | #define __KVM_HAVE_IRQ_LINE | ||
| 28 | 30 | ||
| 29 | struct kvm_regs { | 31 | struct kvm_regs { |
| 30 | __u64 pc; | 32 | __u64 pc; |
| @@ -272,8 +274,31 @@ struct kvm_debug_exit_arch { | |||
| 272 | 274 | ||
| 273 | /* for KVM_SET_GUEST_DEBUG */ | 275 | /* for KVM_SET_GUEST_DEBUG */ |
| 274 | struct kvm_guest_debug_arch { | 276 | struct kvm_guest_debug_arch { |
| 277 | struct { | ||
| 278 | /* H/W breakpoint/watchpoint address */ | ||
| 279 | __u64 addr; | ||
| 280 | /* | ||
| 281 | * Type denotes h/w breakpoint, read watchpoint, write | ||
| 282 | * watchpoint or watchpoint (both read and write). | ||
| 283 | */ | ||
| 284 | #define KVMPPC_DEBUG_NONE 0x0 | ||
| 285 | #define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) | ||
| 286 | #define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) | ||
| 287 | #define KVMPPC_DEBUG_WATCH_READ (1UL << 3) | ||
| 288 | __u32 type; | ||
| 289 | __u32 reserved; | ||
| 290 | } bp[16]; | ||
| 275 | }; | 291 | }; |
| 276 | 292 | ||
| 293 | /* Debug related defines */ | ||
| 294 | /* | ||
| 295 | * kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic | ||
| 296 | * and upper 16 bits are architecture specific. Architecture specific defines | ||
| 297 | * that ioctl is for setting hardware breakpoint or software breakpoint. | ||
| 298 | */ | ||
| 299 | #define KVM_GUESTDBG_USE_SW_BP 0x00010000 | ||
| 300 | #define KVM_GUESTDBG_USE_HW_BP 0x00020000 | ||
| 301 | |||
| 277 | /* definition of registers in kvm_run */ | 302 | /* definition of registers in kvm_run */ |
| 278 | struct kvm_sync_regs { | 303 | struct kvm_sync_regs { |
| 279 | }; | 304 | }; |
| @@ -299,6 +324,12 @@ struct kvm_allocate_rma { | |||
| 299 | __u64 rma_size; | 324 | __u64 rma_size; |
| 300 | }; | 325 | }; |
| 301 | 326 | ||
| 327 | /* for KVM_CAP_PPC_RTAS */ | ||
| 328 | struct kvm_rtas_token_args { | ||
| 329 | char name[120]; | ||
| 330 | __u64 token; /* Use a token of 0 to undefine a mapping */ | ||
| 331 | }; | ||
| 332 | |||
| 302 | struct kvm_book3e_206_tlb_entry { | 333 | struct kvm_book3e_206_tlb_entry { |
| 303 | __u32 mas8; | 334 | __u32 mas8; |
| 304 | __u32 mas1; | 335 | __u32 mas1; |
| @@ -359,6 +390,26 @@ struct kvm_get_htab_header { | |||
| 359 | __u16 n_invalid; | 390 | __u16 n_invalid; |
| 360 | }; | 391 | }; |
| 361 | 392 | ||
| 393 | /* Per-vcpu XICS interrupt controller state */ | ||
| 394 | #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) | ||
| 395 | |||
| 396 | #define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */ | ||
| 397 | #define KVM_REG_PPC_ICP_CPPR_MASK 0xff | ||
| 398 | #define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */ | ||
| 399 | #define KVM_REG_PPC_ICP_XISR_MASK 0xffffff | ||
| 400 | #define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */ | ||
| 401 | #define KVM_REG_PPC_ICP_MFRR_MASK 0xff | ||
| 402 | #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ | ||
| 403 | #define KVM_REG_PPC_ICP_PPRI_MASK 0xff | ||
| 404 | |||
| 405 | /* Device control API: PPC-specific devices */ | ||
| 406 | #define KVM_DEV_MPIC_GRP_MISC 1 | ||
| 407 | #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ | ||
| 408 | |||
| 409 | #define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */ | ||
| 410 | #define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */ | ||
| 411 | |||
| 412 | /* One-Reg API: PPC-specific registers */ | ||
| 362 | #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) | 413 | #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) |
| 363 | #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) | 414 | #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) |
| 364 | #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) | 415 | #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) |
| @@ -417,4 +468,47 @@ struct kvm_get_htab_header { | |||
| 417 | #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) | 468 | #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) |
| 418 | #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) | 469 | #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) |
| 419 | 470 | ||
| 471 | /* Timer Status Register OR/CLEAR interface */ | ||
| 472 | #define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87) | ||
| 473 | #define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88) | ||
| 474 | #define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89) | ||
| 475 | #define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a) | ||
| 476 | |||
| 477 | /* Debugging: Special instruction for software breakpoint */ | ||
| 478 | #define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b) | ||
| 479 | |||
| 480 | /* MMU registers */ | ||
| 481 | #define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c) | ||
| 482 | #define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d) | ||
| 483 | #define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e) | ||
| 484 | #define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f) | ||
| 485 | #define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90) | ||
| 486 | #define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91) | ||
| 487 | #define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92) | ||
| 488 | /* | ||
| 489 | * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using | ||
| 490 | * KVM_CAP_SW_TLB ioctl | ||
| 491 | */ | ||
| 492 | #define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93) | ||
| 493 | #define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94) | ||
| 494 | #define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95) | ||
| 495 | #define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96) | ||
| 496 | #define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97) | ||
| 497 | #define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98) | ||
| 498 | #define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99) | ||
| 499 | #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) | ||
| 500 | #define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) | ||
| 501 | |||
| 502 | /* PPC64 eXternal Interrupt Controller Specification */ | ||
| 503 | #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ | ||
| 504 | |||
| 505 | /* Layout of 64-bit source attribute values */ | ||
| 506 | #define KVM_XICS_DESTINATION_SHIFT 0 | ||
| 507 | #define KVM_XICS_DESTINATION_MASK 0xffffffffULL | ||
| 508 | #define KVM_XICS_PRIORITY_SHIFT 32 | ||
| 509 | #define KVM_XICS_PRIORITY_MASK 0xff | ||
| 510 | #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) | ||
| 511 | #define KVM_XICS_MASKED (1ULL << 41) | ||
| 512 | #define KVM_XICS_PENDING (1ULL << 42) | ||
| 513 | |||
| 420 | #endif /* __LINUX_KVM_POWERPC_H */ | 514 | #endif /* __LINUX_KVM_POWERPC_H */ |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 172233eab799..b51a97cfedf8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
| @@ -480,6 +480,7 @@ int main(void) | |||
| 480 | DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); | 480 | DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); |
| 481 | DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); | 481 | DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); |
| 482 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); | 482 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); |
| 483 | DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); | ||
| 483 | #endif | 484 | #endif |
| 484 | #ifdef CONFIG_PPC_BOOK3S | 485 | #ifdef CONFIG_PPC_BOOK3S |
| 485 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); | 486 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); |
| @@ -576,6 +577,8 @@ int main(void) | |||
| 576 | HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); | 577 | HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); |
| 577 | HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); | 578 | HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); |
| 578 | HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); | 579 | HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); |
| 580 | HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); | ||
| 581 | HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); | ||
| 579 | HSTATE_FIELD(HSTATE_MMCR, host_mmcr); | 582 | HSTATE_FIELD(HSTATE_MMCR, host_mmcr); |
| 580 | HSTATE_FIELD(HSTATE_PMC, host_pmc); | 583 | HSTATE_FIELD(HSTATE_PMC, host_pmc); |
| 581 | HSTATE_FIELD(HSTATE_PURR, host_purr); | 584 | HSTATE_FIELD(HSTATE_PURR, host_purr); |
| @@ -599,6 +602,7 @@ int main(void) | |||
| 599 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); | 602 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); |
| 600 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); | 603 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); |
| 601 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); | 604 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); |
| 605 | DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save)); | ||
| 602 | #endif /* CONFIG_PPC_BOOK3S */ | 606 | #endif /* CONFIG_PPC_BOOK3S */ |
| 603 | #endif /* CONFIG_KVM */ | 607 | #endif /* CONFIG_KVM */ |
| 604 | 608 | ||
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 3d7fd21c65f9..2f5c6b6d6877 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c | |||
| @@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
| 124 | return kvmppc_set_sregs_ivor(vcpu, sregs); | 124 | return kvmppc_set_sregs_ivor(vcpu, sregs); |
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
| 128 | union kvmppc_one_reg *val) | ||
| 129 | { | ||
| 130 | return -EINVAL; | ||
| 131 | } | ||
| 132 | |||
| 133 | int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
| 134 | union kvmppc_one_reg *val) | ||
| 135 | { | ||
| 136 | return -EINVAL; | ||
| 137 | } | ||
| 138 | |||
| 127 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | 139 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) |
| 128 | { | 140 | { |
| 129 | struct kvmppc_vcpu_44x *vcpu_44x; | 141 | struct kvmppc_vcpu_44x *vcpu_44x; |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 63c67ec72e43..eb643f862579 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig | |||
| @@ -136,21 +136,41 @@ config KVM_E500V2 | |||
| 136 | If unsure, say N. | 136 | If unsure, say N. |
| 137 | 137 | ||
| 138 | config KVM_E500MC | 138 | config KVM_E500MC |
| 139 | bool "KVM support for PowerPC E500MC/E5500 processors" | 139 | bool "KVM support for PowerPC E500MC/E5500/E6500 processors" |
| 140 | depends on PPC_E500MC | 140 | depends on PPC_E500MC |
| 141 | select KVM | 141 | select KVM |
| 142 | select KVM_MMIO | 142 | select KVM_MMIO |
| 143 | select KVM_BOOKE_HV | 143 | select KVM_BOOKE_HV |
| 144 | select MMU_NOTIFIER | 144 | select MMU_NOTIFIER |
| 145 | ---help--- | 145 | ---help--- |
| 146 | Support running unmodified E500MC/E5500 (32-bit) guest kernels in | 146 | Support running unmodified E500MC/E5500/E6500 guest kernels in |
| 147 | virtual machines on E500MC/E5500 host processors. | 147 | virtual machines on E500MC/E5500/E6500 host processors. |
| 148 | 148 | ||
| 149 | This module provides access to the hardware capabilities through | 149 | This module provides access to the hardware capabilities through |
| 150 | a character device node named /dev/kvm. | 150 | a character device node named /dev/kvm. |
| 151 | 151 | ||
| 152 | If unsure, say N. | 152 | If unsure, say N. |
| 153 | 153 | ||
| 154 | config KVM_MPIC | ||
| 155 | bool "KVM in-kernel MPIC emulation" | ||
| 156 | depends on KVM && E500 | ||
| 157 | select HAVE_KVM_IRQCHIP | ||
| 158 | select HAVE_KVM_IRQ_ROUTING | ||
| 159 | select HAVE_KVM_MSI | ||
| 160 | help | ||
| 161 | Enable support for emulating MPIC devices inside the | ||
| 162 | host kernel, rather than relying on userspace to emulate. | ||
| 163 | Currently, support is limited to certain versions of | ||
| 164 | Freescale's MPIC implementation. | ||
| 165 | |||
| 166 | config KVM_XICS | ||
| 167 | bool "KVM in-kernel XICS emulation" | ||
| 168 | depends on KVM_BOOK3S_64 && !KVM_MPIC | ||
| 169 | ---help--- | ||
| 170 | Include support for the XICS (eXternal Interrupt Controller | ||
| 171 | Specification) interrupt controller architecture used on | ||
| 172 | IBM POWER (pSeries) servers. | ||
| 173 | |||
| 154 | source drivers/vhost/Kconfig | 174 | source drivers/vhost/Kconfig |
| 155 | 175 | ||
| 156 | endif # VIRTUALIZATION | 176 | endif # VIRTUALIZATION |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index b772eded8c26..422de3f4d46c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
| @@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | |||
| 72 | book3s_hv.o \ | 72 | book3s_hv.o \ |
| 73 | book3s_hv_interrupts.o \ | 73 | book3s_hv_interrupts.o \ |
| 74 | book3s_64_mmu_hv.o | 74 | book3s_64_mmu_hv.o |
| 75 | kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ | ||
| 76 | book3s_hv_rm_xics.o | ||
| 75 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | 77 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ |
| 76 | book3s_hv_rmhandlers.o \ | 78 | book3s_hv_rmhandlers.o \ |
| 77 | book3s_hv_rm_mmu.o \ | 79 | book3s_hv_rm_mmu.o \ |
| 78 | book3s_64_vio_hv.o \ | 80 | book3s_64_vio_hv.o \ |
| 79 | book3s_hv_ras.o \ | 81 | book3s_hv_ras.o \ |
| 80 | book3s_hv_builtin.o | 82 | book3s_hv_builtin.o \ |
| 83 | $(kvm-book3s_64-builtin-xics-objs-y) | ||
| 84 | |||
| 85 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ | ||
| 86 | book3s_xics.o | ||
| 81 | 87 | ||
| 82 | kvm-book3s_64-module-objs := \ | 88 | kvm-book3s_64-module-objs := \ |
| 83 | ../../../virt/kvm/kvm_main.o \ | 89 | ../../../virt/kvm/kvm_main.o \ |
| @@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \ | |||
| 86 | emulate.o \ | 92 | emulate.o \ |
| 87 | book3s.o \ | 93 | book3s.o \ |
| 88 | book3s_64_vio.o \ | 94 | book3s_64_vio.o \ |
| 95 | book3s_rtas.o \ | ||
| 89 | $(kvm-book3s_64-objs-y) | 96 | $(kvm-book3s_64-objs-y) |
| 90 | 97 | ||
| 91 | kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) | 98 | kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) |
| @@ -103,6 +110,9 @@ kvm-book3s_32-objs := \ | |||
| 103 | book3s_32_mmu.o | 110 | book3s_32_mmu.o |
| 104 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) | 111 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) |
| 105 | 112 | ||
| 113 | kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o | ||
| 114 | kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) | ||
| 115 | |||
| 106 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) | 116 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) |
| 107 | 117 | ||
| 108 | obj-$(CONFIG_KVM_440) += kvm.o | 118 | obj-$(CONFIG_KVM_440) += kvm.o |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a4b645285240..700df6f1d32c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
| @@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) | |||
| 104 | return prio; | 104 | return prio; |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, | 107 | void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, |
| 108 | unsigned int vec) | 108 | unsigned int vec) |
| 109 | { | 109 | { |
| 110 | unsigned long old_pending = vcpu->arch.pending_exceptions; | 110 | unsigned long old_pending = vcpu->arch.pending_exceptions; |
| @@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | |||
| 160 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 160 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | 163 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) |
| 164 | struct kvm_interrupt *irq) | ||
| 165 | { | 164 | { |
| 166 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); | 165 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); |
| 167 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); | 166 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); |
| @@ -530,6 +529,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | |||
| 530 | val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); | 529 | val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); |
| 531 | break; | 530 | break; |
| 532 | #endif /* CONFIG_ALTIVEC */ | 531 | #endif /* CONFIG_ALTIVEC */ |
| 532 | case KVM_REG_PPC_DEBUG_INST: { | ||
| 533 | u32 opcode = INS_TW; | ||
| 534 | r = copy_to_user((u32 __user *)(long)reg->addr, | ||
| 535 | &opcode, sizeof(u32)); | ||
| 536 | break; | ||
| 537 | } | ||
| 538 | #ifdef CONFIG_KVM_XICS | ||
| 539 | case KVM_REG_PPC_ICP_STATE: | ||
| 540 | if (!vcpu->arch.icp) { | ||
| 541 | r = -ENXIO; | ||
| 542 | break; | ||
| 543 | } | ||
| 544 | val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); | ||
| 545 | break; | ||
| 546 | #endif /* CONFIG_KVM_XICS */ | ||
| 533 | default: | 547 | default: |
| 534 | r = -EINVAL; | 548 | r = -EINVAL; |
| 535 | break; | 549 | break; |
| @@ -592,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | |||
| 592 | vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); | 606 | vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); |
| 593 | break; | 607 | break; |
| 594 | #endif /* CONFIG_ALTIVEC */ | 608 | #endif /* CONFIG_ALTIVEC */ |
| 609 | #ifdef CONFIG_KVM_XICS | ||
| 610 | case KVM_REG_PPC_ICP_STATE: | ||
| 611 | if (!vcpu->arch.icp) { | ||
| 612 | r = -ENXIO; | ||
| 613 | break; | ||
| 614 | } | ||
| 615 | r = kvmppc_xics_set_icp(vcpu, | ||
| 616 | set_reg_val(reg->id, val)); | ||
| 617 | break; | ||
| 618 | #endif /* CONFIG_KVM_XICS */ | ||
| 595 | default: | 619 | default: |
| 596 | r = -EINVAL; | 620 | r = -EINVAL; |
| 597 | break; | 621 | break; |
| @@ -607,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
| 607 | return 0; | 631 | return 0; |
| 608 | } | 632 | } |
| 609 | 633 | ||
| 634 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | ||
| 635 | struct kvm_guest_debug *dbg) | ||
| 636 | { | ||
| 637 | return -EINVAL; | ||
| 638 | } | ||
| 639 | |||
| 610 | void kvmppc_decrementer_func(unsigned long data) | 640 | void kvmppc_decrementer_func(unsigned long data) |
| 611 | { | 641 | { |
| 612 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | 642 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index da98e26f6e45..5880dfb31074 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
| @@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 893 | /* Harvest R and C */ | 893 | /* Harvest R and C */ |
| 894 | rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); | 894 | rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); |
| 895 | *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; | 895 | *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; |
| 896 | rev[i].guest_rpte = ptel | rcbits; | 896 | if (rcbits & ~rev[i].guest_rpte) { |
| 897 | rev[i].guest_rpte = ptel | rcbits; | ||
| 898 | note_hpte_modification(kvm, &rev[i]); | ||
| 899 | } | ||
| 897 | } | 900 | } |
| 898 | unlock_rmap(rmapp); | 901 | unlock_rmap(rmapp); |
| 899 | hptep[0] &= ~HPTE_V_HVLOCK; | 902 | hptep[0] &= ~HPTE_V_HVLOCK; |
| @@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 976 | /* Now check and modify the HPTE */ | 979 | /* Now check and modify the HPTE */ |
| 977 | if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { | 980 | if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { |
| 978 | kvmppc_clear_ref_hpte(kvm, hptep, i); | 981 | kvmppc_clear_ref_hpte(kvm, hptep, i); |
| 979 | rev[i].guest_rpte |= HPTE_R_R; | 982 | if (!(rev[i].guest_rpte & HPTE_R_R)) { |
| 983 | rev[i].guest_rpte |= HPTE_R_R; | ||
| 984 | note_hpte_modification(kvm, &rev[i]); | ||
| 985 | } | ||
| 980 | ret = 1; | 986 | ret = 1; |
| 981 | } | 987 | } |
| 982 | hptep[0] &= ~HPTE_V_HVLOCK; | 988 | hptep[0] &= ~HPTE_V_HVLOCK; |
| @@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) | |||
| 1080 | hptep[1] &= ~HPTE_R_C; | 1086 | hptep[1] &= ~HPTE_R_C; |
| 1081 | eieio(); | 1087 | eieio(); |
| 1082 | hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; | 1088 | hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; |
| 1083 | rev[i].guest_rpte |= HPTE_R_C; | 1089 | if (!(rev[i].guest_rpte & HPTE_R_C)) { |
| 1090 | rev[i].guest_rpte |= HPTE_R_C; | ||
| 1091 | note_hpte_modification(kvm, &rev[i]); | ||
| 1092 | } | ||
| 1084 | ret = 1; | 1093 | ret = 1; |
| 1085 | } | 1094 | } |
| 1086 | hptep[0] &= ~HPTE_V_HVLOCK; | 1095 | hptep[0] &= ~HPTE_V_HVLOCK; |
| @@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) | |||
| 1090 | return ret; | 1099 | return ret; |
| 1091 | } | 1100 | } |
| 1092 | 1101 | ||
| 1102 | static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, | ||
| 1103 | struct kvm_memory_slot *memslot, | ||
| 1104 | unsigned long *map) | ||
| 1105 | { | ||
| 1106 | unsigned long gfn; | ||
| 1107 | |||
| 1108 | if (!vpa->dirty || !vpa->pinned_addr) | ||
| 1109 | return; | ||
| 1110 | gfn = vpa->gpa >> PAGE_SHIFT; | ||
| 1111 | if (gfn < memslot->base_gfn || | ||
| 1112 | gfn >= memslot->base_gfn + memslot->npages) | ||
| 1113 | return; | ||
| 1114 | |||
| 1115 | vpa->dirty = false; | ||
| 1116 | if (map) | ||
| 1117 | __set_bit_le(gfn - memslot->base_gfn, map); | ||
| 1118 | } | ||
| 1119 | |||
| 1093 | long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, | 1120 | long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, |
| 1094 | unsigned long *map) | 1121 | unsigned long *map) |
| 1095 | { | 1122 | { |
| 1096 | unsigned long i; | 1123 | unsigned long i; |
| 1097 | unsigned long *rmapp; | 1124 | unsigned long *rmapp; |
| 1125 | struct kvm_vcpu *vcpu; | ||
| 1098 | 1126 | ||
| 1099 | preempt_disable(); | 1127 | preempt_disable(); |
| 1100 | rmapp = memslot->arch.rmap; | 1128 | rmapp = memslot->arch.rmap; |
| @@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
| 1103 | __set_bit_le(i, map); | 1131 | __set_bit_le(i, map); |
| 1104 | ++rmapp; | 1132 | ++rmapp; |
| 1105 | } | 1133 | } |
| 1134 | |||
| 1135 | /* Harvest dirty bits from VPA and DTL updates */ | ||
| 1136 | /* Note: we never modify the SLB shadow buffer areas */ | ||
| 1137 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
| 1138 | spin_lock(&vcpu->arch.vpa_update_lock); | ||
| 1139 | harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); | ||
| 1140 | harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); | ||
| 1141 | spin_unlock(&vcpu->arch.vpa_update_lock); | ||
| 1142 | } | ||
| 1106 | preempt_enable(); | 1143 | preempt_enable(); |
| 1107 | return 0; | 1144 | return 0; |
| 1108 | } | 1145 | } |
| @@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
| 1114 | unsigned long gfn = gpa >> PAGE_SHIFT; | 1151 | unsigned long gfn = gpa >> PAGE_SHIFT; |
| 1115 | struct page *page, *pages[1]; | 1152 | struct page *page, *pages[1]; |
| 1116 | int npages; | 1153 | int npages; |
| 1117 | unsigned long hva, psize, offset; | 1154 | unsigned long hva, offset; |
| 1118 | unsigned long pa; | 1155 | unsigned long pa; |
| 1119 | unsigned long *physp; | 1156 | unsigned long *physp; |
| 1120 | int srcu_idx; | 1157 | int srcu_idx; |
| @@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
| 1146 | } | 1183 | } |
| 1147 | srcu_read_unlock(&kvm->srcu, srcu_idx); | 1184 | srcu_read_unlock(&kvm->srcu, srcu_idx); |
| 1148 | 1185 | ||
| 1149 | psize = PAGE_SIZE; | 1186 | offset = gpa & (PAGE_SIZE - 1); |
| 1150 | if (PageHuge(page)) { | ||
| 1151 | page = compound_head(page); | ||
| 1152 | psize <<= compound_order(page); | ||
| 1153 | } | ||
| 1154 | offset = gpa & (psize - 1); | ||
| 1155 | if (nb_ret) | 1187 | if (nb_ret) |
| 1156 | *nb_ret = psize - offset; | 1188 | *nb_ret = PAGE_SIZE - offset; |
| 1157 | return page_address(page) + offset; | 1189 | return page_address(page) + offset; |
| 1158 | 1190 | ||
| 1159 | err: | 1191 | err: |
| @@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
| 1161 | return NULL; | 1193 | return NULL; |
| 1162 | } | 1194 | } |
| 1163 | 1195 | ||
| 1164 | void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) | 1196 | void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, |
| 1197 | bool dirty) | ||
| 1165 | { | 1198 | { |
| 1166 | struct page *page = virt_to_page(va); | 1199 | struct page *page = virt_to_page(va); |
| 1200 | struct kvm_memory_slot *memslot; | ||
| 1201 | unsigned long gfn; | ||
| 1202 | unsigned long *rmap; | ||
| 1203 | int srcu_idx; | ||
| 1167 | 1204 | ||
| 1168 | put_page(page); | 1205 | put_page(page); |
| 1206 | |||
| 1207 | if (!dirty || !kvm->arch.using_mmu_notifiers) | ||
| 1208 | return; | ||
| 1209 | |||
| 1210 | /* We need to mark this page dirty in the rmap chain */ | ||
| 1211 | gfn = gpa >> PAGE_SHIFT; | ||
| 1212 | srcu_idx = srcu_read_lock(&kvm->srcu); | ||
| 1213 | memslot = gfn_to_memslot(kvm, gfn); | ||
| 1214 | if (memslot) { | ||
| 1215 | rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; | ||
| 1216 | lock_rmap(rmap); | ||
| 1217 | *rmap |= KVMPPC_RMAP_CHANGED; | ||
| 1218 | unlock_rmap(rmap); | ||
| 1219 | } | ||
| 1220 | srcu_read_unlock(&kvm->srcu, srcu_idx); | ||
| 1169 | } | 1221 | } |
| 1170 | 1222 | ||
| 1171 | /* | 1223 | /* |
| @@ -1193,16 +1245,36 @@ struct kvm_htab_ctx { | |||
| 1193 | 1245 | ||
| 1194 | #define HPTE_SIZE (2 * sizeof(unsigned long)) | 1246 | #define HPTE_SIZE (2 * sizeof(unsigned long)) |
| 1195 | 1247 | ||
| 1248 | /* | ||
| 1249 | * Returns 1 if this HPT entry has been modified or has pending | ||
| 1250 | * R/C bit changes. | ||
| 1251 | */ | ||
| 1252 | static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) | ||
| 1253 | { | ||
| 1254 | unsigned long rcbits_unset; | ||
| 1255 | |||
| 1256 | if (revp->guest_rpte & HPTE_GR_MODIFIED) | ||
| 1257 | return 1; | ||
| 1258 | |||
| 1259 | /* Also need to consider changes in reference and changed bits */ | ||
| 1260 | rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); | ||
| 1261 | if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) | ||
| 1262 | return 1; | ||
| 1263 | |||
| 1264 | return 0; | ||
| 1265 | } | ||
| 1266 | |||
| 1196 | static long record_hpte(unsigned long flags, unsigned long *hptp, | 1267 | static long record_hpte(unsigned long flags, unsigned long *hptp, |
| 1197 | unsigned long *hpte, struct revmap_entry *revp, | 1268 | unsigned long *hpte, struct revmap_entry *revp, |
| 1198 | int want_valid, int first_pass) | 1269 | int want_valid, int first_pass) |
| 1199 | { | 1270 | { |
| 1200 | unsigned long v, r; | 1271 | unsigned long v, r; |
| 1272 | unsigned long rcbits_unset; | ||
| 1201 | int ok = 1; | 1273 | int ok = 1; |
| 1202 | int valid, dirty; | 1274 | int valid, dirty; |
| 1203 | 1275 | ||
| 1204 | /* Unmodified entries are uninteresting except on the first pass */ | 1276 | /* Unmodified entries are uninteresting except on the first pass */ |
| 1205 | dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); | 1277 | dirty = hpte_dirty(revp, hptp); |
| 1206 | if (!first_pass && !dirty) | 1278 | if (!first_pass && !dirty) |
| 1207 | return 0; | 1279 | return 0; |
| 1208 | 1280 | ||
| @@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, | |||
| 1223 | while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) | 1295 | while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) |
| 1224 | cpu_relax(); | 1296 | cpu_relax(); |
| 1225 | v = hptp[0]; | 1297 | v = hptp[0]; |
| 1298 | |||
| 1299 | /* re-evaluate valid and dirty from synchronized HPTE value */ | ||
| 1300 | valid = !!(v & HPTE_V_VALID); | ||
| 1301 | dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); | ||
| 1302 | |||
| 1303 | /* Harvest R and C into guest view if necessary */ | ||
| 1304 | rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); | ||
| 1305 | if (valid && (rcbits_unset & hptp[1])) { | ||
| 1306 | revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | | ||
| 1307 | HPTE_GR_MODIFIED; | ||
| 1308 | dirty = 1; | ||
| 1309 | } | ||
| 1310 | |||
| 1226 | if (v & HPTE_V_ABSENT) { | 1311 | if (v & HPTE_V_ABSENT) { |
| 1227 | v &= ~HPTE_V_ABSENT; | 1312 | v &= ~HPTE_V_ABSENT; |
| 1228 | v |= HPTE_V_VALID; | 1313 | v |= HPTE_V_VALID; |
| 1314 | valid = 1; | ||
| 1229 | } | 1315 | } |
| 1230 | /* re-evaluate valid and dirty from synchronized HPTE value */ | ||
| 1231 | valid = !!(v & HPTE_V_VALID); | ||
| 1232 | if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) | 1316 | if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) |
| 1233 | valid = 0; | 1317 | valid = 0; |
| 1234 | r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); | 1318 | |
| 1235 | dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); | 1319 | r = revp->guest_rpte; |
| 1236 | /* only clear modified if this is the right sort of entry */ | 1320 | /* only clear modified if this is the right sort of entry */ |
| 1237 | if (valid == want_valid && dirty) { | 1321 | if (valid == want_valid && dirty) { |
| 1238 | r &= ~HPTE_GR_MODIFIED; | 1322 | r &= ~HPTE_GR_MODIFIED; |
| @@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, | |||
| 1288 | /* Skip uninteresting entries, i.e. clean on not-first pass */ | 1372 | /* Skip uninteresting entries, i.e. clean on not-first pass */ |
| 1289 | if (!first_pass) { | 1373 | if (!first_pass) { |
| 1290 | while (i < kvm->arch.hpt_npte && | 1374 | while (i < kvm->arch.hpt_npte && |
| 1291 | !(revp->guest_rpte & HPTE_GR_MODIFIED)) { | 1375 | !hpte_dirty(revp, hptp)) { |
| 1292 | ++i; | 1376 | ++i; |
| 1293 | hptp += 2; | 1377 | hptp += 2; |
| 1294 | ++revp; | 1378 | ++revp; |
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 836c56975e21..1f6344c4408d 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
| @@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 194 | run->papr_hcall.args[i] = gpr; | 194 | run->papr_hcall.args[i] = gpr; |
| 195 | } | 195 | } |
| 196 | 196 | ||
| 197 | emulated = EMULATE_DO_PAPR; | 197 | run->exit_reason = KVM_EXIT_PAPR_HCALL; |
| 198 | vcpu->arch.hcall_needed = 1; | ||
| 199 | emulated = EMULATE_EXIT_USER; | ||
| 198 | break; | 200 | break; |
| 199 | } | 201 | } |
| 200 | #endif | 202 | #endif |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f5416934932b..9de24f8e03c7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
| @@ -66,6 +66,31 @@ | |||
| 66 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); | 66 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); |
| 67 | static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); | 67 | static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); |
| 68 | 68 | ||
| 69 | void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) | ||
| 70 | { | ||
| 71 | int me; | ||
| 72 | int cpu = vcpu->cpu; | ||
| 73 | wait_queue_head_t *wqp; | ||
| 74 | |||
| 75 | wqp = kvm_arch_vcpu_wq(vcpu); | ||
| 76 | if (waitqueue_active(wqp)) { | ||
| 77 | wake_up_interruptible(wqp); | ||
| 78 | ++vcpu->stat.halt_wakeup; | ||
| 79 | } | ||
| 80 | |||
| 81 | me = get_cpu(); | ||
| 82 | |||
| 83 | /* CPU points to the first thread of the core */ | ||
| 84 | if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { | ||
| 85 | int real_cpu = cpu + vcpu->arch.ptid; | ||
| 86 | if (paca[real_cpu].kvm_hstate.xics_phys) | ||
| 87 | xics_wake_cpu(real_cpu); | ||
| 88 | else if (cpu_online(cpu)) | ||
| 89 | smp_send_reschedule(cpu); | ||
| 90 | } | ||
| 91 | put_cpu(); | ||
| 92 | } | ||
| 93 | |||
| 69 | /* | 94 | /* |
| 70 | * We use the vcpu_load/put functions to measure stolen time. | 95 | * We use the vcpu_load/put functions to measure stolen time. |
| 71 | * Stolen time is counted as time when either the vcpu is able to | 96 | * Stolen time is counted as time when either the vcpu is able to |
| @@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, | |||
| 259 | len = ((struct reg_vpa *)va)->length.hword; | 284 | len = ((struct reg_vpa *)va)->length.hword; |
| 260 | else | 285 | else |
| 261 | len = ((struct reg_vpa *)va)->length.word; | 286 | len = ((struct reg_vpa *)va)->length.word; |
| 262 | kvmppc_unpin_guest_page(kvm, va); | 287 | kvmppc_unpin_guest_page(kvm, va, vpa, false); |
| 263 | 288 | ||
| 264 | /* Check length */ | 289 | /* Check length */ |
| 265 | if (len > nb || len < sizeof(struct reg_vpa)) | 290 | if (len > nb || len < sizeof(struct reg_vpa)) |
| @@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) | |||
| 359 | va = NULL; | 384 | va = NULL; |
| 360 | nb = 0; | 385 | nb = 0; |
| 361 | if (gpa) | 386 | if (gpa) |
| 362 | va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); | 387 | va = kvmppc_pin_guest_page(kvm, gpa, &nb); |
| 363 | spin_lock(&vcpu->arch.vpa_update_lock); | 388 | spin_lock(&vcpu->arch.vpa_update_lock); |
| 364 | if (gpa == vpap->next_gpa) | 389 | if (gpa == vpap->next_gpa) |
| 365 | break; | 390 | break; |
| 366 | /* sigh... unpin that one and try again */ | 391 | /* sigh... unpin that one and try again */ |
| 367 | if (va) | 392 | if (va) |
| 368 | kvmppc_unpin_guest_page(kvm, va); | 393 | kvmppc_unpin_guest_page(kvm, va, gpa, false); |
| 369 | } | 394 | } |
| 370 | 395 | ||
| 371 | vpap->update_pending = 0; | 396 | vpap->update_pending = 0; |
| @@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) | |||
| 375 | * has changed the mappings underlying guest memory, | 400 | * has changed the mappings underlying guest memory, |
| 376 | * so unregister the region. | 401 | * so unregister the region. |
| 377 | */ | 402 | */ |
| 378 | kvmppc_unpin_guest_page(kvm, va); | 403 | kvmppc_unpin_guest_page(kvm, va, gpa, false); |
| 379 | va = NULL; | 404 | va = NULL; |
| 380 | } | 405 | } |
| 381 | if (vpap->pinned_addr) | 406 | if (vpap->pinned_addr) |
| 382 | kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); | 407 | kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, |
| 408 | vpap->dirty); | ||
| 409 | vpap->gpa = gpa; | ||
| 383 | vpap->pinned_addr = va; | 410 | vpap->pinned_addr = va; |
| 411 | vpap->dirty = false; | ||
| 384 | if (va) | 412 | if (va) |
| 385 | vpap->pinned_end = va + vpap->len; | 413 | vpap->pinned_end = va + vpap->len; |
| 386 | } | 414 | } |
| @@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, | |||
| 472 | /* order writing *dt vs. writing vpa->dtl_idx */ | 500 | /* order writing *dt vs. writing vpa->dtl_idx */ |
| 473 | smp_wmb(); | 501 | smp_wmb(); |
| 474 | vpa->dtl_idx = ++vcpu->arch.dtl_index; | 502 | vpa->dtl_idx = ++vcpu->arch.dtl_index; |
| 503 | vcpu->arch.dtl.dirty = true; | ||
| 475 | } | 504 | } |
| 476 | 505 | ||
| 477 | int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | 506 | int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) |
| @@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
| 479 | unsigned long req = kvmppc_get_gpr(vcpu, 3); | 508 | unsigned long req = kvmppc_get_gpr(vcpu, 3); |
| 480 | unsigned long target, ret = H_SUCCESS; | 509 | unsigned long target, ret = H_SUCCESS; |
| 481 | struct kvm_vcpu *tvcpu; | 510 | struct kvm_vcpu *tvcpu; |
| 482 | int idx; | 511 | int idx, rc; |
| 483 | 512 | ||
| 484 | switch (req) { | 513 | switch (req) { |
| 485 | case H_ENTER: | 514 | case H_ENTER: |
| @@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
| 515 | kvmppc_get_gpr(vcpu, 5), | 544 | kvmppc_get_gpr(vcpu, 5), |
| 516 | kvmppc_get_gpr(vcpu, 6)); | 545 | kvmppc_get_gpr(vcpu, 6)); |
| 517 | break; | 546 | break; |
| 547 | case H_RTAS: | ||
| 548 | if (list_empty(&vcpu->kvm->arch.rtas_tokens)) | ||
| 549 | return RESUME_HOST; | ||
| 550 | |||
| 551 | rc = kvmppc_rtas_hcall(vcpu); | ||
| 552 | |||
| 553 | if (rc == -ENOENT) | ||
| 554 | return RESUME_HOST; | ||
| 555 | else if (rc == 0) | ||
| 556 | break; | ||
| 557 | |||
| 558 | /* Send the error out to userspace via KVM_RUN */ | ||
| 559 | return rc; | ||
| 560 | |||
| 561 | case H_XIRR: | ||
| 562 | case H_CPPR: | ||
| 563 | case H_EOI: | ||
| 564 | case H_IPI: | ||
| 565 | if (kvmppc_xics_enabled(vcpu)) { | ||
| 566 | ret = kvmppc_xics_hcall(vcpu, req); | ||
| 567 | break; | ||
| 568 | } /* fallthrough */ | ||
| 518 | default: | 569 | default: |
| 519 | return RESUME_HOST; | 570 | return RESUME_HOST; |
| 520 | } | 571 | } |
| @@ -913,15 +964,19 @@ out: | |||
| 913 | return ERR_PTR(err); | 964 | return ERR_PTR(err); |
| 914 | } | 965 | } |
| 915 | 966 | ||
| 967 | static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) | ||
| 968 | { | ||
| 969 | if (vpa->pinned_addr) | ||
| 970 | kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa, | ||
| 971 | vpa->dirty); | ||
| 972 | } | ||
| 973 | |||
| 916 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) | 974 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) |
| 917 | { | 975 | { |
| 918 | spin_lock(&vcpu->arch.vpa_update_lock); | 976 | spin_lock(&vcpu->arch.vpa_update_lock); |
| 919 | if (vcpu->arch.dtl.pinned_addr) | 977 | unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); |
| 920 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); | 978 | unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); |
| 921 | if (vcpu->arch.slb_shadow.pinned_addr) | 979 | unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); |
| 922 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); | ||
| 923 | if (vcpu->arch.vpa.pinned_addr) | ||
| 924 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); | ||
| 925 | spin_unlock(&vcpu->arch.vpa_update_lock); | 980 | spin_unlock(&vcpu->arch.vpa_update_lock); |
| 926 | kvm_vcpu_uninit(vcpu); | 981 | kvm_vcpu_uninit(vcpu); |
| 927 | kmem_cache_free(kvm_vcpu_cache, vcpu); | 982 | kmem_cache_free(kvm_vcpu_cache, vcpu); |
| @@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) | |||
| 955 | } | 1010 | } |
| 956 | 1011 | ||
| 957 | extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 1012 | extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
| 958 | extern void xics_wake_cpu(int cpu); | ||
| 959 | 1013 | ||
| 960 | static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, | 1014 | static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, |
| 961 | struct kvm_vcpu *vcpu) | 1015 | struct kvm_vcpu *vcpu) |
| @@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 1330 | break; | 1384 | break; |
| 1331 | vc->runner = vcpu; | 1385 | vc->runner = vcpu; |
| 1332 | n_ceded = 0; | 1386 | n_ceded = 0; |
| 1333 | list_for_each_entry(v, &vc->runnable_threads, arch.run_list) | 1387 | list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { |
| 1334 | if (!v->arch.pending_exceptions) | 1388 | if (!v->arch.pending_exceptions) |
| 1335 | n_ceded += v->arch.ceded; | 1389 | n_ceded += v->arch.ceded; |
| 1390 | else | ||
| 1391 | v->arch.ceded = 0; | ||
| 1392 | } | ||
| 1336 | if (n_ceded == vc->n_runnable) | 1393 | if (n_ceded == vc->n_runnable) |
| 1337 | kvmppc_vcore_blocked(vc); | 1394 | kvmppc_vcore_blocked(vc); |
| 1338 | else | 1395 | else |
| @@ -1645,12 +1702,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
| 1645 | 1702 | ||
| 1646 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1703 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
| 1647 | struct kvm_userspace_memory_region *mem, | 1704 | struct kvm_userspace_memory_region *mem, |
| 1648 | struct kvm_memory_slot old) | 1705 | const struct kvm_memory_slot *old) |
| 1649 | { | 1706 | { |
| 1650 | unsigned long npages = mem->memory_size >> PAGE_SHIFT; | 1707 | unsigned long npages = mem->memory_size >> PAGE_SHIFT; |
| 1651 | struct kvm_memory_slot *memslot; | 1708 | struct kvm_memory_slot *memslot; |
| 1652 | 1709 | ||
| 1653 | if (npages && old.npages) { | 1710 | if (npages && old->npages) { |
| 1654 | /* | 1711 | /* |
| 1655 | * If modifying a memslot, reset all the rmap dirty bits. | 1712 | * If modifying a memslot, reset all the rmap dirty bits. |
| 1656 | * If this is a new memslot, we don't need to do anything | 1713 | * If this is a new memslot, we don't need to do anything |
| @@ -1827,6 +1884,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
| 1827 | cpumask_setall(&kvm->arch.need_tlb_flush); | 1884 | cpumask_setall(&kvm->arch.need_tlb_flush); |
| 1828 | 1885 | ||
| 1829 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); | 1886 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); |
| 1887 | INIT_LIST_HEAD(&kvm->arch.rtas_tokens); | ||
| 1830 | 1888 | ||
| 1831 | kvm->arch.rma = NULL; | 1889 | kvm->arch.rma = NULL; |
| 1832 | 1890 | ||
| @@ -1872,6 +1930,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) | |||
| 1872 | kvm->arch.rma = NULL; | 1930 | kvm->arch.rma = NULL; |
| 1873 | } | 1931 | } |
| 1874 | 1932 | ||
| 1933 | kvmppc_rtas_tokens_free(kvm); | ||
| 1934 | |||
| 1875 | kvmppc_free_hpt(kvm); | 1935 | kvmppc_free_hpt(kvm); |
| 1876 | WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); | 1936 | WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); |
| 1877 | } | 1937 | } |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 19c93bae1aea..6dcbb49105a4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
| @@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, | |||
| 97 | } | 97 | } |
| 98 | EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); | 98 | EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); |
| 99 | 99 | ||
| 100 | /* | ||
| 101 | * Note modification of an HPTE; set the HPTE modified bit | ||
| 102 | * if anyone is interested. | ||
| 103 | */ | ||
| 104 | static inline void note_hpte_modification(struct kvm *kvm, | ||
| 105 | struct revmap_entry *rev) | ||
| 106 | { | ||
| 107 | if (atomic_read(&kvm->arch.hpte_mod_interest)) | ||
| 108 | rev->guest_rpte |= HPTE_GR_MODIFIED; | ||
| 109 | } | ||
| 110 | |||
| 111 | /* Remove this HPTE from the chain for a real page */ | 100 | /* Remove this HPTE from the chain for a real page */ |
| 112 | static void remove_revmap_chain(struct kvm *kvm, long pte_index, | 101 | static void remove_revmap_chain(struct kvm *kvm, long pte_index, |
| 113 | struct revmap_entry *rev, | 102 | struct revmap_entry *rev, |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c new file mode 100644 index 000000000000..b4b0082f761c --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c | |||
| @@ -0,0 +1,406 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2012 Michael Ellerman, IBM Corporation. | ||
| 3 | * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License, version 2, as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/kernel.h> | ||
| 11 | #include <linux/kvm_host.h> | ||
| 12 | #include <linux/err.h> | ||
| 13 | |||
| 14 | #include <asm/kvm_book3s.h> | ||
| 15 | #include <asm/kvm_ppc.h> | ||
| 16 | #include <asm/hvcall.h> | ||
| 17 | #include <asm/xics.h> | ||
| 18 | #include <asm/debug.h> | ||
| 19 | #include <asm/synch.h> | ||
| 20 | #include <asm/ppc-opcode.h> | ||
| 21 | |||
| 22 | #include "book3s_xics.h" | ||
| 23 | |||
| 24 | #define DEBUG_PASSUP | ||
| 25 | |||
| 26 | static inline void rm_writeb(unsigned long paddr, u8 val) | ||
| 27 | { | ||
| 28 | __asm__ __volatile__("sync; stbcix %0,0,%1" | ||
| 29 | : : "r" (val), "r" (paddr) : "memory"); | ||
| 30 | } | ||
| 31 | |||
| 32 | static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, | ||
| 33 | struct kvm_vcpu *this_vcpu) | ||
| 34 | { | ||
| 35 | struct kvmppc_icp *this_icp = this_vcpu->arch.icp; | ||
| 36 | unsigned long xics_phys; | ||
| 37 | int cpu; | ||
| 38 | |||
| 39 | /* Mark the target VCPU as having an interrupt pending */ | ||
| 40 | vcpu->stat.queue_intr++; | ||
| 41 | set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); | ||
| 42 | |||
| 43 | /* Kick self ? Just set MER and return */ | ||
| 44 | if (vcpu == this_vcpu) { | ||
| 45 | mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER); | ||
| 46 | return; | ||
| 47 | } | ||
| 48 | |||
| 49 | /* Check if the core is loaded, if not, too hard */ | ||
| 50 | cpu = vcpu->cpu; | ||
| 51 | if (cpu < 0 || cpu >= nr_cpu_ids) { | ||
| 52 | this_icp->rm_action |= XICS_RM_KICK_VCPU; | ||
| 53 | this_icp->rm_kick_target = vcpu; | ||
| 54 | return; | ||
| 55 | } | ||
| 56 | /* In SMT cpu will always point to thread 0, we adjust it */ | ||
| 57 | cpu += vcpu->arch.ptid; | ||
| 58 | |||
| 59 | /* Not too hard, then poke the target */ | ||
| 60 | xics_phys = paca[cpu].kvm_hstate.xics_phys; | ||
| 61 | rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); | ||
| 62 | } | ||
| 63 | |||
| 64 | static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) | ||
| 65 | { | ||
| 66 | /* Note: Only called on self ! */ | ||
| 67 | clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, | ||
| 68 | &vcpu->arch.pending_exceptions); | ||
| 69 | mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); | ||
| 70 | } | ||
| 71 | |||
| 72 | static inline bool icp_rm_try_update(struct kvmppc_icp *icp, | ||
| 73 | union kvmppc_icp_state old, | ||
| 74 | union kvmppc_icp_state new) | ||
| 75 | { | ||
| 76 | struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu; | ||
| 77 | bool success; | ||
| 78 | |||
| 79 | /* Calculate new output value */ | ||
| 80 | new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); | ||
| 81 | |||
| 82 | /* Attempt atomic update */ | ||
| 83 | success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; | ||
| 84 | if (!success) | ||
| 85 | goto bail; | ||
| 86 | |||
| 87 | /* | ||
| 88 | * Check for output state update | ||
| 89 | * | ||
| 90 | * Note that this is racy since another processor could be updating | ||
| 91 | * the state already. This is why we never clear the interrupt output | ||
| 92 | * here, we only ever set it. The clear only happens prior to doing | ||
| 93 | * an update and only by the processor itself. Currently we do it | ||
| 94 | * in Accept (H_XIRR) and Up_Cppr (H_XPPR). | ||
| 95 | * | ||
| 96 | * We also do not try to figure out whether the EE state has changed, | ||
| 97 | * we unconditionally set it if the new state calls for it. The reason | ||
| 98 | * for that is that we opportunistically remove the pending interrupt | ||
| 99 | * flag when raising CPPR, so we need to set it back here if an | ||
| 100 | * interrupt is still pending. | ||
| 101 | */ | ||
| 102 | if (new.out_ee) | ||
| 103 | icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu); | ||
| 104 | |||
| 105 | /* Expose the state change for debug purposes */ | ||
| 106 | this_vcpu->arch.icp->rm_dbgstate = new; | ||
| 107 | this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu; | ||
| 108 | |||
| 109 | bail: | ||
| 110 | return success; | ||
| 111 | } | ||
| 112 | |||
| 113 | static inline int check_too_hard(struct kvmppc_xics *xics, | ||
| 114 | struct kvmppc_icp *icp) | ||
| 115 | { | ||
| 116 | return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS; | ||
| 117 | } | ||
| 118 | |||
| 119 | static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | ||
| 120 | u8 new_cppr) | ||
| 121 | { | ||
| 122 | union kvmppc_icp_state old_state, new_state; | ||
| 123 | bool resend; | ||
| 124 | |||
| 125 | /* | ||
| 126 | * This handles several related states in one operation: | ||
| 127 | * | ||
| 128 | * ICP State: Down_CPPR | ||
| 129 | * | ||
| 130 | * Load CPPR with new value and if the XISR is 0 | ||
| 131 | * then check for resends: | ||
| 132 | * | ||
| 133 | * ICP State: Resend | ||
| 134 | * | ||
| 135 | * If MFRR is more favored than CPPR, check for IPIs | ||
| 136 | * and notify ICS of a potential resend. This is done | ||
| 137 | * asynchronously (when used in real mode, we will have | ||
| 138 | * to exit here). | ||
| 139 | * | ||
| 140 | * We do not handle the complete Check_IPI as documented | ||
| 141 | * here. In the PAPR, this state will be used for both | ||
| 142 | * Set_MFRR and Down_CPPR. However, we know that we aren't | ||
| 143 | * changing the MFRR state here so we don't need to handle | ||
| 144 | * the case of an MFRR causing a reject of a pending irq, | ||
| 145 | * this will have been handled when the MFRR was set in the | ||
| 146 | * first place. | ||
| 147 | * | ||
| 148 | * Thus we don't have to handle rejects, only resends. | ||
| 149 | * | ||
| 150 | * When implementing real mode for HV KVM, resend will lead to | ||
| 151 | * a H_TOO_HARD return and the whole transaction will be handled | ||
| 152 | * in virtual mode. | ||
| 153 | */ | ||
| 154 | do { | ||
| 155 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
| 156 | |||
| 157 | /* Down_CPPR */ | ||
| 158 | new_state.cppr = new_cppr; | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Cut down Resend / Check_IPI / IPI | ||
| 162 | * | ||
| 163 | * The logic is that we cannot have a pending interrupt | ||
| 164 | * trumped by an IPI at this point (see above), so we | ||
| 165 | * know that either the pending interrupt is already an | ||
| 166 | * IPI (in which case we don't care to override it) or | ||
| 167 | * it's either more favored than us or non existent | ||
| 168 | */ | ||
| 169 | if (new_state.mfrr < new_cppr && | ||
| 170 | new_state.mfrr <= new_state.pending_pri) { | ||
| 171 | new_state.pending_pri = new_state.mfrr; | ||
| 172 | new_state.xisr = XICS_IPI; | ||
| 173 | } | ||
| 174 | |||
| 175 | /* Latch/clear resend bit */ | ||
| 176 | resend = new_state.need_resend; | ||
| 177 | new_state.need_resend = 0; | ||
| 178 | |||
| 179 | } while (!icp_rm_try_update(icp, old_state, new_state)); | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Now handle resend checks. Those are asynchronous to the ICP | ||
| 183 | * state update in HW (ie bus transactions) so we can handle them | ||
| 184 | * separately here as well. | ||
| 185 | */ | ||
| 186 | if (resend) | ||
| 187 | icp->rm_action |= XICS_RM_CHECK_RESEND; | ||
| 188 | } | ||
| 189 | |||
| 190 | |||
| 191 | unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) | ||
| 192 | { | ||
| 193 | union kvmppc_icp_state old_state, new_state; | ||
| 194 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
| 195 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
| 196 | u32 xirr; | ||
| 197 | |||
| 198 | if (!xics || !xics->real_mode) | ||
| 199 | return H_TOO_HARD; | ||
| 200 | |||
| 201 | /* First clear the interrupt */ | ||
| 202 | icp_rm_clr_vcpu_irq(icp->vcpu); | ||
| 203 | |||
| 204 | /* | ||
| 205 | * ICP State: Accept_Interrupt | ||
| 206 | * | ||
| 207 | * Return the pending interrupt (if any) along with the | ||
| 208 | * current CPPR, then clear the XISR & set CPPR to the | ||
| 209 | * pending priority | ||
| 210 | */ | ||
| 211 | do { | ||
| 212 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
| 213 | |||
| 214 | xirr = old_state.xisr | (((u32)old_state.cppr) << 24); | ||
| 215 | if (!old_state.xisr) | ||
| 216 | break; | ||
| 217 | new_state.cppr = new_state.pending_pri; | ||
| 218 | new_state.pending_pri = 0xff; | ||
| 219 | new_state.xisr = 0; | ||
| 220 | |||
| 221 | } while (!icp_rm_try_update(icp, old_state, new_state)); | ||
| 222 | |||
| 223 | /* Return the result in GPR4 */ | ||
| 224 | vcpu->arch.gpr[4] = xirr; | ||
| 225 | |||
| 226 | return check_too_hard(xics, icp); | ||
| 227 | } | ||
| 228 | |||
| 229 | int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, | ||
| 230 | unsigned long mfrr) | ||
| 231 | { | ||
| 232 | union kvmppc_icp_state old_state, new_state; | ||
| 233 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
| 234 | struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp; | ||
| 235 | u32 reject; | ||
| 236 | bool resend; | ||
| 237 | bool local; | ||
| 238 | |||
| 239 | if (!xics || !xics->real_mode) | ||
| 240 | return H_TOO_HARD; | ||
| 241 | |||
| 242 | local = this_icp->server_num == server; | ||
| 243 | if (local) | ||
| 244 | icp = this_icp; | ||
| 245 | else | ||
| 246 | icp = kvmppc_xics_find_server(vcpu->kvm, server); | ||
| 247 | if (!icp) | ||
| 248 | return H_PARAMETER; | ||
| 249 | |||
| 250 | /* | ||
| 251 | * ICP state: Set_MFRR | ||
| 252 | * | ||
| 253 | * If the CPPR is more favored than the new MFRR, then | ||
| 254 | * nothing needs to be done as there can be no XISR to | ||
| 255 | * reject. | ||
| 256 | * | ||
| 257 | * If the CPPR is less favored, then we might be replacing | ||
| 258 | * an interrupt, and thus need to possibly reject it as in | ||
| 259 | * | ||
| 260 | * ICP state: Check_IPI | ||
| 261 | */ | ||
| 262 | do { | ||
| 263 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
| 264 | |||
| 265 | /* Set_MFRR */ | ||
| 266 | new_state.mfrr = mfrr; | ||
| 267 | |||
| 268 | /* Check_IPI */ | ||
| 269 | reject = 0; | ||
| 270 | resend = false; | ||
| 271 | if (mfrr < new_state.cppr) { | ||
| 272 | /* Reject a pending interrupt if not an IPI */ | ||
| 273 | if (mfrr <= new_state.pending_pri) | ||
| 274 | reject = new_state.xisr; | ||
| 275 | new_state.pending_pri = mfrr; | ||
| 276 | new_state.xisr = XICS_IPI; | ||
| 277 | } | ||
| 278 | |||
| 279 | if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { | ||
| 280 | resend = new_state.need_resend; | ||
| 281 | new_state.need_resend = 0; | ||
| 282 | } | ||
| 283 | } while (!icp_rm_try_update(icp, old_state, new_state)); | ||
| 284 | |||
| 285 | /* Pass rejects to virtual mode */ | ||
| 286 | if (reject && reject != XICS_IPI) { | ||
| 287 | this_icp->rm_action |= XICS_RM_REJECT; | ||
| 288 | this_icp->rm_reject = reject; | ||
| 289 | } | ||
| 290 | |||
| 291 | /* Pass resends to virtual mode */ | ||
| 292 | if (resend) | ||
| 293 | this_icp->rm_action |= XICS_RM_CHECK_RESEND; | ||
| 294 | |||
| 295 | return check_too_hard(xics, this_icp); | ||
| 296 | } | ||
| 297 | |||
| 298 | int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) | ||
| 299 | { | ||
| 300 | union kvmppc_icp_state old_state, new_state; | ||
| 301 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
| 302 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
| 303 | u32 reject; | ||
| 304 | |||
| 305 | if (!xics || !xics->real_mode) | ||
| 306 | return H_TOO_HARD; | ||
| 307 | |||
| 308 | /* | ||
| 309 | * ICP State: Set_CPPR | ||
| 310 | * | ||
| 311 | * We can safely compare the new value with the current | ||
| 312 | * value outside of the transaction as the CPPR is only | ||
| 313 | * ever changed by the processor on itself | ||
| 314 | */ | ||
| 315 | if (cppr > icp->state.cppr) { | ||
| 316 | icp_rm_down_cppr(xics, icp, cppr); | ||
| 317 | goto bail; | ||
| 318 | } else if (cppr == icp->state.cppr) | ||
| 319 | return H_SUCCESS; | ||
| 320 | |||
| 321 | /* | ||
| 322 | * ICP State: Up_CPPR | ||
| 323 | * | ||
| 324 | * The processor is raising its priority, this can result | ||
| 325 | * in a rejection of a pending interrupt: | ||
| 326 | * | ||
| 327 | * ICP State: Reject_Current | ||
| 328 | * | ||
| 329 | * We can remove EE from the current processor, the update | ||
| 330 | * transaction will set it again if needed | ||
| 331 | */ | ||
| 332 | icp_rm_clr_vcpu_irq(icp->vcpu); | ||
| 333 | |||
| 334 | do { | ||
| 335 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
| 336 | |||
| 337 | reject = 0; | ||
| 338 | new_state.cppr = cppr; | ||
| 339 | |||
| 340 | if (cppr <= new_state.pending_pri) { | ||
| 341 | reject = new_state.xisr; | ||
| 342 | new_state.xisr = 0; | ||
| 343 | new_state.pending_pri = 0xff; | ||
| 344 | } | ||
| 345 | |||
| 346 | } while (!icp_rm_try_update(icp, old_state, new_state)); | ||
| 347 | |||
| 348 | /* Pass rejects to virtual mode */ | ||
| 349 | if (reject && reject != XICS_IPI) { | ||
| 350 | icp->rm_action |= XICS_RM_REJECT; | ||
| 351 | icp->rm_reject = reject; | ||
| 352 | } | ||
| 353 | bail: | ||
| 354 | return check_too_hard(xics, icp); | ||
| 355 | } | ||
| 356 | |||
| 357 | int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | ||
| 358 | { | ||
| 359 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
| 360 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
| 361 | struct kvmppc_ics *ics; | ||
| 362 | struct ics_irq_state *state; | ||
| 363 | u32 irq = xirr & 0x00ffffff; | ||
| 364 | u16 src; | ||
| 365 | |||
| 366 | if (!xics || !xics->real_mode) | ||
| 367 | return H_TOO_HARD; | ||
| 368 | |||
| 369 | /* | ||
| 370 | * ICP State: EOI | ||
| 371 | * | ||
| 372 | * Note: If EOI is incorrectly used by SW to lower the CPPR | ||
| 373 | * value (ie more favored), we do not check for rejection of | ||
| 374 | * a pending interrupt, this is a SW error and PAPR sepcifies | ||
| 375 | * that we don't have to deal with it. | ||
| 376 | * | ||
| 377 | * The sending of an EOI to the ICS is handled after the | ||
| 378 | * CPPR update | ||
| 379 | * | ||
| 380 | * ICP State: Down_CPPR which we handle | ||
| 381 | * in a separate function as it's shared with H_CPPR. | ||
| 382 | */ | ||
| 383 | icp_rm_down_cppr(xics, icp, xirr >> 24); | ||
| 384 | |||
| 385 | /* IPIs have no EOI */ | ||
| 386 | if (irq == XICS_IPI) | ||
| 387 | goto bail; | ||
| 388 | /* | ||
| 389 | * EOI handling: If the interrupt is still asserted, we need to | ||
| 390 | * resend it. We can take a lockless "peek" at the ICS state here. | ||
| 391 | * | ||
| 392 | * "Message" interrupts will never have "asserted" set | ||
| 393 | */ | ||
| 394 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
| 395 | if (!ics) | ||
| 396 | goto bail; | ||
| 397 | state = &ics->irq_state[src]; | ||
| 398 | |||
| 399 | /* Still asserted, resend it, we make it look like a reject */ | ||
| 400 | if (state->asserted) { | ||
| 401 | icp->rm_action |= XICS_RM_REJECT; | ||
| 402 | icp->rm_reject = irq; | ||
| 403 | } | ||
| 404 | bail: | ||
| 405 | return check_too_hard(xics, icp); | ||
| 406 | } | ||
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e33d11f1b977..b02f91e4c70d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
| @@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline) | |||
| 79 | * * | 79 | * * |
| 80 | *****************************************************************************/ | 80 | *****************************************************************************/ |
| 81 | 81 | ||
| 82 | #define XICS_XIRR 4 | ||
| 83 | #define XICS_QIRR 0xc | ||
| 84 | #define XICS_IPI 2 /* interrupt source # for IPIs */ | ||
| 85 | |||
| 86 | /* | 82 | /* |
| 87 | * We come in here when wakened from nap mode on a secondary hw thread. | 83 | * We come in here when wakened from nap mode on a secondary hw thread. |
| 88 | * Relocation is off and most register values are lost. | 84 | * Relocation is off and most register values are lost. |
| @@ -101,50 +97,51 @@ kvm_start_guest: | |||
| 101 | li r0,1 | 97 | li r0,1 |
| 102 | stb r0,PACA_NAPSTATELOST(r13) | 98 | stb r0,PACA_NAPSTATELOST(r13) |
| 103 | 99 | ||
| 104 | /* get vcpu pointer, NULL if we have no vcpu to run */ | 100 | /* were we napping due to cede? */ |
| 105 | ld r4,HSTATE_KVM_VCPU(r13) | 101 | lbz r0,HSTATE_NAPPING(r13) |
| 106 | cmpdi cr1,r4,0 | 102 | cmpwi r0,0 |
| 103 | bne kvm_end_cede | ||
| 104 | |||
| 105 | /* | ||
| 106 | * We weren't napping due to cede, so this must be a secondary | ||
| 107 | * thread being woken up to run a guest, or being woken up due | ||
| 108 | * to a stray IPI. (Or due to some machine check or hypervisor | ||
| 109 | * maintenance interrupt while the core is in KVM.) | ||
| 110 | */ | ||
| 107 | 111 | ||
| 108 | /* Check the wake reason in SRR1 to see why we got here */ | 112 | /* Check the wake reason in SRR1 to see why we got here */ |
| 109 | mfspr r3,SPRN_SRR1 | 113 | mfspr r3,SPRN_SRR1 |
| 110 | rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ | 114 | rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ |
| 111 | cmpwi r3,4 /* was it an external interrupt? */ | 115 | cmpwi r3,4 /* was it an external interrupt? */ |
| 112 | bne 27f | 116 | bne 27f /* if not */ |
| 113 | 117 | ld r5,HSTATE_XICS_PHYS(r13) | |
| 114 | /* | 118 | li r7,XICS_XIRR /* if it was an external interrupt, */ |
| 115 | * External interrupt - for now assume it is an IPI, since we | ||
| 116 | * should never get any other interrupts sent to offline threads. | ||
| 117 | * Only do this for secondary threads. | ||
| 118 | */ | ||
| 119 | beq cr1,25f | ||
| 120 | lwz r3,VCPU_PTID(r4) | ||
| 121 | cmpwi r3,0 | ||
| 122 | beq 27f | ||
| 123 | 25: ld r5,HSTATE_XICS_PHYS(r13) | ||
| 124 | li r0,0xff | ||
| 125 | li r6,XICS_QIRR | ||
| 126 | li r7,XICS_XIRR | ||
| 127 | lwzcix r8,r5,r7 /* get and ack the interrupt */ | 119 | lwzcix r8,r5,r7 /* get and ack the interrupt */ |
| 128 | sync | 120 | sync |
| 129 | clrldi. r9,r8,40 /* get interrupt source ID. */ | 121 | clrldi. r9,r8,40 /* get interrupt source ID. */ |
| 130 | beq 27f /* none there? */ | 122 | beq 28f /* none there? */ |
| 131 | cmpwi r9,XICS_IPI | 123 | cmpwi r9,XICS_IPI /* was it an IPI? */ |
| 132 | bne 26f | 124 | bne 29f |
| 125 | li r0,0xff | ||
| 126 | li r6,XICS_MFRR | ||
| 133 | stbcix r0,r5,r6 /* clear IPI */ | 127 | stbcix r0,r5,r6 /* clear IPI */ |
| 134 | 26: stwcix r8,r5,r7 /* EOI the interrupt */ | 128 | stwcix r8,r5,r7 /* EOI the interrupt */ |
| 135 | 129 | sync /* order loading of vcpu after that */ | |
| 136 | 27: /* XXX should handle hypervisor maintenance interrupts etc. here */ | ||
| 137 | 130 | ||
| 138 | /* reload vcpu pointer after clearing the IPI */ | 131 | /* get vcpu pointer, NULL if we have no vcpu to run */ |
| 139 | ld r4,HSTATE_KVM_VCPU(r13) | 132 | ld r4,HSTATE_KVM_VCPU(r13) |
| 140 | cmpdi r4,0 | 133 | cmpdi r4,0 |
| 141 | /* if we have no vcpu to run, go back to sleep */ | 134 | /* if we have no vcpu to run, go back to sleep */ |
| 142 | beq kvm_no_guest | 135 | beq kvm_no_guest |
| 136 | b kvmppc_hv_entry | ||
| 143 | 137 | ||
| 144 | /* were we napping due to cede? */ | 138 | 27: /* XXX should handle hypervisor maintenance interrupts etc. here */ |
| 145 | lbz r0,HSTATE_NAPPING(r13) | 139 | b kvm_no_guest |
| 146 | cmpwi r0,0 | 140 | 28: /* SRR1 said external but ICP said nope?? */ |
| 147 | bne kvm_end_cede | 141 | b kvm_no_guest |
| 142 | 29: /* External non-IPI interrupt to offline secondary thread? help?? */ | ||
| 143 | stw r8,HSTATE_SAVED_XIRR(r13) | ||
| 144 | b kvm_no_guest | ||
| 148 | 145 | ||
| 149 | .global kvmppc_hv_entry | 146 | .global kvmppc_hv_entry |
| 150 | kvmppc_hv_entry: | 147 | kvmppc_hv_entry: |
| @@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
| 260 | lwz r5, LPPACA_YIELDCOUNT(r3) | 257 | lwz r5, LPPACA_YIELDCOUNT(r3) |
| 261 | addi r5, r5, 1 | 258 | addi r5, r5, 1 |
| 262 | stw r5, LPPACA_YIELDCOUNT(r3) | 259 | stw r5, LPPACA_YIELDCOUNT(r3) |
| 260 | li r6, 1 | ||
| 261 | stb r6, VCPU_VPA_DIRTY(r4) | ||
| 263 | 25: | 262 | 25: |
| 264 | /* Load up DAR and DSISR */ | 263 | /* Load up DAR and DSISR */ |
| 265 | ld r5, VCPU_DAR(r4) | 264 | ld r5, VCPU_DAR(r4) |
| @@ -485,20 +484,20 @@ toc_tlbie_lock: | |||
| 485 | mtctr r6 | 484 | mtctr r6 |
| 486 | mtxer r7 | 485 | mtxer r7 |
| 487 | 486 | ||
| 487 | ld r10, VCPU_PC(r4) | ||
| 488 | ld r11, VCPU_MSR(r4) | ||
| 488 | kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ | 489 | kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ |
| 489 | ld r6, VCPU_SRR0(r4) | 490 | ld r6, VCPU_SRR0(r4) |
| 490 | ld r7, VCPU_SRR1(r4) | 491 | ld r7, VCPU_SRR1(r4) |
| 491 | ld r10, VCPU_PC(r4) | ||
| 492 | ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ | ||
| 493 | 492 | ||
| 493 | /* r11 = vcpu->arch.msr & ~MSR_HV */ | ||
| 494 | rldicl r11, r11, 63 - MSR_HV_LG, 1 | 494 | rldicl r11, r11, 63 - MSR_HV_LG, 1 |
| 495 | rotldi r11, r11, 1 + MSR_HV_LG | 495 | rotldi r11, r11, 1 + MSR_HV_LG |
| 496 | ori r11, r11, MSR_ME | 496 | ori r11, r11, MSR_ME |
| 497 | 497 | ||
| 498 | /* Check if we can deliver an external or decrementer interrupt now */ | 498 | /* Check if we can deliver an external or decrementer interrupt now */ |
| 499 | ld r0,VCPU_PENDING_EXC(r4) | 499 | ld r0,VCPU_PENDING_EXC(r4) |
| 500 | li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) | 500 | lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h |
| 501 | oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h | ||
| 502 | and r0,r0,r8 | 501 | and r0,r0,r8 |
| 503 | cmpdi cr1,r0,0 | 502 | cmpdi cr1,r0,0 |
| 504 | andi. r0,r11,MSR_EE | 503 | andi. r0,r11,MSR_EE |
| @@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
| 526 | /* Move SRR0 and SRR1 into the respective regs */ | 525 | /* Move SRR0 and SRR1 into the respective regs */ |
| 527 | 5: mtspr SPRN_SRR0, r6 | 526 | 5: mtspr SPRN_SRR0, r6 |
| 528 | mtspr SPRN_SRR1, r7 | 527 | mtspr SPRN_SRR1, r7 |
| 529 | li r0,0 | ||
| 530 | stb r0,VCPU_CEDED(r4) /* cancel cede */ | ||
| 531 | 528 | ||
| 532 | fast_guest_return: | 529 | fast_guest_return: |
| 530 | li r0,0 | ||
| 531 | stb r0,VCPU_CEDED(r4) /* cancel cede */ | ||
| 533 | mtspr SPRN_HSRR0,r10 | 532 | mtspr SPRN_HSRR0,r10 |
| 534 | mtspr SPRN_HSRR1,r11 | 533 | mtspr SPRN_HSRR1,r11 |
| 535 | 534 | ||
| @@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
| 676 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL | 675 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL |
| 677 | beq hcall_try_real_mode | 676 | beq hcall_try_real_mode |
| 678 | 677 | ||
| 679 | /* Check for mediated interrupts (could be done earlier really ...) */ | 678 | /* Only handle external interrupts here on arch 206 and later */ |
| 680 | BEGIN_FTR_SECTION | 679 | BEGIN_FTR_SECTION |
| 681 | cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL | 680 | b ext_interrupt_to_host |
| 682 | bne+ 1f | 681 | END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) |
| 683 | andi. r0,r11,MSR_EE | 682 | |
| 684 | beq 1f | 683 | /* External interrupt ? */ |
| 685 | mfspr r5,SPRN_LPCR | 684 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL |
| 686 | andi. r0,r5,LPCR_MER | 685 | bne+ ext_interrupt_to_host |
| 687 | bne bounce_ext_interrupt | 686 | |
| 688 | 1: | 687 | /* External interrupt, first check for host_ipi. If this is |
| 689 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | 688 | * set, we know the host wants us out so let's do it now |
| 689 | */ | ||
| 690 | do_ext_interrupt: | ||
| 691 | lbz r0, HSTATE_HOST_IPI(r13) | ||
| 692 | cmpwi r0, 0 | ||
| 693 | bne ext_interrupt_to_host | ||
| 694 | |||
| 695 | /* Now read the interrupt from the ICP */ | ||
| 696 | ld r5, HSTATE_XICS_PHYS(r13) | ||
| 697 | li r7, XICS_XIRR | ||
| 698 | cmpdi r5, 0 | ||
| 699 | beq- ext_interrupt_to_host | ||
| 700 | lwzcix r3, r5, r7 | ||
| 701 | rlwinm. r0, r3, 0, 0xffffff | ||
| 702 | sync | ||
| 703 | beq 3f /* if nothing pending in the ICP */ | ||
| 704 | |||
| 705 | /* We found something in the ICP... | ||
| 706 | * | ||
| 707 | * If it's not an IPI, stash it in the PACA and return to | ||
| 708 | * the host, we don't (yet) handle directing real external | ||
| 709 | * interrupts directly to the guest | ||
| 710 | */ | ||
| 711 | cmpwi r0, XICS_IPI | ||
| 712 | bne ext_stash_for_host | ||
| 713 | |||
| 714 | /* It's an IPI, clear the MFRR and EOI it */ | ||
| 715 | li r0, 0xff | ||
| 716 | li r6, XICS_MFRR | ||
| 717 | stbcix r0, r5, r6 /* clear the IPI */ | ||
| 718 | stwcix r3, r5, r7 /* EOI it */ | ||
| 719 | sync | ||
| 720 | |||
| 721 | /* We need to re-check host IPI now in case it got set in the | ||
| 722 | * meantime. If it's clear, we bounce the interrupt to the | ||
| 723 | * guest | ||
| 724 | */ | ||
| 725 | lbz r0, HSTATE_HOST_IPI(r13) | ||
| 726 | cmpwi r0, 0 | ||
| 727 | bne- 1f | ||
| 728 | |||
| 729 | /* Allright, looks like an IPI for the guest, we need to set MER */ | ||
| 730 | 3: | ||
| 731 | /* Check if any CPU is heading out to the host, if so head out too */ | ||
| 732 | ld r5, HSTATE_KVM_VCORE(r13) | ||
| 733 | lwz r0, VCORE_ENTRY_EXIT(r5) | ||
| 734 | cmpwi r0, 0x100 | ||
| 735 | bge ext_interrupt_to_host | ||
| 736 | |||
| 737 | /* See if there is a pending interrupt for the guest */ | ||
| 738 | mfspr r8, SPRN_LPCR | ||
| 739 | ld r0, VCPU_PENDING_EXC(r9) | ||
| 740 | /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ | ||
| 741 | rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 | ||
| 742 | rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH | ||
| 743 | beq 2f | ||
| 744 | |||
| 745 | /* And if the guest EE is set, we can deliver immediately, else | ||
| 746 | * we return to the guest with MER set | ||
| 747 | */ | ||
| 748 | andi. r0, r11, MSR_EE | ||
| 749 | beq 2f | ||
| 750 | mtspr SPRN_SRR0, r10 | ||
| 751 | mtspr SPRN_SRR1, r11 | ||
| 752 | li r10, BOOK3S_INTERRUPT_EXTERNAL | ||
| 753 | li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ | ||
| 754 | rotldi r11, r11, 63 | ||
| 755 | 2: mr r4, r9 | ||
| 756 | mtspr SPRN_LPCR, r8 | ||
| 757 | b fast_guest_return | ||
| 758 | |||
| 759 | /* We raced with the host, we need to resend that IPI, bummer */ | ||
| 760 | 1: li r0, IPI_PRIORITY | ||
| 761 | stbcix r0, r5, r6 /* set the IPI */ | ||
| 762 | sync | ||
| 763 | b ext_interrupt_to_host | ||
| 764 | |||
| 765 | ext_stash_for_host: | ||
| 766 | /* It's not an IPI and it's for the host, stash it in the PACA | ||
| 767 | * before exit, it will be picked up by the host ICP driver | ||
| 768 | */ | ||
| 769 | stw r3, HSTATE_SAVED_XIRR(r13) | ||
| 770 | ext_interrupt_to_host: | ||
| 690 | 771 | ||
| 691 | guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ | 772 | guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ |
| 692 | /* Save DEC */ | 773 | /* Save DEC */ |
| @@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
| 829 | beq 44f | 910 | beq 44f |
| 830 | ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ | 911 | ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ |
| 831 | li r0,IPI_PRIORITY | 912 | li r0,IPI_PRIORITY |
| 832 | li r7,XICS_QIRR | 913 | li r7,XICS_MFRR |
| 833 | stbcix r0,r7,r8 /* trigger the IPI */ | 914 | stbcix r0,r7,r8 /* trigger the IPI */ |
| 834 | 44: srdi. r3,r3,1 | 915 | 44: srdi. r3,r3,1 |
| 835 | addi r6,r6,PACA_SIZE | 916 | addi r6,r6,PACA_SIZE |
| @@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
| 1018 | lwz r3, LPPACA_YIELDCOUNT(r8) | 1099 | lwz r3, LPPACA_YIELDCOUNT(r8) |
| 1019 | addi r3, r3, 1 | 1100 | addi r3, r3, 1 |
| 1020 | stw r3, LPPACA_YIELDCOUNT(r8) | 1101 | stw r3, LPPACA_YIELDCOUNT(r8) |
| 1102 | li r3, 1 | ||
| 1103 | stb r3, VCPU_VPA_DIRTY(r9) | ||
| 1021 | 25: | 1104 | 25: |
| 1022 | /* Save PMU registers if requested */ | 1105 | /* Save PMU registers if requested */ |
| 1023 | /* r8 and cr0.eq are live here */ | 1106 | /* r8 and cr0.eq are live here */ |
| @@ -1350,11 +1433,19 @@ hcall_real_table: | |||
| 1350 | .long 0 /* 0x58 */ | 1433 | .long 0 /* 0x58 */ |
| 1351 | .long 0 /* 0x5c */ | 1434 | .long 0 /* 0x5c */ |
| 1352 | .long 0 /* 0x60 */ | 1435 | .long 0 /* 0x60 */ |
| 1353 | .long 0 /* 0x64 */ | 1436 | #ifdef CONFIG_KVM_XICS |
| 1354 | .long 0 /* 0x68 */ | 1437 | .long .kvmppc_rm_h_eoi - hcall_real_table |
| 1355 | .long 0 /* 0x6c */ | 1438 | .long .kvmppc_rm_h_cppr - hcall_real_table |
| 1356 | .long 0 /* 0x70 */ | 1439 | .long .kvmppc_rm_h_ipi - hcall_real_table |
| 1357 | .long 0 /* 0x74 */ | 1440 | .long 0 /* 0x70 - H_IPOLL */ |
| 1441 | .long .kvmppc_rm_h_xirr - hcall_real_table | ||
| 1442 | #else | ||
| 1443 | .long 0 /* 0x64 - H_EOI */ | ||
| 1444 | .long 0 /* 0x68 - H_CPPR */ | ||
| 1445 | .long 0 /* 0x6c - H_IPI */ | ||
| 1446 | .long 0 /* 0x70 - H_IPOLL */ | ||
| 1447 | .long 0 /* 0x74 - H_XIRR */ | ||
| 1448 | #endif | ||
| 1358 | .long 0 /* 0x78 */ | 1449 | .long 0 /* 0x78 */ |
| 1359 | .long 0 /* 0x7c */ | 1450 | .long 0 /* 0x7c */ |
| 1360 | .long 0 /* 0x80 */ | 1451 | .long 0 /* 0x80 */ |
| @@ -1405,15 +1496,6 @@ ignore_hdec: | |||
| 1405 | mr r4,r9 | 1496 | mr r4,r9 |
| 1406 | b fast_guest_return | 1497 | b fast_guest_return |
| 1407 | 1498 | ||
| 1408 | bounce_ext_interrupt: | ||
| 1409 | mr r4,r9 | ||
| 1410 | mtspr SPRN_SRR0,r10 | ||
| 1411 | mtspr SPRN_SRR1,r11 | ||
| 1412 | li r10,BOOK3S_INTERRUPT_EXTERNAL | ||
| 1413 | li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ | ||
| 1414 | rotldi r11,r11,63 | ||
| 1415 | b fast_guest_return | ||
| 1416 | |||
| 1417 | _GLOBAL(kvmppc_h_set_dabr) | 1499 | _GLOBAL(kvmppc_h_set_dabr) |
| 1418 | std r4,VCPU_DABR(r3) | 1500 | std r4,VCPU_DABR(r3) |
| 1419 | /* Work around P7 bug where DABR can get corrupted on mtspr */ | 1501 | /* Work around P7 bug where DABR can get corrupted on mtspr */ |
| @@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) | |||
| 1519 | b . | 1601 | b . |
| 1520 | 1602 | ||
| 1521 | kvm_end_cede: | 1603 | kvm_end_cede: |
| 1604 | /* get vcpu pointer */ | ||
| 1605 | ld r4, HSTATE_KVM_VCPU(r13) | ||
| 1606 | |||
| 1522 | /* Woken by external or decrementer interrupt */ | 1607 | /* Woken by external or decrementer interrupt */ |
| 1523 | ld r1, HSTATE_HOST_R1(r13) | 1608 | ld r1, HSTATE_HOST_R1(r13) |
| 1524 | 1609 | ||
| @@ -1558,6 +1643,16 @@ kvm_end_cede: | |||
| 1558 | li r0,0 | 1643 | li r0,0 |
| 1559 | stb r0,HSTATE_NAPPING(r13) | 1644 | stb r0,HSTATE_NAPPING(r13) |
| 1560 | 1645 | ||
| 1646 | /* Check the wake reason in SRR1 to see why we got here */ | ||
| 1647 | mfspr r3, SPRN_SRR1 | ||
| 1648 | rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */ | ||
| 1649 | cmpwi r3, 4 /* was it an external interrupt? */ | ||
| 1650 | li r12, BOOK3S_INTERRUPT_EXTERNAL | ||
| 1651 | mr r9, r4 | ||
| 1652 | ld r10, VCPU_PC(r9) | ||
| 1653 | ld r11, VCPU_MSR(r9) | ||
| 1654 | beq do_ext_interrupt /* if so */ | ||
| 1655 | |||
| 1561 | /* see if any other thread is already exiting */ | 1656 | /* see if any other thread is already exiting */ |
| 1562 | lwz r0,VCORE_ENTRY_EXIT(r5) | 1657 | lwz r0,VCORE_ENTRY_EXIT(r5) |
| 1563 | cmpwi r0,0x100 | 1658 | cmpwi r0,0x100 |
| @@ -1577,8 +1672,7 @@ kvm_cede_prodded: | |||
| 1577 | 1672 | ||
| 1578 | /* we've ceded but we want to give control to the host */ | 1673 | /* we've ceded but we want to give control to the host */ |
| 1579 | kvm_cede_exit: | 1674 | kvm_cede_exit: |
| 1580 | li r3,H_TOO_HARD | 1675 | b hcall_real_fallback |
| 1581 | blr | ||
| 1582 | 1676 | ||
| 1583 | /* Try to handle a machine check in real mode */ | 1677 | /* Try to handle a machine check in real mode */ |
| 1584 | machine_check_realmode: | 1678 | machine_check_realmode: |
| @@ -1626,7 +1720,7 @@ secondary_nap: | |||
| 1626 | beq 37f | 1720 | beq 37f |
| 1627 | sync | 1721 | sync |
| 1628 | li r0, 0xff | 1722 | li r0, 0xff |
| 1629 | li r6, XICS_QIRR | 1723 | li r6, XICS_MFRR |
| 1630 | stbcix r0, r5, r6 /* clear the IPI */ | 1724 | stbcix r0, r5, r6 /* clear the IPI */ |
| 1631 | stwcix r3, r5, r7 /* EOI it */ | 1725 | stwcix r3, r5, r7 /* EOI it */ |
| 1632 | 37: sync | 1726 | 37: sync |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index dbdc15aa8127..bdc40b8e77d9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
| @@ -762,9 +762,7 @@ program_interrupt: | |||
| 762 | run->exit_reason = KVM_EXIT_MMIO; | 762 | run->exit_reason = KVM_EXIT_MMIO; |
| 763 | r = RESUME_HOST_NV; | 763 | r = RESUME_HOST_NV; |
| 764 | break; | 764 | break; |
| 765 | case EMULATE_DO_PAPR: | 765 | case EMULATE_EXIT_USER: |
| 766 | run->exit_reason = KVM_EXIT_PAPR_HCALL; | ||
| 767 | vcpu->arch.hcall_needed = 1; | ||
| 768 | r = RESUME_HOST_NV; | 766 | r = RESUME_HOST_NV; |
| 769 | break; | 767 | break; |
| 770 | default: | 768 | default: |
| @@ -1283,7 +1281,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
| 1283 | 1281 | ||
| 1284 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1282 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
| 1285 | struct kvm_userspace_memory_region *mem, | 1283 | struct kvm_userspace_memory_region *mem, |
| 1286 | struct kvm_memory_slot old) | 1284 | const struct kvm_memory_slot *old) |
| 1287 | { | 1285 | { |
| 1288 | } | 1286 | } |
| 1289 | 1287 | ||
| @@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
| 1298 | { | 1296 | { |
| 1299 | #ifdef CONFIG_PPC64 | 1297 | #ifdef CONFIG_PPC64 |
| 1300 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); | 1298 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); |
| 1299 | INIT_LIST_HEAD(&kvm->arch.rtas_tokens); | ||
| 1301 | #endif | 1300 | #endif |
| 1302 | 1301 | ||
| 1303 | if (firmware_has_feature(FW_FEATURE_SET_MODE)) { | 1302 | if (firmware_has_feature(FW_FEATURE_SET_MODE)) { |
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index ee02b30878ed..b24309c6c2d5 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c | |||
| @@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) | |||
| 227 | return EMULATE_DONE; | 227 | return EMULATE_DONE; |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) | ||
| 231 | { | ||
| 232 | long rc = kvmppc_xics_hcall(vcpu, cmd); | ||
| 233 | kvmppc_set_gpr(vcpu, 3, rc); | ||
| 234 | return EMULATE_DONE; | ||
| 235 | } | ||
| 236 | |||
| 230 | int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) | 237 | int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) |
| 231 | { | 238 | { |
| 232 | switch (cmd) { | 239 | switch (cmd) { |
| @@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) | |||
| 246 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); | 253 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); |
| 247 | vcpu->stat.halt_wakeup++; | 254 | vcpu->stat.halt_wakeup++; |
| 248 | return EMULATE_DONE; | 255 | return EMULATE_DONE; |
| 256 | case H_XIRR: | ||
| 257 | case H_CPPR: | ||
| 258 | case H_EOI: | ||
| 259 | case H_IPI: | ||
| 260 | if (kvmppc_xics_enabled(vcpu)) | ||
| 261 | return kvmppc_h_pr_xics_hcall(vcpu, cmd); | ||
| 262 | break; | ||
| 263 | case H_RTAS: | ||
| 264 | if (list_empty(&vcpu->kvm->arch.rtas_tokens)) | ||
| 265 | return RESUME_HOST; | ||
| 266 | if (kvmppc_rtas_hcall(vcpu)) | ||
| 267 | break; | ||
| 268 | kvmppc_set_gpr(vcpu, 3, 0); | ||
| 269 | return EMULATE_DONE; | ||
| 249 | } | 270 | } |
| 250 | 271 | ||
| 251 | return EMULATE_FAIL; | 272 | return EMULATE_FAIL; |
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c new file mode 100644 index 000000000000..3219ba895246 --- /dev/null +++ b/arch/powerpc/kvm/book3s_rtas.c | |||
| @@ -0,0 +1,274 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2012 Michael Ellerman, IBM Corporation. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License, version 2, as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/kernel.h> | ||
| 10 | #include <linux/kvm_host.h> | ||
| 11 | #include <linux/kvm.h> | ||
| 12 | #include <linux/err.h> | ||
| 13 | |||
| 14 | #include <asm/uaccess.h> | ||
| 15 | #include <asm/kvm_book3s.h> | ||
| 16 | #include <asm/kvm_ppc.h> | ||
| 17 | #include <asm/hvcall.h> | ||
| 18 | #include <asm/rtas.h> | ||
| 19 | |||
| 20 | #ifdef CONFIG_KVM_XICS | ||
| 21 | static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) | ||
| 22 | { | ||
| 23 | u32 irq, server, priority; | ||
| 24 | int rc; | ||
| 25 | |||
| 26 | if (args->nargs != 3 || args->nret != 1) { | ||
| 27 | rc = -3; | ||
| 28 | goto out; | ||
| 29 | } | ||
| 30 | |||
| 31 | irq = args->args[0]; | ||
| 32 | server = args->args[1]; | ||
| 33 | priority = args->args[2]; | ||
| 34 | |||
| 35 | rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); | ||
| 36 | if (rc) | ||
| 37 | rc = -3; | ||
| 38 | out: | ||
| 39 | args->rets[0] = rc; | ||
| 40 | } | ||
| 41 | |||
| 42 | static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) | ||
| 43 | { | ||
| 44 | u32 irq, server, priority; | ||
| 45 | int rc; | ||
| 46 | |||
| 47 | if (args->nargs != 1 || args->nret != 3) { | ||
| 48 | rc = -3; | ||
| 49 | goto out; | ||
| 50 | } | ||
| 51 | |||
| 52 | irq = args->args[0]; | ||
| 53 | |||
| 54 | server = priority = 0; | ||
| 55 | rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); | ||
| 56 | if (rc) { | ||
| 57 | rc = -3; | ||
| 58 | goto out; | ||
| 59 | } | ||
| 60 | |||
| 61 | args->rets[1] = server; | ||
| 62 | args->rets[2] = priority; | ||
| 63 | out: | ||
| 64 | args->rets[0] = rc; | ||
| 65 | } | ||
| 66 | |||
| 67 | static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) | ||
| 68 | { | ||
| 69 | u32 irq; | ||
| 70 | int rc; | ||
| 71 | |||
| 72 | if (args->nargs != 1 || args->nret != 1) { | ||
| 73 | rc = -3; | ||
| 74 | goto out; | ||
| 75 | } | ||
| 76 | |||
| 77 | irq = args->args[0]; | ||
| 78 | |||
| 79 | rc = kvmppc_xics_int_off(vcpu->kvm, irq); | ||
| 80 | if (rc) | ||
| 81 | rc = -3; | ||
| 82 | out: | ||
| 83 | args->rets[0] = rc; | ||
| 84 | } | ||
| 85 | |||
| 86 | static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) | ||
| 87 | { | ||
| 88 | u32 irq; | ||
| 89 | int rc; | ||
| 90 | |||
| 91 | if (args->nargs != 1 || args->nret != 1) { | ||
| 92 | rc = -3; | ||
| 93 | goto out; | ||
| 94 | } | ||
| 95 | |||
| 96 | irq = args->args[0]; | ||
| 97 | |||
| 98 | rc = kvmppc_xics_int_on(vcpu->kvm, irq); | ||
| 99 | if (rc) | ||
| 100 | rc = -3; | ||
| 101 | out: | ||
| 102 | args->rets[0] = rc; | ||
| 103 | } | ||
| 104 | #endif /* CONFIG_KVM_XICS */ | ||
| 105 | |||
| 106 | struct rtas_handler { | ||
| 107 | void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); | ||
| 108 | char *name; | ||
| 109 | }; | ||
| 110 | |||
| 111 | static struct rtas_handler rtas_handlers[] = { | ||
| 112 | #ifdef CONFIG_KVM_XICS | ||
| 113 | { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, | ||
| 114 | { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, | ||
| 115 | { .name = "ibm,int-off", .handler = kvm_rtas_int_off }, | ||
| 116 | { .name = "ibm,int-on", .handler = kvm_rtas_int_on }, | ||
| 117 | #endif | ||
| 118 | }; | ||
| 119 | |||
| 120 | struct rtas_token_definition { | ||
| 121 | struct list_head list; | ||
| 122 | struct rtas_handler *handler; | ||
| 123 | u64 token; | ||
| 124 | }; | ||
| 125 | |||
| 126 | static int rtas_name_matches(char *s1, char *s2) | ||
| 127 | { | ||
| 128 | struct kvm_rtas_token_args args; | ||
| 129 | return !strncmp(s1, s2, sizeof(args.name)); | ||
| 130 | } | ||
| 131 | |||
| 132 | static int rtas_token_undefine(struct kvm *kvm, char *name) | ||
| 133 | { | ||
| 134 | struct rtas_token_definition *d, *tmp; | ||
| 135 | |||
| 136 | lockdep_assert_held(&kvm->lock); | ||
| 137 | |||
| 138 | list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { | ||
| 139 | if (rtas_name_matches(d->handler->name, name)) { | ||
| 140 | list_del(&d->list); | ||
| 141 | kfree(d); | ||
| 142 | return 0; | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | /* It's not an error to undefine an undefined token */ | ||
| 147 | return 0; | ||
| 148 | } | ||
| 149 | |||
| 150 | static int rtas_token_define(struct kvm *kvm, char *name, u64 token) | ||
| 151 | { | ||
| 152 | struct rtas_token_definition *d; | ||
| 153 | struct rtas_handler *h = NULL; | ||
| 154 | bool found; | ||
| 155 | int i; | ||
| 156 | |||
| 157 | lockdep_assert_held(&kvm->lock); | ||
| 158 | |||
| 159 | list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { | ||
| 160 | if (d->token == token) | ||
| 161 | return -EEXIST; | ||
| 162 | } | ||
| 163 | |||
| 164 | found = false; | ||
| 165 | for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) { | ||
| 166 | h = &rtas_handlers[i]; | ||
| 167 | if (rtas_name_matches(h->name, name)) { | ||
| 168 | found = true; | ||
| 169 | break; | ||
| 170 | } | ||
| 171 | } | ||
| 172 | |||
| 173 | if (!found) | ||
| 174 | return -ENOENT; | ||
| 175 | |||
| 176 | d = kzalloc(sizeof(*d), GFP_KERNEL); | ||
| 177 | if (!d) | ||
| 178 | return -ENOMEM; | ||
| 179 | |||
| 180 | d->handler = h; | ||
| 181 | d->token = token; | ||
| 182 | |||
| 183 | list_add_tail(&d->list, &kvm->arch.rtas_tokens); | ||
| 184 | |||
| 185 | return 0; | ||
| 186 | } | ||
| 187 | |||
| 188 | int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) | ||
| 189 | { | ||
| 190 | struct kvm_rtas_token_args args; | ||
| 191 | int rc; | ||
| 192 | |||
| 193 | if (copy_from_user(&args, argp, sizeof(args))) | ||
| 194 | return -EFAULT; | ||
| 195 | |||
| 196 | mutex_lock(&kvm->lock); | ||
| 197 | |||
| 198 | if (args.token) | ||
| 199 | rc = rtas_token_define(kvm, args.name, args.token); | ||
| 200 | else | ||
| 201 | rc = rtas_token_undefine(kvm, args.name); | ||
| 202 | |||
| 203 | mutex_unlock(&kvm->lock); | ||
| 204 | |||
| 205 | return rc; | ||
| 206 | } | ||
| 207 | |||
| 208 | int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) | ||
| 209 | { | ||
| 210 | struct rtas_token_definition *d; | ||
| 211 | struct rtas_args args; | ||
| 212 | rtas_arg_t *orig_rets; | ||
| 213 | gpa_t args_phys; | ||
| 214 | int rc; | ||
| 215 | |||
| 216 | /* r4 contains the guest physical address of the RTAS args */ | ||
| 217 | args_phys = kvmppc_get_gpr(vcpu, 4); | ||
| 218 | |||
| 219 | rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); | ||
| 220 | if (rc) | ||
| 221 | goto fail; | ||
| 222 | |||
| 223 | /* | ||
| 224 | * args->rets is a pointer into args->args. Now that we've | ||
| 225 | * copied args we need to fix it up to point into our copy, | ||
| 226 | * not the guest args. We also need to save the original | ||
| 227 | * value so we can restore it on the way out. | ||
| 228 | */ | ||
| 229 | orig_rets = args.rets; | ||
| 230 | args.rets = &args.args[args.nargs]; | ||
| 231 | |||
| 232 | mutex_lock(&vcpu->kvm->lock); | ||
| 233 | |||
| 234 | rc = -ENOENT; | ||
| 235 | list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { | ||
| 236 | if (d->token == args.token) { | ||
| 237 | d->handler->handler(vcpu, &args); | ||
| 238 | rc = 0; | ||
| 239 | break; | ||
| 240 | } | ||
| 241 | } | ||
| 242 | |||
| 243 | mutex_unlock(&vcpu->kvm->lock); | ||
| 244 | |||
| 245 | if (rc == 0) { | ||
| 246 | args.rets = orig_rets; | ||
| 247 | rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); | ||
| 248 | if (rc) | ||
| 249 | goto fail; | ||
| 250 | } | ||
| 251 | |||
| 252 | return rc; | ||
| 253 | |||
| 254 | fail: | ||
| 255 | /* | ||
| 256 | * We only get here if the guest has called RTAS with a bogus | ||
| 257 | * args pointer. That means we can't get to the args, and so we | ||
| 258 | * can't fail the RTAS call. So fail right out to userspace, | ||
| 259 | * which should kill the guest. | ||
| 260 | */ | ||
| 261 | return rc; | ||
| 262 | } | ||
| 263 | |||
| 264 | void kvmppc_rtas_tokens_free(struct kvm *kvm) | ||
| 265 | { | ||
| 266 | struct rtas_token_definition *d, *tmp; | ||
| 267 | |||
| 268 | lockdep_assert_held(&kvm->lock); | ||
| 269 | |||
| 270 | list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { | ||
| 271 | list_del(&d->list); | ||
| 272 | kfree(d); | ||
| 273 | } | ||
| 274 | } | ||
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c new file mode 100644 index 000000000000..f7a103756618 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.c | |||
| @@ -0,0 +1,1270 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2012 Michael Ellerman, IBM Corporation. | ||
| 3 | * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License, version 2, as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/kernel.h> | ||
| 11 | #include <linux/kvm_host.h> | ||
| 12 | #include <linux/err.h> | ||
| 13 | #include <linux/gfp.h> | ||
| 14 | #include <linux/anon_inodes.h> | ||
| 15 | |||
| 16 | #include <asm/uaccess.h> | ||
| 17 | #include <asm/kvm_book3s.h> | ||
| 18 | #include <asm/kvm_ppc.h> | ||
| 19 | #include <asm/hvcall.h> | ||
| 20 | #include <asm/xics.h> | ||
| 21 | #include <asm/debug.h> | ||
| 22 | |||
| 23 | #include <linux/debugfs.h> | ||
| 24 | #include <linux/seq_file.h> | ||
| 25 | |||
| 26 | #include "book3s_xics.h" | ||
| 27 | |||
| 28 | #if 1 | ||
| 29 | #define XICS_DBG(fmt...) do { } while (0) | ||
| 30 | #else | ||
| 31 | #define XICS_DBG(fmt...) trace_printk(fmt) | ||
| 32 | #endif | ||
| 33 | |||
| 34 | #define ENABLE_REALMODE true | ||
| 35 | #define DEBUG_REALMODE false | ||
| 36 | |||
| 37 | /* | ||
| 38 | * LOCKING | ||
| 39 | * ======= | ||
| 40 | * | ||
| 41 | * Each ICS has a mutex protecting the information about the IRQ | ||
| 42 | * sources and avoiding simultaneous deliveries if the same interrupt. | ||
| 43 | * | ||
| 44 | * ICP operations are done via a single compare & swap transaction | ||
| 45 | * (most ICP state fits in the union kvmppc_icp_state) | ||
| 46 | */ | ||
| 47 | |||
| 48 | /* | ||
| 49 | * TODO | ||
| 50 | * ==== | ||
| 51 | * | ||
| 52 | * - To speed up resends, keep a bitmap of "resend" set bits in the | ||
| 53 | * ICS | ||
| 54 | * | ||
| 55 | * - Speed up server# -> ICP lookup (array ? hash table ?) | ||
| 56 | * | ||
| 57 | * - Make ICS lockless as well, or at least a per-interrupt lock or hashed | ||
| 58 | * locks array to improve scalability | ||
| 59 | */ | ||
| 60 | |||
| 61 | /* -- ICS routines -- */ | ||
| 62 | |||
| 63 | static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | ||
| 64 | u32 new_irq); | ||
| 65 | |||
| 66 | static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, | ||
| 67 | bool report_status) | ||
| 68 | { | ||
| 69 | struct ics_irq_state *state; | ||
| 70 | struct kvmppc_ics *ics; | ||
| 71 | u16 src; | ||
| 72 | |||
| 73 | XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); | ||
| 74 | |||
| 75 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
| 76 | if (!ics) { | ||
| 77 | XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq); | ||
| 78 | return -EINVAL; | ||
| 79 | } | ||
| 80 | state = &ics->irq_state[src]; | ||
| 81 | if (!state->exists) | ||
| 82 | return -EINVAL; | ||
| 83 | |||
| 84 | if (report_status) | ||
| 85 | return state->asserted; | ||
| 86 | |||
| 87 | /* | ||
| 88 | * We set state->asserted locklessly. This should be fine as | ||
| 89 | * we are the only setter, thus concurrent access is undefined | ||
| 90 | * to begin with. | ||
| 91 | */ | ||
| 92 | if (level == KVM_INTERRUPT_SET_LEVEL) | ||
| 93 | state->asserted = 1; | ||
| 94 | else if (level == KVM_INTERRUPT_UNSET) { | ||
| 95 | state->asserted = 0; | ||
| 96 | return 0; | ||
| 97 | } | ||
| 98 | |||
| 99 | /* Attempt delivery */ | ||
| 100 | icp_deliver_irq(xics, NULL, irq); | ||
| 101 | |||
| 102 | return state->asserted; | ||
| 103 | } | ||
| 104 | |||
| 105 | static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, | ||
| 106 | struct kvmppc_icp *icp) | ||
| 107 | { | ||
| 108 | int i; | ||
| 109 | |||
| 110 | mutex_lock(&ics->lock); | ||
| 111 | |||
| 112 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | ||
| 113 | struct ics_irq_state *state = &ics->irq_state[i]; | ||
| 114 | |||
| 115 | if (!state->resend) | ||
| 116 | continue; | ||
| 117 | |||
| 118 | XICS_DBG("resend %#x prio %#x\n", state->number, | ||
| 119 | state->priority); | ||
| 120 | |||
| 121 | mutex_unlock(&ics->lock); | ||
| 122 | icp_deliver_irq(xics, icp, state->number); | ||
| 123 | mutex_lock(&ics->lock); | ||
| 124 | } | ||
| 125 | |||
| 126 | mutex_unlock(&ics->lock); | ||
| 127 | } | ||
| 128 | |||
| 129 | static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, | ||
| 130 | struct ics_irq_state *state, | ||
| 131 | u32 server, u32 priority, u32 saved_priority) | ||
| 132 | { | ||
| 133 | bool deliver; | ||
| 134 | |||
| 135 | mutex_lock(&ics->lock); | ||
| 136 | |||
| 137 | state->server = server; | ||
| 138 | state->priority = priority; | ||
| 139 | state->saved_priority = saved_priority; | ||
| 140 | deliver = false; | ||
| 141 | if ((state->masked_pending || state->resend) && priority != MASKED) { | ||
| 142 | state->masked_pending = 0; | ||
| 143 | deliver = true; | ||
| 144 | } | ||
| 145 | |||
| 146 | mutex_unlock(&ics->lock); | ||
| 147 | |||
| 148 | return deliver; | ||
| 149 | } | ||
| 150 | |||
| 151 | int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) | ||
| 152 | { | ||
| 153 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
| 154 | struct kvmppc_icp *icp; | ||
| 155 | struct kvmppc_ics *ics; | ||
| 156 | struct ics_irq_state *state; | ||
| 157 | u16 src; | ||
| 158 | |||
| 159 | if (!xics) | ||
| 160 | return -ENODEV; | ||
| 161 | |||
| 162 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
| 163 | if (!ics) | ||
| 164 | return -EINVAL; | ||
| 165 | state = &ics->irq_state[src]; | ||
| 166 | |||
| 167 | icp = kvmppc_xics_find_server(kvm, server); | ||
| 168 | if (!icp) | ||
| 169 | return -EINVAL; | ||
| 170 | |||
| 171 | XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n", | ||
| 172 | irq, server, priority, | ||
| 173 | state->masked_pending, state->resend); | ||
| 174 | |||
| 175 | if (write_xive(xics, ics, state, server, priority, priority)) | ||
| 176 | icp_deliver_irq(xics, icp, irq); | ||
| 177 | |||
| 178 | return 0; | ||
| 179 | } | ||
| 180 | |||
| 181 | int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) | ||
| 182 | { | ||
| 183 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
| 184 | struct kvmppc_ics *ics; | ||
| 185 | struct ics_irq_state *state; | ||
| 186 | u16 src; | ||
| 187 | |||
| 188 | if (!xics) | ||
| 189 | return -ENODEV; | ||
| 190 | |||
| 191 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
| 192 | if (!ics) | ||
| 193 | return -EINVAL; | ||
| 194 | state = &ics->irq_state[src]; | ||
| 195 | |||
| 196 | mutex_lock(&ics->lock); | ||
| 197 | *server = state->server; | ||
| 198 | *priority = state->priority; | ||
| 199 | mutex_unlock(&ics->lock); | ||
| 200 | |||
| 201 | return 0; | ||
| 202 | } | ||
| 203 | |||
| 204 | int kvmppc_xics_int_on(struct kvm *kvm, u32 irq) | ||
| 205 | { | ||
| 206 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
| 207 | struct kvmppc_icp *icp; | ||
| 208 | struct kvmppc_ics *ics; | ||
| 209 | struct ics_irq_state *state; | ||
| 210 | u16 src; | ||
| 211 | |||
| 212 | if (!xics) | ||
| 213 | return -ENODEV; | ||
| 214 | |||
| 215 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
| 216 | if (!ics) | ||
| 217 | return -EINVAL; | ||
| 218 | state = &ics->irq_state[src]; | ||
| 219 | |||
| 220 | icp = kvmppc_xics_find_server(kvm, state->server); | ||
| 221 | if (!icp) | ||
| 222 | return -EINVAL; | ||
| 223 | |||
| 224 | if (write_xive(xics, ics, state, state->server, state->saved_priority, | ||
| 225 | state->saved_priority)) | ||
| 226 | icp_deliver_irq(xics, icp, irq); | ||
| 227 | |||
| 228 | return 0; | ||
| 229 | } | ||
| 230 | |||
| 231 | int kvmppc_xics_int_off(struct kvm *kvm, u32 irq) | ||
| 232 | { | ||
| 233 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
| 234 | struct kvmppc_ics *ics; | ||
| 235 | struct ics_irq_state *state; | ||
| 236 | u16 src; | ||
| 237 | |||
| 238 | if (!xics) | ||
| 239 | return -ENODEV; | ||
| 240 | |||
| 241 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
| 242 | if (!ics) | ||
| 243 | return -EINVAL; | ||
| 244 | state = &ics->irq_state[src]; | ||
| 245 | |||
| 246 | write_xive(xics, ics, state, state->server, MASKED, state->priority); | ||
| 247 | |||
| 248 | return 0; | ||
| 249 | } | ||
| 250 | |||
| 251 | /* -- ICP routines, including hcalls -- */ | ||
| 252 | |||
| 253 | static inline bool icp_try_update(struct kvmppc_icp *icp, | ||
| 254 | union kvmppc_icp_state old, | ||
| 255 | union kvmppc_icp_state new, | ||
| 256 | bool change_self) | ||
| 257 | { | ||
| 258 | bool success; | ||
| 259 | |||
| 260 | /* Calculate new output value */ | ||
| 261 | new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); | ||
| 262 | |||
| 263 | /* Attempt atomic update */ | ||
| 264 | success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; | ||
| 265 | if (!success) | ||
| 266 | goto bail; | ||
| 267 | |||
| 268 | XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", | ||
| 269 | icp->server_num, | ||
| 270 | old.cppr, old.mfrr, old.pending_pri, old.xisr, | ||
| 271 | old.need_resend, old.out_ee); | ||
| 272 | XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", | ||
| 273 | new.cppr, new.mfrr, new.pending_pri, new.xisr, | ||
| 274 | new.need_resend, new.out_ee); | ||
| 275 | /* | ||
| 276 | * Check for output state update | ||
| 277 | * | ||
| 278 | * Note that this is racy since another processor could be updating | ||
| 279 | * the state already. This is why we never clear the interrupt output | ||
| 280 | * here, we only ever set it. The clear only happens prior to doing | ||
| 281 | * an update and only by the processor itself. Currently we do it | ||
| 282 | * in Accept (H_XIRR) and Up_Cppr (H_XPPR). | ||
| 283 | * | ||
| 284 | * We also do not try to figure out whether the EE state has changed, | ||
| 285 | * we unconditionally set it if the new state calls for it. The reason | ||
| 286 | * for that is that we opportunistically remove the pending interrupt | ||
| 287 | * flag when raising CPPR, so we need to set it back here if an | ||
| 288 | * interrupt is still pending. | ||
| 289 | */ | ||
| 290 | if (new.out_ee) { | ||
| 291 | kvmppc_book3s_queue_irqprio(icp->vcpu, | ||
| 292 | BOOK3S_INTERRUPT_EXTERNAL_LEVEL); | ||
| 293 | if (!change_self) | ||
| 294 | kvmppc_fast_vcpu_kick(icp->vcpu); | ||
| 295 | } | ||
| 296 | bail: | ||
| 297 | return success; | ||
| 298 | } | ||
| 299 | |||
| 300 | static void icp_check_resend(struct kvmppc_xics *xics, | ||
| 301 | struct kvmppc_icp *icp) | ||
| 302 | { | ||
| 303 | u32 icsid; | ||
| 304 | |||
| 305 | /* Order this load with the test for need_resend in the caller */ | ||
| 306 | smp_rmb(); | ||
| 307 | for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) { | ||
| 308 | struct kvmppc_ics *ics = xics->ics[icsid]; | ||
| 309 | |||
| 310 | if (!test_and_clear_bit(icsid, icp->resend_map)) | ||
| 311 | continue; | ||
| 312 | if (!ics) | ||
| 313 | continue; | ||
| 314 | ics_check_resend(xics, ics, icp); | ||
| 315 | } | ||
| 316 | } | ||
| 317 | |||
| 318 | static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority, | ||
| 319 | u32 *reject) | ||
| 320 | { | ||
| 321 | union kvmppc_icp_state old_state, new_state; | ||
| 322 | bool success; | ||
| 323 | |||
| 324 | XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority, | ||
| 325 | icp->server_num); | ||
| 326 | |||
| 327 | do { | ||
| 328 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
| 329 | |||
| 330 | *reject = 0; | ||
| 331 | |||
| 332 | /* See if we can deliver */ | ||
| 333 | success = new_state.cppr > priority && | ||
| 334 | new_state.mfrr > priority && | ||
| 335 | new_state.pending_pri > priority; | ||
| 336 | |||
| 337 | /* | ||
| 338 | * If we can, check for a rejection and perform the | ||
| 339 | * delivery | ||
| 340 | */ | ||
| 341 | if (success) { | ||
| 342 | *reject = new_state.xisr; | ||
| 343 | new_state.xisr = irq; | ||
| 344 | new_state.pending_pri = priority; | ||
| 345 | } else { | ||
| 346 | /* | ||
| 347 | * If we failed to deliver we set need_resend | ||
| 348 | * so a subsequent CPPR state change causes us | ||
| 349 | * to try a new delivery. | ||
| 350 | */ | ||
| 351 | new_state.need_resend = true; | ||
| 352 | } | ||
| 353 | |||
| 354 | } while (!icp_try_update(icp, old_state, new_state, false)); | ||
| 355 | |||
| 356 | return success; | ||
| 357 | } | ||
| 358 | |||
| 359 | static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | ||
| 360 | u32 new_irq) | ||
| 361 | { | ||
| 362 | struct ics_irq_state *state; | ||
| 363 | struct kvmppc_ics *ics; | ||
| 364 | u32 reject; | ||
| 365 | u16 src; | ||
| 366 | |||
| 367 | /* | ||
| 368 | * This is used both for initial delivery of an interrupt and | ||
| 369 | * for subsequent rejection. | ||
| 370 | * | ||
| 371 | * Rejection can be racy vs. resends. We have evaluated the | ||
| 372 | * rejection in an atomic ICP transaction which is now complete, | ||
| 373 | * so potentially the ICP can already accept the interrupt again. | ||
| 374 | * | ||
| 375 | * So we need to retry the delivery. Essentially the reject path | ||
| 376 | * boils down to a failed delivery. Always. | ||
| 377 | * | ||
| 378 | * Now the interrupt could also have moved to a different target, | ||
| 379 | * thus we may need to re-do the ICP lookup as well | ||
| 380 | */ | ||
| 381 | |||
| 382 | again: | ||
| 383 | /* Get the ICS state and lock it */ | ||
| 384 | ics = kvmppc_xics_find_ics(xics, new_irq, &src); | ||
| 385 | if (!ics) { | ||
| 386 | XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq); | ||
| 387 | return; | ||
| 388 | } | ||
| 389 | state = &ics->irq_state[src]; | ||
| 390 | |||
| 391 | /* Get a lock on the ICS */ | ||
| 392 | mutex_lock(&ics->lock); | ||
| 393 | |||
| 394 | /* Get our server */ | ||
| 395 | if (!icp || state->server != icp->server_num) { | ||
| 396 | icp = kvmppc_xics_find_server(xics->kvm, state->server); | ||
| 397 | if (!icp) { | ||
| 398 | pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n", | ||
| 399 | new_irq, state->server); | ||
| 400 | goto out; | ||
| 401 | } | ||
| 402 | } | ||
| 403 | |||
| 404 | /* Clear the resend bit of that interrupt */ | ||
| 405 | state->resend = 0; | ||
| 406 | |||
| 407 | /* | ||
| 408 | * If masked, bail out | ||
| 409 | * | ||
| 410 | * Note: PAPR doesn't mention anything about masked pending | ||
| 411 | * when doing a resend, only when doing a delivery. | ||
| 412 | * | ||
| 413 | * However that would have the effect of losing a masked | ||
| 414 | * interrupt that was rejected and isn't consistent with | ||
| 415 | * the whole masked_pending business which is about not | ||
| 416 | * losing interrupts that occur while masked. | ||
| 417 | * | ||
| 418 | * I don't differenciate normal deliveries and resends, this | ||
| 419 | * implementation will differ from PAPR and not lose such | ||
| 420 | * interrupts. | ||
| 421 | */ | ||
| 422 | if (state->priority == MASKED) { | ||
| 423 | XICS_DBG("irq %#x masked pending\n", new_irq); | ||
| 424 | state->masked_pending = 1; | ||
| 425 | goto out; | ||
| 426 | } | ||
| 427 | |||
| 428 | /* | ||
| 429 | * Try the delivery, this will set the need_resend flag | ||
| 430 | * in the ICP as part of the atomic transaction if the | ||
| 431 | * delivery is not possible. | ||
| 432 | * | ||
| 433 | * Note that if successful, the new delivery might have itself | ||
| 434 | * rejected an interrupt that was "delivered" before we took the | ||
| 435 | * icp mutex. | ||
| 436 | * | ||
| 437 | * In this case we do the whole sequence all over again for the | ||
| 438 | * new guy. We cannot assume that the rejected interrupt is less | ||
| 439 | * favored than the new one, and thus doesn't need to be delivered, | ||
| 440 | * because by the time we exit icp_try_to_deliver() the target | ||
| 441 | * processor may well have alrady consumed & completed it, and thus | ||
| 442 | * the rejected interrupt might actually be already acceptable. | ||
| 443 | */ | ||
| 444 | if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) { | ||
| 445 | /* | ||
| 446 | * Delivery was successful, did we reject somebody else ? | ||
| 447 | */ | ||
| 448 | if (reject && reject != XICS_IPI) { | ||
| 449 | mutex_unlock(&ics->lock); | ||
| 450 | new_irq = reject; | ||
| 451 | goto again; | ||
| 452 | } | ||
| 453 | } else { | ||
| 454 | /* | ||
| 455 | * We failed to deliver the interrupt we need to set the | ||
| 456 | * resend map bit and mark the ICS state as needing a resend | ||
| 457 | */ | ||
| 458 | set_bit(ics->icsid, icp->resend_map); | ||
| 459 | state->resend = 1; | ||
| 460 | |||
| 461 | /* | ||
| 462 | * If the need_resend flag got cleared in the ICP some time | ||
| 463 | * between icp_try_to_deliver() atomic update and now, then | ||
| 464 | * we know it might have missed the resend_map bit. So we | ||
| 465 | * retry | ||
| 466 | */ | ||
| 467 | smp_mb(); | ||
| 468 | if (!icp->state.need_resend) { | ||
| 469 | mutex_unlock(&ics->lock); | ||
| 470 | goto again; | ||
| 471 | } | ||
| 472 | } | ||
| 473 | out: | ||
| 474 | mutex_unlock(&ics->lock); | ||
| 475 | } | ||
| 476 | |||
/*
 * ICP Down_CPPR: lower this ICP's current processor priority to
 * new_cppr.  If that makes the MFRR the most favored pending source,
 * promote it to a pending IPI; afterwards, kick off any ICS resends
 * that were deferred while the old (more favored) CPPR was in force.
 * The ICP state word is updated with a lock-free compare-and-swap
 * loop (icp_try_update); resends are handled outside that loop.
 */
static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			  u8 new_cppr)
{
	union kvmppc_icp_state old_state, new_state;
	bool resend;

	/*
	 * This handles several related states in one operation:
	 *
	 * ICP State: Down_CPPR
	 *
	 * Load CPPR with new value and if the XISR is 0
	 * then check for resends:
	 *
	 * ICP State: Resend
	 *
	 * If MFRR is more favored than CPPR, check for IPIs
	 * and notify ICS of a potential resend. This is done
	 * asynchronously (when used in real mode, we will have
	 * to exit here).
	 *
	 * We do not handle the complete Check_IPI as documented
	 * here. In the PAPR, this state will be used for both
	 * Set_MFRR and Down_CPPR. However, we know that we aren't
	 * changing the MFRR state here so we don't need to handle
	 * the case of an MFRR causing a reject of a pending irq,
	 * this will have been handled when the MFRR was set in the
	 * first place.
	 *
	 * Thus we don't have to handle rejects, only resends.
	 *
	 * When implementing real mode for HV KVM, resend will lead to
	 * a H_TOO_HARD return and the whole transaction will be handled
	 * in virtual mode.
	 */
	do {
		/* Snapshot the current state; retry if someone races us */
		old_state = new_state = ACCESS_ONCE(icp->state);

		/* Down_CPPR */
		new_state.cppr = new_cppr;

		/*
		 * Cut down Resend / Check_IPI / IPI
		 *
		 * The logic is that we cannot have a pending interrupt
		 * trumped by an IPI at this point (see above), so we
		 * know that either the pending interrupt is already an
		 * IPI (in which case we don't care to override it) or
		 * it's either more favored than us or non existent
		 */
		if (new_state.mfrr < new_cppr &&
		    new_state.mfrr <= new_state.pending_pri) {
			WARN_ON(new_state.xisr != XICS_IPI &&
				new_state.xisr != 0);
			new_state.pending_pri = new_state.mfrr;
			new_state.xisr = XICS_IPI;
		}

		/* Latch/clear resend bit */
		resend = new_state.need_resend;
		new_state.need_resend = 0;

	} while (!icp_try_update(icp, old_state, new_state, true));

	/*
	 * Now handle resend checks. Those are asynchronous to the ICP
	 * state update in HW (ie bus transactions) so we can handle them
	 * separately here too
	 */
	if (resend)
		icp_check_resend(xics, icp);
}
| 549 | |||
/*
 * H_XIRR hypercall: accept the highest-priority pending interrupt.
 * Returns the XIRR value (XISR in the low 24 bits, old CPPR in the
 * top byte); the ICP's CPPR is raised to the accepted interrupt's
 * priority and the XISR is cleared, all via a lock-free CAS loop.
 */
static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	u32 xirr;

	/* First, remove EE from the processor */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);

	/*
	 * ICP State: Accept_Interrupt
	 *
	 * Return the pending interrupt (if any) along with the
	 * current CPPR, then clear the XISR & set CPPR to the
	 * pending priority
	 */
	do {
		old_state = new_state = ACCESS_ONCE(icp->state);

		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
		/* Nothing pending: no state change needed, skip the CAS */
		if (!old_state.xisr)
			break;
		new_state.cppr = new_state.pending_pri;
		new_state.pending_pri = 0xff;
		new_state.xisr = 0;

	} while (!icp_try_update(icp, old_state, new_state, true));

	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);

	return xirr;
}
| 583 | |||
/*
 * H_IPI hypercall: set the MFRR of the target server's ICP, possibly
 * displacing (rejecting) a less favored pending interrupt or
 * triggering deferred resends.  Returns H_PARAMETER if the server
 * number does not map to any ICP, H_SUCCESS otherwise.
 */
static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
				 unsigned long mfrr)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp;
	u32 reject;
	bool resend;
	bool local;

	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
		 vcpu->vcpu_id, server, mfrr);

	/* Fast path: an IPI to ourselves needs no server lookup */
	icp = vcpu->arch.icp;
	local = icp->server_num == server;
	if (!local) {
		icp = kvmppc_xics_find_server(vcpu->kvm, server);
		if (!icp)
			return H_PARAMETER;
	}

	/*
	 * ICP state: Set_MFRR
	 *
	 * If the CPPR is more favored than the new MFRR, then
	 * nothing needs to be rejected as there can be no XISR to
	 * reject.  If the MFRR is being made less favored then
	 * there might be a previously-rejected interrupt needing
	 * to be resent.
	 *
	 * If the CPPR is less favored, then we might be replacing
	 * an interrupt, and thus need to possibly reject it as in
	 *
	 * ICP state: Check_IPI
	 */
	do {
		old_state = new_state = ACCESS_ONCE(icp->state);

		/* Set_MFRR */
		new_state.mfrr = mfrr;

		/* Check_IPI */
		reject = 0;
		resend = false;
		if (mfrr < new_state.cppr) {
			/* Reject a pending interrupt if not an IPI */
			if (mfrr <= new_state.pending_pri)
				reject = new_state.xisr;
			new_state.pending_pri = mfrr;
			new_state.xisr = XICS_IPI;
		}

		/* MFRR raised (less favored): latch/clear the resend flag */
		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
			resend = new_state.need_resend;
			new_state.need_resend = 0;
		}
	} while (!icp_try_update(icp, old_state, new_state, local));

	/* Handle reject */
	if (reject && reject != XICS_IPI)
		icp_deliver_irq(xics, icp, reject);

	/* Handle resend */
	if (resend)
		icp_check_resend(xics, icp);

	return H_SUCCESS;
}
| 652 | |||
/*
 * H_CPPR hypercall: set the ICP's current processor priority.
 * Lowering the priority (larger cppr) is delegated to icp_down_cppr;
 * raising it may force a reject of the currently pending interrupt,
 * which is then re-delivered (and possibly re-queued) via
 * icp_deliver_irq.
 */
static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	u32 reject;

	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);

	/*
	 * ICP State: Set_CPPR
	 *
	 * We can safely compare the new value with the current
	 * value outside of the transaction as the CPPR is only
	 * ever changed by the processor on itself
	 */
	if (cppr > icp->state.cppr)
		icp_down_cppr(xics, icp, cppr);
	else if (cppr == icp->state.cppr)
		return;

	/*
	 * ICP State: Up_CPPR
	 *
	 * The processor is raising its priority, this can result
	 * in a rejection of a pending interrupt:
	 *
	 * ICP State: Reject_Current
	 *
	 * We can remove EE from the current processor, the update
	 * transaction will set it again if needed
	 */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);

	do {
		old_state = new_state = ACCESS_ONCE(icp->state);

		reject = 0;
		new_state.cppr = cppr;

		/* New CPPR is as/more favored: reject the pending irq */
		if (cppr <= new_state.pending_pri) {
			reject = new_state.xisr;
			new_state.xisr = 0;
			new_state.pending_pri = 0xff;
		}

	} while (!icp_try_update(icp, old_state, new_state, true));

	/*
	 * Check for rejects. They are handled by doing a new delivery
	 * attempt (see comments in icp_deliver_irq).
	 */
	if (reject && reject != XICS_IPI)
		icp_deliver_irq(xics, icp, reject);
}
| 709 | |||
/*
 * H_EOI hypercall: end-of-interrupt for the source encoded in xirr.
 * Restores the CPPR from the top byte of xirr (Down_CPPR), then, for
 * non-IPI sources, resends the interrupt if it is still asserted.
 * Returns H_PARAMETER for an unknown source, H_SUCCESS otherwise.
 */
static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u32 irq = xirr & 0x00ffffff;
	u16 src;

	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);

	/*
	 * ICP State: EOI
	 *
	 * Note: If EOI is incorrectly used by SW to lower the CPPR
	 * value (ie more favored), we do not check for rejection of
	 * a pending interrupt, this is a SW error and PAPR specifies
	 * that we don't have to deal with it.
	 *
	 * The sending of an EOI to the ICS is handled after the
	 * CPPR update
	 *
	 * ICP State: Down_CPPR which we handle
	 * in a separate function as it's shared with H_CPPR.
	 */
	icp_down_cppr(xics, icp, xirr >> 24);

	/* IPIs have no EOI */
	if (irq == XICS_IPI)
		return H_SUCCESS;
	/*
	 * EOI handling: If the interrupt is still asserted, we need to
	 * resend it. We can take a lockless "peek" at the ICS state here.
	 *
	 * "Message" interrupts will never have "asserted" set
	 */
	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics) {
		XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
		return H_PARAMETER;
	}
	state = &ics->irq_state[src];

	/* Still asserted, resend it */
	if (state->asserted)
		icp_deliver_irq(xics, icp, irq);

	return H_SUCCESS;
}
| 759 | |||
/*
 * Complete, in virtual mode, whatever work the real-mode XICS handler
 * found too hard and flagged in icp->rm_action (vcpu kick, resend
 * check, and/or re-delivery of a rejected irq).  Clears rm_action
 * once done and always returns H_SUCCESS.
 */
static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;

	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
		 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);

	if (icp->rm_action & XICS_RM_KICK_VCPU)
		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
	if (icp->rm_action & XICS_RM_CHECK_RESEND)
		icp_check_resend(xics, icp);
	if (icp->rm_action & XICS_RM_REJECT)
		icp_deliver_irq(xics, icp, icp->rm_reject);

	/* All requested actions handled; reset for the next real-mode exit */
	icp->rm_action = 0;

	return H_SUCCESS;
}
| 779 | |||
| 780 | int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) | ||
| 781 | { | ||
| 782 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
| 783 | unsigned long res; | ||
| 784 | int rc = H_SUCCESS; | ||
| 785 | |||
| 786 | /* Check if we have an ICP */ | ||
| 787 | if (!xics || !vcpu->arch.icp) | ||
| 788 | return H_HARDWARE; | ||
| 789 | |||
| 790 | /* Check for real mode returning too hard */ | ||
| 791 | if (xics->real_mode) | ||
| 792 | return kvmppc_xics_rm_complete(vcpu, req); | ||
| 793 | |||
| 794 | switch (req) { | ||
| 795 | case H_XIRR: | ||
| 796 | res = kvmppc_h_xirr(vcpu); | ||
| 797 | kvmppc_set_gpr(vcpu, 4, res); | ||
| 798 | break; | ||
| 799 | case H_CPPR: | ||
| 800 | kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); | ||
| 801 | break; | ||
| 802 | case H_EOI: | ||
| 803 | rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); | ||
| 804 | break; | ||
| 805 | case H_IPI: | ||
| 806 | rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
| 807 | kvmppc_get_gpr(vcpu, 5)); | ||
| 808 | break; | ||
| 809 | } | ||
| 810 | |||
| 811 | return rc; | ||
| 812 | } | ||
| 813 | |||
| 814 | |||
| 815 | /* -- Initialisation code etc. -- */ | ||
| 816 | |||
/*
 * seq_file show callback for the debugfs file: dump every ICP's
 * atomic state (lockless snapshot of the raw word) followed by every
 * irq in every existing ICS (under the ICS mutex).
 */
static int xics_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xics *xics = m->private;
	struct kvm *kvm = xics->kvm;
	struct kvm_vcpu *vcpu;
	int icsid, i;

	if (!kvm)
		return 0;

	seq_printf(m, "=========\nICP state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_icp *icp = vcpu->arch.icp;
		union kvmppc_icp_state state;

		if (!icp)
			continue;

		/* Single atomic read of the whole ICP state word */
		state.raw = ACCESS_ONCE(icp->state.raw);
		seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
			   icp->server_num, state.xisr,
			   state.pending_pri, state.cppr, state.mfrr,
			   state.out_ee, state.need_resend);
	}

	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
		struct kvmppc_ics *ics = xics->ics[icsid];

		if (!ics)
			continue;

		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
			   icsid);

		mutex_lock(&ics->lock);

		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
			struct ics_irq_state *irq = &ics->irq_state[i];

			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
				   irq->number, irq->server, irq->priority,
				   irq->saved_priority, irq->asserted,
				   irq->resend, irq->masked_pending);

		}
		mutex_unlock(&ics->lock);
	}
	return 0;
}
| 867 | |||
/* debugfs open callback: bind xics_debug_show to a single-shot seq_file */
static int xics_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xics_debug_show, inode->i_private);
}
| 872 | |||
/* File operations for the per-XICS debugfs file (read-only seq_file) */
static const struct file_operations xics_debug_fops = {
	.open = xics_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
| 879 | |||
| 880 | static void xics_debugfs_init(struct kvmppc_xics *xics) | ||
| 881 | { | ||
| 882 | char *name; | ||
| 883 | |||
| 884 | name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); | ||
| 885 | if (!name) { | ||
| 886 | pr_err("%s: no memory for name\n", __func__); | ||
| 887 | return; | ||
| 888 | } | ||
| 889 | |||
| 890 | xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, | ||
| 891 | xics, &xics_debug_fops); | ||
| 892 | |||
| 893 | pr_debug("%s: created %s\n", __func__, name); | ||
| 894 | kfree(name); | ||
| 895 | } | ||
| 896 | |||
/*
 * Lazily allocate the ICS node covering the given irq number.
 * Serialized by kvm->lock; if another caller raced us the existing
 * node is returned.  Returns xics->ics[icsid], which is NULL when the
 * allocation failed.
 */
static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
					struct kvmppc_xics *xics, int irq)
{
	struct kvmppc_ics *ics;
	int i, icsid;

	/* Top-level index: which ICS node this irq belongs to */
	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;

	mutex_lock(&kvm->lock);

	/* ICS already exists - somebody else got here first */
	if (xics->ics[icsid])
		goto out;

	/* Create the ICS */
	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
	if (!ics)
		goto out;

	mutex_init(&ics->lock);
	ics->icsid = icsid;

	/* All sources start fully masked */
	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
		ics->irq_state[i].priority = MASKED;
		ics->irq_state[i].saved_priority = MASKED;
	}
	/* Publish the fully-initialized node before making it visible */
	smp_wmb();
	xics->ics[icsid] = ics;

	if (icsid > xics->max_icsid)
		xics->max_icsid = icsid;

 out:
	mutex_unlock(&kvm->lock);
	return xics->ics[icsid];
}
| 934 | |||
| 935 | int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num) | ||
| 936 | { | ||
| 937 | struct kvmppc_icp *icp; | ||
| 938 | |||
| 939 | if (!vcpu->kvm->arch.xics) | ||
| 940 | return -ENODEV; | ||
| 941 | |||
| 942 | if (kvmppc_xics_find_server(vcpu->kvm, server_num)) | ||
| 943 | return -EEXIST; | ||
| 944 | |||
| 945 | icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL); | ||
| 946 | if (!icp) | ||
| 947 | return -ENOMEM; | ||
| 948 | |||
| 949 | icp->vcpu = vcpu; | ||
| 950 | icp->server_num = server_num; | ||
| 951 | icp->state.mfrr = MASKED; | ||
| 952 | icp->state.pending_pri = MASKED; | ||
| 953 | vcpu->arch.icp = icp; | ||
| 954 | |||
| 955 | XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id); | ||
| 956 | |||
| 957 | return 0; | ||
| 958 | } | ||
| 959 | |||
| 960 | u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu) | ||
| 961 | { | ||
| 962 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
| 963 | union kvmppc_icp_state state; | ||
| 964 | |||
| 965 | if (!icp) | ||
| 966 | return 0; | ||
| 967 | state = icp->state; | ||
| 968 | return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) | | ||
| 969 | ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) | | ||
| 970 | ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) | | ||
| 971 | ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT); | ||
| 972 | } | ||
| 973 | |||
/*
 * Restore this vcpu's ICP from the u64 ONE_REG encoding (migration
 * path).  The new state must be internally consistent; an XISR that
 * names a non-existent source is rejected.  Returns -ENOENT when no
 * ICP/XICS exists, -EINVAL on inconsistent state, 0 on success.
 */
int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
{
	struct kvmppc_icp *icp = vcpu->arch.icp;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_ics *ics;
	u8 cppr, mfrr, pending_pri;
	u32 xisr;
	u16 src;
	bool resend;

	if (!icp || !xics)
		return -ENOENT;

	/* Unpack the four fields from the register encoding */
	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
		KVM_REG_PPC_ICP_XISR_MASK;
	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
	pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;

	/* Require the new state to be internally consistent */
	if (xisr == 0) {
		/* No pending irq: pending priority must be "none" (0xff) */
		if (pending_pri != 0xff)
			return -EINVAL;
	} else if (xisr == XICS_IPI) {
		/* Pending IPI: priority must match MFRR and beat CPPR */
		if (pending_pri != mfrr || pending_pri >= cppr)
			return -EINVAL;
	} else {
		/* Pending source irq: must beat both MFRR and CPPR */
		if (pending_pri >= mfrr || pending_pri >= cppr)
			return -EINVAL;
		ics = kvmppc_xics_find_ics(xics, xisr, &src);
		if (!ics)
			return -EINVAL;
	}

	new_state.raw = 0;
	new_state.cppr = cppr;
	new_state.xisr = xisr;
	new_state.mfrr = mfrr;
	new_state.pending_pri = pending_pri;

	/*
	 * Deassert the CPU interrupt request.
	 * icp_try_update will reassert it if necessary.
	 */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);

	/*
	 * Note that if we displace an interrupt from old_state.xisr,
	 * we don't mark it as rejected. We expect userspace to set
	 * the state of the interrupt sources to be consistent with
	 * the ICP states (either before or afterwards, which doesn't
	 * matter). We do handle resends due to CPPR becoming less
	 * favoured because that is necessary to end up with a
	 * consistent state in the situation where userspace restores
	 * the ICS states before the ICP states.
	 */
	do {
		old_state = ACCESS_ONCE(icp->state);

		if (new_state.mfrr <= old_state.mfrr) {
			resend = false;
			new_state.need_resend = old_state.need_resend;
		} else {
			resend = old_state.need_resend;
			new_state.need_resend = 0;
		}
	} while (!icp_try_update(icp, old_state, new_state, false));

	if (resend)
		icp_check_resend(xics, icp);

	return 0;
}
| 1049 | |||
/*
 * KVM_GET_DEVICE_ATTR handler for one interrupt source: encode the
 * source's server/priority/flags into a u64 and copy it to the
 * userspace buffer at addr.  Returns -ENOENT for unknown or
 * never-configured sources, -EFAULT on copy failure.
 */
static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	int ret;
	struct kvmppc_ics *ics;
	struct ics_irq_state *irqp;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 idx;
	u64 val, prio;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics)
		return -ENOENT;

	irqp = &ics->irq_state[idx];
	mutex_lock(&ics->lock);
	ret = -ENOENT;
	if (irqp->exists) {
		val = irqp->server;
		prio = irqp->priority;
		/* Report the saved (unmasked) priority plus a MASKED flag */
		if (prio == MASKED) {
			val |= KVM_XICS_MASKED;
			prio = irqp->saved_priority;
		}
		val |= prio << KVM_XICS_PRIORITY_SHIFT;
		if (irqp->asserted)
			val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
		else if (irqp->masked_pending || irqp->resend)
			val |= KVM_XICS_PENDING;
		ret = 0;
	}
	mutex_unlock(&ics->lock);

	/* Copy to userspace outside the lock */
	if (!ret && put_user(val, ubufp))
		ret = -EFAULT;

	return ret;
}
| 1087 | |||
/*
 * KVM_SET_DEVICE_ATTR handler for one interrupt source: read the u64
 * encoding from userspace and (re)initialize the source, creating its
 * ICS node on demand.  A pending level-sensitive source is delivered
 * immediately after the state is set.
 */
static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	struct kvmppc_ics *ics;
	struct ics_irq_state *irqp;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 idx;
	u64 val;
	u8 prio;
	u32 server;

	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
		return -ENOENT;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics) {
		/* ICS node not allocated yet - create it now */
		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
		if (!ics)
			return -ENOMEM;
	}
	irqp = &ics->irq_state[idx];
	if (get_user(val, ubufp))
		return -EFAULT;

	server = val & KVM_XICS_DESTINATION_MASK;
	prio = val >> KVM_XICS_PRIORITY_SHIFT;
	/* An unmasked source must target an existing server */
	if (prio != MASKED &&
	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
		return -EINVAL;

	mutex_lock(&ics->lock);
	irqp->server = server;
	irqp->saved_priority = prio;
	if (val & KVM_XICS_MASKED)
		prio = MASKED;
	irqp->priority = prio;
	/* Reset transient delivery state before marking it live */
	irqp->resend = 0;
	irqp->masked_pending = 0;
	irqp->asserted = 0;
	if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
		irqp->asserted = 1;
	irqp->exists = 1;
	mutex_unlock(&ics->lock);

	/* NULL icp: deliver resolves the target ICP itself — outside lock */
	if (val & KVM_XICS_PENDING)
		icp_deliver_irq(xics, NULL, irqp->number);

	return 0;
}
| 1136 | |||
/* KVM irq injection entry point: forward the line state to the ICS */
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
		bool line_status)
{
	struct kvmppc_xics *xics = kvm->arch.xics;

	return ics_deliver_irq(xics, irq, level, line_status);
}
| 1144 | |||
| 1145 | static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
| 1146 | { | ||
| 1147 | struct kvmppc_xics *xics = dev->private; | ||
| 1148 | |||
| 1149 | switch (attr->group) { | ||
| 1150 | case KVM_DEV_XICS_GRP_SOURCES: | ||
| 1151 | return xics_set_source(xics, attr->attr, attr->addr); | ||
| 1152 | } | ||
| 1153 | return -ENXIO; | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
| 1157 | { | ||
| 1158 | struct kvmppc_xics *xics = dev->private; | ||
| 1159 | |||
| 1160 | switch (attr->group) { | ||
| 1161 | case KVM_DEV_XICS_GRP_SOURCES: | ||
| 1162 | return xics_get_source(xics, attr->attr, attr->addr); | ||
| 1163 | } | ||
| 1164 | return -ENXIO; | ||
| 1165 | } | ||
| 1166 | |||
| 1167 | static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
| 1168 | { | ||
| 1169 | switch (attr->group) { | ||
| 1170 | case KVM_DEV_XICS_GRP_SOURCES: | ||
| 1171 | if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && | ||
| 1172 | attr->attr < KVMPPC_XICS_NR_IRQS) | ||
| 1173 | return 0; | ||
| 1174 | break; | ||
| 1175 | } | ||
| 1176 | return -ENXIO; | ||
| 1177 | } | ||
| 1178 | |||
/*
 * Device destroy callback: tear down the debugfs entry, detach the
 * XICS from the VM, and free every allocated ICS node, the XICS
 * itself, and the device wrapper.
 */
static void kvmppc_xics_free(struct kvm_device *dev)
{
	struct kvmppc_xics *xics = dev->private;
	int i;
	struct kvm *kvm = xics->kvm;

	debugfs_remove(xics->dentry);

	if (kvm)
		kvm->arch.xics = NULL;

	/* kfree(NULL) is a no-op for never-allocated slots */
	for (i = 0; i <= xics->max_icsid; i++)
		kfree(xics->ics[i]);
	kfree(xics);
	kfree(dev);
}
| 1195 | |||
| 1196 | static int kvmppc_xics_create(struct kvm_device *dev, u32 type) | ||
| 1197 | { | ||
| 1198 | struct kvmppc_xics *xics; | ||
| 1199 | struct kvm *kvm = dev->kvm; | ||
| 1200 | int ret = 0; | ||
| 1201 | |||
| 1202 | xics = kzalloc(sizeof(*xics), GFP_KERNEL); | ||
| 1203 | if (!xics) | ||
| 1204 | return -ENOMEM; | ||
| 1205 | |||
| 1206 | dev->private = xics; | ||
| 1207 | xics->dev = dev; | ||
| 1208 | xics->kvm = kvm; | ||
| 1209 | |||
| 1210 | /* Already there ? */ | ||
| 1211 | mutex_lock(&kvm->lock); | ||
| 1212 | if (kvm->arch.xics) | ||
| 1213 | ret = -EEXIST; | ||
| 1214 | else | ||
| 1215 | kvm->arch.xics = xics; | ||
| 1216 | mutex_unlock(&kvm->lock); | ||
| 1217 | |||
| 1218 | if (ret) | ||
| 1219 | return ret; | ||
| 1220 | |||
| 1221 | xics_debugfs_init(xics); | ||
| 1222 | |||
| 1223 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
| 1224 | if (cpu_has_feature(CPU_FTR_ARCH_206)) { | ||
| 1225 | /* Enable real mode support */ | ||
| 1226 | xics->real_mode = ENABLE_REALMODE; | ||
| 1227 | xics->real_mode_dbg = DEBUG_REALMODE; | ||
| 1228 | } | ||
| 1229 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | ||
| 1230 | |||
| 1231 | return 0; | ||
| 1232 | } | ||
| 1233 | |||
/* Operations table registered with the KVM device control API */
struct kvm_device_ops kvm_xics_ops = {
	.name = "kvm-xics",
	.create = kvmppc_xics_create,
	.destroy = kvmppc_xics_free,
	.set_attr = xics_set_attr,
	.get_attr = xics_get_attr,
	.has_attr = xics_has_attr,
};
| 1242 | |||
| 1243 | int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, | ||
| 1244 | u32 xcpu) | ||
| 1245 | { | ||
| 1246 | struct kvmppc_xics *xics = dev->private; | ||
| 1247 | int r = -EBUSY; | ||
| 1248 | |||
| 1249 | if (dev->ops != &kvm_xics_ops) | ||
| 1250 | return -EPERM; | ||
| 1251 | if (xics->kvm != vcpu->kvm) | ||
| 1252 | return -EPERM; | ||
| 1253 | if (vcpu->arch.irq_type) | ||
| 1254 | return -EBUSY; | ||
| 1255 | |||
| 1256 | r = kvmppc_xics_create_icp(vcpu, xcpu); | ||
| 1257 | if (!r) | ||
| 1258 | vcpu->arch.irq_type = KVMPPC_IRQ_XICS; | ||
| 1259 | |||
| 1260 | return r; | ||
| 1261 | } | ||
| 1262 | |||
| 1263 | void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) | ||
| 1264 | { | ||
| 1265 | if (!vcpu->arch.icp) | ||
| 1266 | return; | ||
| 1267 | kfree(vcpu->arch.icp); | ||
| 1268 | vcpu->arch.icp = NULL; | ||
| 1269 | vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; | ||
| 1270 | } | ||
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h new file mode 100644 index 000000000000..dd9326c5c19b --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.h | |||
| @@ -0,0 +1,130 @@ | |||
/*
 * Copyright 2012 Michael Ellerman, IBM Corporation.
 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#ifndef _KVM_PPC_BOOK3S_XICS_H
#define _KVM_PPC_BOOK3S_XICS_H

/*
 * We use a two-level tree to store interrupt source information.
 * There are up to 1024 ICS nodes, each of which can represent
 * 1024 sources.
 */
#define KVMPPC_XICS_MAX_ICS_ID	1023
#define KVMPPC_XICS_ICS_SHIFT	10
#define KVMPPC_XICS_IRQ_PER_ICS	(1 << KVMPPC_XICS_ICS_SHIFT)
#define KVMPPC_XICS_SRC_MASK	(KVMPPC_XICS_IRQ_PER_ICS - 1)

/*
 * Interrupt source numbers below this are reserved, for example
 * 0 is "no interrupt", and 2 is used for IPIs.
 */
#define KVMPPC_XICS_FIRST_IRQ	16
#define KVMPPC_XICS_NR_IRQS	((KVMPPC_XICS_MAX_ICS_ID + 1) * \
				 KVMPPC_XICS_IRQ_PER_ICS)

/* Priority value to use for disabling an interrupt */
#define MASKED	0xff

/* State for one irq source */
struct ics_irq_state {
	u32 number;		/* global irq number (icsid << shift | src) */
	u32 server;		/* ICP server number this source targets */
	u8  priority;		/* current priority (MASKED when disabled) */
	u8  saved_priority;	/* priority restored when unmasked */
	u8  resend;		/* rejected by ICP; needs re-delivery */
	u8  masked_pending;	/* fired while masked */
	u8  asserted; /* Only for LSI */
	u8  exists;		/* source has been configured by userspace */
};

/* Atomic ICP state, updated with a single compare & swap */
union kvmppc_icp_state {
	unsigned long raw;
	struct {
		u8 out_ee:1;		/* external interrupt asserted to CPU */
		u8 need_resend:1;	/* some ICS has deferred resends */
		u8 cppr;		/* current processor priority */
		u8 mfrr;		/* most favored request register (IPI) */
		u8 pending_pri;		/* priority of pending xisr */
		u32 xisr;		/* pending interrupt source, 0 = none */
	};
};

/* One bit per ICS */
#define ICP_RESEND_MAP_SIZE	(KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)

/* Per-vcpu interrupt presentation controller */
struct kvmppc_icp {
	struct kvm_vcpu *vcpu;
	unsigned long server_num;
	union kvmppc_icp_state state;
	unsigned long resend_map[ICP_RESEND_MAP_SIZE];

	/* Real mode might find something too hard, here's the action
	 * it might request from virtual mode
	 */
#define XICS_RM_KICK_VCPU	0x1
#define XICS_RM_CHECK_RESEND	0x2
#define XICS_RM_REJECT		0x4
	u32 rm_action;
	struct kvm_vcpu *rm_kick_target;
	u32 rm_reject;

	/* Debug stuff for real mode */
	union kvmppc_icp_state rm_dbgstate;
	struct kvm_vcpu *rm_dbgtgt;
};

/* One node of the source tree: 1024 sources under one mutex */
struct kvmppc_ics {
	struct mutex lock;
	u16 icsid;
	struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
};

/* Per-VM XICS device state */
struct kvmppc_xics {
	struct kvm *kvm;
	struct kvm_device *dev;
	struct dentry *dentry;		/* debugfs file */
	u32 max_icsid;			/* highest allocated ICS index */
	bool real_mode;			/* handle hcalls in real mode (HV) */
	bool real_mode_dbg;
	struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
};

/* Look up the ICP whose server number is nr, or NULL if none exists */
static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
							 u32 nr)
{
	struct kvm_vcpu *vcpu = NULL;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
			return vcpu->arch.icp;
	}
	return NULL;
}

/*
 * Look up the ICS node holding irq; on success *source (if non-NULL)
 * receives the index within the node.  Returns NULL for out-of-range
 * irq numbers or unallocated nodes.
 */
static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
						      u32 irq, u16 *source)
{
	u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
	u16 src = irq & KVMPPC_XICS_SRC_MASK;
	struct kvmppc_ics *ics;

	if (source)
		*source = src;
	if (icsid > KVMPPC_XICS_MAX_ICS_ID)
		return NULL;
	ics = xics->ics[icsid];
	if (!ics)
		return NULL;
	return ics;
}


#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 020923e43134..1020119226db 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
| @@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | |||
| 222 | kvmppc_booke_queue_irqprio(vcpu, prio); | 222 | kvmppc_booke_queue_irqprio(vcpu, prio); |
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | 225 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) |
| 226 | struct kvm_interrupt *irq) | ||
| 227 | { | 226 | { |
| 228 | clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); | 227 | clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); |
| 229 | clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); | 228 | clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); |
| @@ -347,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
| 347 | keep_irq = true; | 346 | keep_irq = true; |
| 348 | } | 347 | } |
| 349 | 348 | ||
| 350 | if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) | 349 | if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags) |
| 351 | update_epr = true; | 350 | update_epr = true; |
| 352 | 351 | ||
| 353 | switch (priority) { | 352 | switch (priority) { |
| @@ -428,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
| 428 | set_guest_esr(vcpu, vcpu->arch.queued_esr); | 427 | set_guest_esr(vcpu, vcpu->arch.queued_esr); |
| 429 | if (update_dear == true) | 428 | if (update_dear == true) |
| 430 | set_guest_dear(vcpu, vcpu->arch.queued_dear); | 429 | set_guest_dear(vcpu, vcpu->arch.queued_dear); |
| 431 | if (update_epr == true) | 430 | if (update_epr == true) { |
| 432 | kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); | 431 | if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) |
| 432 | kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); | ||
| 433 | else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) { | ||
| 434 | BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC); | ||
| 435 | kvmppc_mpic_set_epr(vcpu); | ||
| 436 | } | ||
| 437 | } | ||
| 433 | 438 | ||
| 434 | new_msr &= msr_mask; | 439 | new_msr &= msr_mask; |
| 435 | #if defined(CONFIG_64BIT) | 440 | #if defined(CONFIG_64BIT) |
| @@ -746,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 746 | kvmppc_core_queue_program(vcpu, ESR_PIL); | 751 | kvmppc_core_queue_program(vcpu, ESR_PIL); |
| 747 | return RESUME_HOST; | 752 | return RESUME_HOST; |
| 748 | 753 | ||
| 754 | case EMULATE_EXIT_USER: | ||
| 755 | return RESUME_HOST; | ||
| 756 | |||
| 749 | default: | 757 | default: |
| 750 | BUG(); | 758 | BUG(); |
| 751 | } | 759 | } |
| @@ -1148,6 +1156,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 1148 | return r; | 1156 | return r; |
| 1149 | } | 1157 | } |
| 1150 | 1158 | ||
| 1159 | static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr) | ||
| 1160 | { | ||
| 1161 | u32 old_tsr = vcpu->arch.tsr; | ||
| 1162 | |||
| 1163 | vcpu->arch.tsr = new_tsr; | ||
| 1164 | |||
| 1165 | if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) | ||
| 1166 | arm_next_watchdog(vcpu); | ||
| 1167 | |||
| 1168 | update_timer_ints(vcpu); | ||
| 1169 | } | ||
| 1170 | |||
| 1151 | /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ | 1171 | /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ |
| 1152 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | 1172 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) |
| 1153 | { | 1173 | { |
| @@ -1287,16 +1307,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, | |||
| 1287 | kvmppc_emulate_dec(vcpu); | 1307 | kvmppc_emulate_dec(vcpu); |
| 1288 | } | 1308 | } |
| 1289 | 1309 | ||
| 1290 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { | 1310 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) |
| 1291 | u32 old_tsr = vcpu->arch.tsr; | 1311 | kvmppc_set_tsr(vcpu, sregs->u.e.tsr); |
| 1292 | |||
| 1293 | vcpu->arch.tsr = sregs->u.e.tsr; | ||
| 1294 | |||
| 1295 | if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) | ||
| 1296 | arm_next_watchdog(vcpu); | ||
| 1297 | |||
| 1298 | update_timer_ints(vcpu); | ||
| 1299 | } | ||
| 1300 | 1312 | ||
| 1301 | return 0; | 1313 | return 0; |
| 1302 | } | 1314 | } |
| @@ -1409,84 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 1409 | 1421 | ||
| 1410 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 1422 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) |
| 1411 | { | 1423 | { |
| 1412 | int r = -EINVAL; | 1424 | int r = 0; |
| 1425 | union kvmppc_one_reg val; | ||
| 1426 | int size; | ||
| 1427 | long int i; | ||
| 1428 | |||
| 1429 | size = one_reg_size(reg->id); | ||
| 1430 | if (size > sizeof(val)) | ||
| 1431 | return -EINVAL; | ||
| 1413 | 1432 | ||
| 1414 | switch (reg->id) { | 1433 | switch (reg->id) { |
| 1415 | case KVM_REG_PPC_IAC1: | 1434 | case KVM_REG_PPC_IAC1: |
| 1416 | case KVM_REG_PPC_IAC2: | 1435 | case KVM_REG_PPC_IAC2: |
| 1417 | case KVM_REG_PPC_IAC3: | 1436 | case KVM_REG_PPC_IAC3: |
| 1418 | case KVM_REG_PPC_IAC4: { | 1437 | case KVM_REG_PPC_IAC4: |
| 1419 | int iac = reg->id - KVM_REG_PPC_IAC1; | 1438 | i = reg->id - KVM_REG_PPC_IAC1; |
| 1420 | r = copy_to_user((u64 __user *)(long)reg->addr, | 1439 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]); |
| 1421 | &vcpu->arch.dbg_reg.iac[iac], sizeof(u64)); | ||
| 1422 | break; | 1440 | break; |
| 1423 | } | ||
| 1424 | case KVM_REG_PPC_DAC1: | 1441 | case KVM_REG_PPC_DAC1: |
| 1425 | case KVM_REG_PPC_DAC2: { | 1442 | case KVM_REG_PPC_DAC2: |
| 1426 | int dac = reg->id - KVM_REG_PPC_DAC1; | 1443 | i = reg->id - KVM_REG_PPC_DAC1; |
| 1427 | r = copy_to_user((u64 __user *)(long)reg->addr, | 1444 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]); |
| 1428 | &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); | ||
| 1429 | break; | 1445 | break; |
| 1430 | } | ||
| 1431 | case KVM_REG_PPC_EPR: { | 1446 | case KVM_REG_PPC_EPR: { |
| 1432 | u32 epr = get_guest_epr(vcpu); | 1447 | u32 epr = get_guest_epr(vcpu); |
| 1433 | r = put_user(epr, (u32 __user *)(long)reg->addr); | 1448 | val = get_reg_val(reg->id, epr); |
| 1434 | break; | 1449 | break; |
| 1435 | } | 1450 | } |
| 1436 | #if defined(CONFIG_64BIT) | 1451 | #if defined(CONFIG_64BIT) |
| 1437 | case KVM_REG_PPC_EPCR: | 1452 | case KVM_REG_PPC_EPCR: |
| 1438 | r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); | 1453 | val = get_reg_val(reg->id, vcpu->arch.epcr); |
| 1439 | break; | 1454 | break; |
| 1440 | #endif | 1455 | #endif |
| 1456 | case KVM_REG_PPC_TCR: | ||
| 1457 | val = get_reg_val(reg->id, vcpu->arch.tcr); | ||
| 1458 | break; | ||
| 1459 | case KVM_REG_PPC_TSR: | ||
| 1460 | val = get_reg_val(reg->id, vcpu->arch.tsr); | ||
| 1461 | break; | ||
| 1462 | case KVM_REG_PPC_DEBUG_INST: | ||
| 1463 | val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); | ||
| 1464 | break; | ||
| 1441 | default: | 1465 | default: |
| 1466 | r = kvmppc_get_one_reg(vcpu, reg->id, &val); | ||
| 1442 | break; | 1467 | break; |
| 1443 | } | 1468 | } |
| 1469 | |||
| 1470 | if (r) | ||
| 1471 | return r; | ||
| 1472 | |||
| 1473 | if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) | ||
| 1474 | r = -EFAULT; | ||
| 1475 | |||
| 1444 | return r; | 1476 | return r; |
| 1445 | } | 1477 | } |
| 1446 | 1478 | ||
| 1447 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 1479 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) |
| 1448 | { | 1480 | { |
| 1449 | int r = -EINVAL; | 1481 | int r = 0; |
| 1482 | union kvmppc_one_reg val; | ||
| 1483 | int size; | ||
| 1484 | long int i; | ||
| 1485 | |||
| 1486 | size = one_reg_size(reg->id); | ||
| 1487 | if (size > sizeof(val)) | ||
| 1488 | return -EINVAL; | ||
| 1489 | |||
| 1490 | if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) | ||
| 1491 | return -EFAULT; | ||
| 1450 | 1492 | ||
| 1451 | switch (reg->id) { | 1493 | switch (reg->id) { |
| 1452 | case KVM_REG_PPC_IAC1: | 1494 | case KVM_REG_PPC_IAC1: |
| 1453 | case KVM_REG_PPC_IAC2: | 1495 | case KVM_REG_PPC_IAC2: |
| 1454 | case KVM_REG_PPC_IAC3: | 1496 | case KVM_REG_PPC_IAC3: |
| 1455 | case KVM_REG_PPC_IAC4: { | 1497 | case KVM_REG_PPC_IAC4: |
| 1456 | int iac = reg->id - KVM_REG_PPC_IAC1; | 1498 | i = reg->id - KVM_REG_PPC_IAC1; |
| 1457 | r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], | 1499 | vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val); |
| 1458 | (u64 __user *)(long)reg->addr, sizeof(u64)); | ||
| 1459 | break; | 1500 | break; |
| 1460 | } | ||
| 1461 | case KVM_REG_PPC_DAC1: | 1501 | case KVM_REG_PPC_DAC1: |
| 1462 | case KVM_REG_PPC_DAC2: { | 1502 | case KVM_REG_PPC_DAC2: |
| 1463 | int dac = reg->id - KVM_REG_PPC_DAC1; | 1503 | i = reg->id - KVM_REG_PPC_DAC1; |
| 1464 | r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], | 1504 | vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val); |
| 1465 | (u64 __user *)(long)reg->addr, sizeof(u64)); | ||
| 1466 | break; | 1505 | break; |
| 1467 | } | ||
| 1468 | case KVM_REG_PPC_EPR: { | 1506 | case KVM_REG_PPC_EPR: { |
| 1469 | u32 new_epr; | 1507 | u32 new_epr = set_reg_val(reg->id, val); |
| 1470 | r = get_user(new_epr, (u32 __user *)(long)reg->addr); | 1508 | kvmppc_set_epr(vcpu, new_epr); |
| 1471 | if (!r) | ||
| 1472 | kvmppc_set_epr(vcpu, new_epr); | ||
| 1473 | break; | 1509 | break; |
| 1474 | } | 1510 | } |
| 1475 | #if defined(CONFIG_64BIT) | 1511 | #if defined(CONFIG_64BIT) |
| 1476 | case KVM_REG_PPC_EPCR: { | 1512 | case KVM_REG_PPC_EPCR: { |
| 1477 | u32 new_epcr; | 1513 | u32 new_epcr = set_reg_val(reg->id, val); |
| 1478 | r = get_user(new_epcr, (u32 __user *)(long)reg->addr); | 1514 | kvmppc_set_epcr(vcpu, new_epcr); |
| 1479 | if (r == 0) | ||
| 1480 | kvmppc_set_epcr(vcpu, new_epcr); | ||
| 1481 | break; | 1515 | break; |
| 1482 | } | 1516 | } |
| 1483 | #endif | 1517 | #endif |
| 1518 | case KVM_REG_PPC_OR_TSR: { | ||
| 1519 | u32 tsr_bits = set_reg_val(reg->id, val); | ||
| 1520 | kvmppc_set_tsr_bits(vcpu, tsr_bits); | ||
| 1521 | break; | ||
| 1522 | } | ||
| 1523 | case KVM_REG_PPC_CLEAR_TSR: { | ||
| 1524 | u32 tsr_bits = set_reg_val(reg->id, val); | ||
| 1525 | kvmppc_clr_tsr_bits(vcpu, tsr_bits); | ||
| 1526 | break; | ||
| 1527 | } | ||
| 1528 | case KVM_REG_PPC_TSR: { | ||
| 1529 | u32 tsr = set_reg_val(reg->id, val); | ||
| 1530 | kvmppc_set_tsr(vcpu, tsr); | ||
| 1531 | break; | ||
| 1532 | } | ||
| 1533 | case KVM_REG_PPC_TCR: { | ||
| 1534 | u32 tcr = set_reg_val(reg->id, val); | ||
| 1535 | kvmppc_set_tcr(vcpu, tcr); | ||
| 1536 | break; | ||
| 1537 | } | ||
| 1484 | default: | 1538 | default: |
| 1539 | r = kvmppc_set_one_reg(vcpu, reg->id, &val); | ||
| 1485 | break; | 1540 | break; |
| 1486 | } | 1541 | } |
| 1542 | |||
| 1487 | return r; | 1543 | return r; |
| 1488 | } | 1544 | } |
| 1489 | 1545 | ||
| 1546 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | ||
| 1547 | struct kvm_guest_debug *dbg) | ||
| 1548 | { | ||
| 1549 | return -EINVAL; | ||
| 1550 | } | ||
| 1551 | |||
| 1490 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 1552 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
| 1491 | { | 1553 | { |
| 1492 | return -ENOTSUPP; | 1554 | return -ENOTSUPP; |
| @@ -1531,7 +1593,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
| 1531 | 1593 | ||
| 1532 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1594 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
| 1533 | struct kvm_userspace_memory_region *mem, | 1595 | struct kvm_userspace_memory_region *mem, |
| 1534 | struct kvm_memory_slot old) | 1596 | const struct kvm_memory_slot *old) |
| 1535 | { | 1597 | { |
| 1536 | } | 1598 | } |
| 1537 | 1599 | ||
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index f4bb55c96517..2c6deb5ef2fe 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S | |||
| @@ -54,8 +54,7 @@ | |||
| 54 | (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ | 54 | (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ |
| 55 | (1<<BOOKE_INTERRUPT_ALIGNMENT)) | 55 | (1<<BOOKE_INTERRUPT_ALIGNMENT)) |
| 56 | 56 | ||
| 57 | .macro KVM_HANDLER ivor_nr scratch srr0 | 57 | .macro __KVM_HANDLER ivor_nr scratch srr0 |
| 58 | _GLOBAL(kvmppc_handler_\ivor_nr) | ||
| 59 | /* Get pointer to vcpu and record exit number. */ | 58 | /* Get pointer to vcpu and record exit number. */ |
| 60 | mtspr \scratch , r4 | 59 | mtspr \scratch , r4 |
| 61 | mfspr r4, SPRN_SPRG_THREAD | 60 | mfspr r4, SPRN_SPRG_THREAD |
| @@ -76,6 +75,43 @@ _GLOBAL(kvmppc_handler_\ivor_nr) | |||
| 76 | bctr | 75 | bctr |
| 77 | .endm | 76 | .endm |
| 78 | 77 | ||
| 78 | .macro KVM_HANDLER ivor_nr scratch srr0 | ||
| 79 | _GLOBAL(kvmppc_handler_\ivor_nr) | ||
| 80 | __KVM_HANDLER \ivor_nr \scratch \srr0 | ||
| 81 | .endm | ||
| 82 | |||
| 83 | .macro KVM_DBG_HANDLER ivor_nr scratch srr0 | ||
| 84 | _GLOBAL(kvmppc_handler_\ivor_nr) | ||
| 85 | mtspr \scratch, r4 | ||
| 86 | mfspr r4, SPRN_SPRG_THREAD | ||
| 87 | lwz r4, THREAD_KVM_VCPU(r4) | ||
| 88 | stw r3, VCPU_CRIT_SAVE(r4) | ||
| 89 | mfcr r3 | ||
| 90 | mfspr r4, SPRN_CSRR1 | ||
| 91 | andi. r4, r4, MSR_PR | ||
| 92 | bne 1f | ||
| 93 | /* debug interrupt happened in enter/exit path */ | ||
| 94 | mfspr r4, SPRN_CSRR1 | ||
| 95 | rlwinm r4, r4, 0, ~MSR_DE | ||
| 96 | mtspr SPRN_CSRR1, r4 | ||
| 97 | lis r4, 0xffff | ||
| 98 | ori r4, r4, 0xffff | ||
| 99 | mtspr SPRN_DBSR, r4 | ||
| 100 | mfspr r4, SPRN_SPRG_THREAD | ||
| 101 | lwz r4, THREAD_KVM_VCPU(r4) | ||
| 102 | mtcr r3 | ||
| 103 | lwz r3, VCPU_CRIT_SAVE(r4) | ||
| 104 | mfspr r4, \scratch | ||
| 105 | rfci | ||
| 106 | 1: /* debug interrupt happened in guest */ | ||
| 107 | mtcr r3 | ||
| 108 | mfspr r4, SPRN_SPRG_THREAD | ||
| 109 | lwz r4, THREAD_KVM_VCPU(r4) | ||
| 110 | lwz r3, VCPU_CRIT_SAVE(r4) | ||
| 111 | mfspr r4, \scratch | ||
| 112 | __KVM_HANDLER \ivor_nr \scratch \srr0 | ||
| 113 | .endm | ||
| 114 | |||
| 79 | .macro KVM_HANDLER_ADDR ivor_nr | 115 | .macro KVM_HANDLER_ADDR ivor_nr |
| 80 | .long kvmppc_handler_\ivor_nr | 116 | .long kvmppc_handler_\ivor_nr |
| 81 | .endm | 117 | .endm |
| @@ -100,7 +136,7 @@ KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | |||
| 100 | KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 | 136 | KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 |
| 101 | KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 137 | KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
| 102 | KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 138 | KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
| 103 | KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 | 139 | KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 |
| 104 | KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 140 | KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
| 105 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 141 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
| 106 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 142 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 6dd4de7802bf..ce6b73c29612 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
| @@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
| 425 | return kvmppc_set_sregs_ivor(vcpu, sregs); | 425 | return kvmppc_set_sregs_ivor(vcpu, sregs); |
| 426 | } | 426 | } |
| 427 | 427 | ||
| 428 | int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
| 429 | union kvmppc_one_reg *val) | ||
| 430 | { | ||
| 431 | int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); | ||
| 432 | return r; | ||
| 433 | } | ||
| 434 | |||
| 435 | int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
| 436 | union kvmppc_one_reg *val) | ||
| 437 | { | ||
| 438 | int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); | ||
| 439 | return r; | ||
| 440 | } | ||
| 441 | |||
| 428 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | 442 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) |
| 429 | { | 443 | { |
| 430 | struct kvmppc_vcpu_e500 *vcpu_e500; | 444 | struct kvmppc_vcpu_e500 *vcpu_e500; |
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 33db48a8ce24..c2e5e98453a6 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h | |||
| @@ -23,6 +23,10 @@ | |||
| 23 | #include <asm/mmu-book3e.h> | 23 | #include <asm/mmu-book3e.h> |
| 24 | #include <asm/tlb.h> | 24 | #include <asm/tlb.h> |
| 25 | 25 | ||
| 26 | enum vcpu_ftr { | ||
| 27 | VCPU_FTR_MMU_V2 | ||
| 28 | }; | ||
| 29 | |||
| 26 | #define E500_PID_NUM 3 | 30 | #define E500_PID_NUM 3 |
| 27 | #define E500_TLB_NUM 2 | 31 | #define E500_TLB_NUM 2 |
| 28 | 32 | ||
| @@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); | |||
| 131 | void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | 135 | void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); |
| 132 | int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | 136 | int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); |
| 133 | 137 | ||
| 138 | int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, | ||
| 139 | union kvmppc_one_reg *val); | ||
| 140 | int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, | ||
| 141 | union kvmppc_one_reg *val); | ||
| 134 | 142 | ||
| 135 | #ifdef CONFIG_KVM_E500V2 | 143 | #ifdef CONFIG_KVM_E500V2 |
| 136 | unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, | 144 | unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, |
| @@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu) | |||
| 295 | #define get_tlb_sts(gtlbe) (MAS1_TS) | 303 | #define get_tlb_sts(gtlbe) (MAS1_TS) |
| 296 | #endif /* !BOOKE_HV */ | 304 | #endif /* !BOOKE_HV */ |
| 297 | 305 | ||
| 306 | static inline bool has_feature(const struct kvm_vcpu *vcpu, | ||
| 307 | enum vcpu_ftr ftr) | ||
| 308 | { | ||
| 309 | bool has_ftr; | ||
| 310 | switch (ftr) { | ||
| 311 | case VCPU_FTR_MMU_V2: | ||
| 312 | has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2); | ||
| 313 | break; | ||
| 314 | default: | ||
| 315 | return false; | ||
| 316 | } | ||
| 317 | return has_ftr; | ||
| 318 | } | ||
| 319 | |||
| 298 | #endif /* KVM_E500_H */ | 320 | #endif /* KVM_E500_H */ |
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e78f353a836a..b10a01243abd 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c | |||
| @@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
| 284 | case SPRN_TLB1CFG: | 284 | case SPRN_TLB1CFG: |
| 285 | *spr_val = vcpu->arch.tlbcfg[1]; | 285 | *spr_val = vcpu->arch.tlbcfg[1]; |
| 286 | break; | 286 | break; |
| 287 | case SPRN_TLB0PS: | ||
| 288 | if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) | ||
| 289 | return EMULATE_FAIL; | ||
| 290 | *spr_val = vcpu->arch.tlbps[0]; | ||
| 291 | break; | ||
| 292 | case SPRN_TLB1PS: | ||
| 293 | if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) | ||
| 294 | return EMULATE_FAIL; | ||
| 295 | *spr_val = vcpu->arch.tlbps[1]; | ||
| 296 | break; | ||
| 287 | case SPRN_L1CSR0: | 297 | case SPRN_L1CSR0: |
| 288 | *spr_val = vcpu_e500->l1csr0; | 298 | *spr_val = vcpu_e500->l1csr0; |
| 289 | break; | 299 | break; |
| @@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
| 307 | case SPRN_MMUCFG: | 317 | case SPRN_MMUCFG: |
| 308 | *spr_val = vcpu->arch.mmucfg; | 318 | *spr_val = vcpu->arch.mmucfg; |
| 309 | break; | 319 | break; |
| 320 | case SPRN_EPTCFG: | ||
| 321 | if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) | ||
| 322 | return EMULATE_FAIL; | ||
| 323 | /* | ||
| 324 | * Legacy Linux guests access EPTCFG register even if the E.PT | ||
| 325 | * category is disabled in the VM. Give them a chance to live. | ||
| 326 | */ | ||
| 327 | *spr_val = vcpu->arch.eptcfg; | ||
| 328 | break; | ||
| 310 | 329 | ||
| 311 | /* extra exceptions */ | 330 | /* extra exceptions */ |
| 312 | case SPRN_IVOR32: | 331 | case SPRN_IVOR32: |
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 5c4475983f78..c41a5a96b558 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c | |||
| @@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
| 596 | return 0; | 596 | return 0; |
| 597 | } | 597 | } |
| 598 | 598 | ||
| 599 | int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, | ||
| 600 | union kvmppc_one_reg *val) | ||
| 601 | { | ||
| 602 | int r = 0; | ||
| 603 | long int i; | ||
| 604 | |||
| 605 | switch (id) { | ||
| 606 | case KVM_REG_PPC_MAS0: | ||
| 607 | *val = get_reg_val(id, vcpu->arch.shared->mas0); | ||
| 608 | break; | ||
| 609 | case KVM_REG_PPC_MAS1: | ||
| 610 | *val = get_reg_val(id, vcpu->arch.shared->mas1); | ||
| 611 | break; | ||
| 612 | case KVM_REG_PPC_MAS2: | ||
| 613 | *val = get_reg_val(id, vcpu->arch.shared->mas2); | ||
| 614 | break; | ||
| 615 | case KVM_REG_PPC_MAS7_3: | ||
| 616 | *val = get_reg_val(id, vcpu->arch.shared->mas7_3); | ||
| 617 | break; | ||
| 618 | case KVM_REG_PPC_MAS4: | ||
| 619 | *val = get_reg_val(id, vcpu->arch.shared->mas4); | ||
| 620 | break; | ||
| 621 | case KVM_REG_PPC_MAS6: | ||
| 622 | *val = get_reg_val(id, vcpu->arch.shared->mas6); | ||
| 623 | break; | ||
| 624 | case KVM_REG_PPC_MMUCFG: | ||
| 625 | *val = get_reg_val(id, vcpu->arch.mmucfg); | ||
| 626 | break; | ||
| 627 | case KVM_REG_PPC_EPTCFG: | ||
| 628 | *val = get_reg_val(id, vcpu->arch.eptcfg); | ||
| 629 | break; | ||
| 630 | case KVM_REG_PPC_TLB0CFG: | ||
| 631 | case KVM_REG_PPC_TLB1CFG: | ||
| 632 | case KVM_REG_PPC_TLB2CFG: | ||
| 633 | case KVM_REG_PPC_TLB3CFG: | ||
| 634 | i = id - KVM_REG_PPC_TLB0CFG; | ||
| 635 | *val = get_reg_val(id, vcpu->arch.tlbcfg[i]); | ||
| 636 | break; | ||
| 637 | case KVM_REG_PPC_TLB0PS: | ||
| 638 | case KVM_REG_PPC_TLB1PS: | ||
| 639 | case KVM_REG_PPC_TLB2PS: | ||
| 640 | case KVM_REG_PPC_TLB3PS: | ||
| 641 | i = id - KVM_REG_PPC_TLB0PS; | ||
| 642 | *val = get_reg_val(id, vcpu->arch.tlbps[i]); | ||
| 643 | break; | ||
| 644 | default: | ||
| 645 | r = -EINVAL; | ||
| 646 | break; | ||
| 647 | } | ||
| 648 | |||
| 649 | return r; | ||
| 650 | } | ||
| 651 | |||
| 652 | int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, | ||
| 653 | union kvmppc_one_reg *val) | ||
| 654 | { | ||
| 655 | int r = 0; | ||
| 656 | long int i; | ||
| 657 | |||
| 658 | switch (id) { | ||
| 659 | case KVM_REG_PPC_MAS0: | ||
| 660 | vcpu->arch.shared->mas0 = set_reg_val(id, *val); | ||
| 661 | break; | ||
| 662 | case KVM_REG_PPC_MAS1: | ||
| 663 | vcpu->arch.shared->mas1 = set_reg_val(id, *val); | ||
| 664 | break; | ||
| 665 | case KVM_REG_PPC_MAS2: | ||
| 666 | vcpu->arch.shared->mas2 = set_reg_val(id, *val); | ||
| 667 | break; | ||
| 668 | case KVM_REG_PPC_MAS7_3: | ||
| 669 | vcpu->arch.shared->mas7_3 = set_reg_val(id, *val); | ||
| 670 | break; | ||
| 671 | case KVM_REG_PPC_MAS4: | ||
| 672 | vcpu->arch.shared->mas4 = set_reg_val(id, *val); | ||
| 673 | break; | ||
| 674 | case KVM_REG_PPC_MAS6: | ||
| 675 | vcpu->arch.shared->mas6 = set_reg_val(id, *val); | ||
| 676 | break; | ||
| 677 | /* Only allow MMU registers to be set to the config supported by KVM */ | ||
| 678 | case KVM_REG_PPC_MMUCFG: { | ||
| 679 | u32 reg = set_reg_val(id, *val); | ||
| 680 | if (reg != vcpu->arch.mmucfg) | ||
| 681 | r = -EINVAL; | ||
| 682 | break; | ||
| 683 | } | ||
| 684 | case KVM_REG_PPC_EPTCFG: { | ||
| 685 | u32 reg = set_reg_val(id, *val); | ||
| 686 | if (reg != vcpu->arch.eptcfg) | ||
| 687 | r = -EINVAL; | ||
| 688 | break; | ||
| 689 | } | ||
| 690 | case KVM_REG_PPC_TLB0CFG: | ||
| 691 | case KVM_REG_PPC_TLB1CFG: | ||
| 692 | case KVM_REG_PPC_TLB2CFG: | ||
| 693 | case KVM_REG_PPC_TLB3CFG: { | ||
| 694 | /* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */ | ||
| 695 | u32 reg = set_reg_val(id, *val); | ||
| 696 | i = id - KVM_REG_PPC_TLB0CFG; | ||
| 697 | if (reg != vcpu->arch.tlbcfg[i]) | ||
| 698 | r = -EINVAL; | ||
| 699 | break; | ||
| 700 | } | ||
| 701 | case KVM_REG_PPC_TLB0PS: | ||
| 702 | case KVM_REG_PPC_TLB1PS: | ||
| 703 | case KVM_REG_PPC_TLB2PS: | ||
| 704 | case KVM_REG_PPC_TLB3PS: { | ||
| 705 | u32 reg = set_reg_val(id, *val); | ||
| 706 | i = id - KVM_REG_PPC_TLB0PS; | ||
| 707 | if (reg != vcpu->arch.tlbps[i]) | ||
| 708 | r = -EINVAL; | ||
| 709 | break; | ||
| 710 | } | ||
| 711 | default: | ||
| 712 | r = -EINVAL; | ||
| 713 | break; | ||
| 714 | } | ||
| 715 | |||
| 716 | return r; | ||
| 717 | } | ||
| 718 | |||
| 719 | static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu, | ||
| 720 | struct kvm_book3e_206_tlb_params *params) | ||
| 721 | { | ||
| 722 | vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
| 723 | if (params->tlb_sizes[0] <= 2048) | ||
| 724 | vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0]; | ||
| 725 | vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; | ||
| 726 | |||
| 727 | vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
| 728 | vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1]; | ||
| 729 | vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; | ||
| 730 | return 0; | ||
| 731 | } | ||
| 732 | |||
| 599 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | 733 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, |
| 600 | struct kvm_config_tlb *cfg) | 734 | struct kvm_config_tlb *cfg) |
| 601 | { | 735 | { |
| @@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | |||
| 692 | vcpu_e500->gtlb_offset[0] = 0; | 826 | vcpu_e500->gtlb_offset[0] = 0; |
| 693 | vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; | 827 | vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; |
| 694 | 828 | ||
| 695 | vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; | 829 | /* Update vcpu's MMU geometry based on SW_TLB input */ |
| 696 | 830 | vcpu_mmu_geometry_update(vcpu, ¶ms); | |
| 697 | vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
| 698 | if (params.tlb_sizes[0] <= 2048) | ||
| 699 | vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0]; | ||
| 700 | vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; | ||
| 701 | |||
| 702 | vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
| 703 | vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1]; | ||
| 704 | vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; | ||
| 705 | 831 | ||
| 706 | vcpu_e500->shared_tlb_pages = pages; | 832 | vcpu_e500->shared_tlb_pages = pages; |
| 707 | vcpu_e500->num_shared_tlb_pages = num_pages; | 833 | vcpu_e500->num_shared_tlb_pages = num_pages; |
| @@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, | |||
| 737 | return 0; | 863 | return 0; |
| 738 | } | 864 | } |
| 739 | 865 | ||
| 866 | /* Vcpu's MMU default configuration */ | ||
| 867 | static int vcpu_mmu_init(struct kvm_vcpu *vcpu, | ||
| 868 | struct kvmppc_e500_tlb_params *params) | ||
| 869 | { | ||
| 870 | /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/ | ||
| 871 | vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; | ||
| 872 | |||
| 873 | /* Initialize TLBnCFG fields with host values and SW_TLB geometry*/ | ||
| 874 | vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & | ||
| 875 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
| 876 | vcpu->arch.tlbcfg[0] |= params[0].entries; | ||
| 877 | vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT; | ||
| 878 | |||
| 879 | vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & | ||
| 880 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
| 881 | vcpu->arch.tlbcfg[1] |= params[1].entries; | ||
| 882 | vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT; | ||
| 883 | |||
| 884 | if (has_feature(vcpu, VCPU_FTR_MMU_V2)) { | ||
| 885 | vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS); | ||
| 886 | vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS); | ||
| 887 | |||
| 888 | vcpu->arch.mmucfg &= ~MMUCFG_LRAT; | ||
| 889 | |||
| 890 | /* Guest mmu emulation currently doesn't handle E.PT */ | ||
| 891 | vcpu->arch.eptcfg = 0; | ||
| 892 | vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT; | ||
| 893 | vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND; | ||
| 894 | } | ||
| 895 | |||
| 896 | return 0; | ||
| 897 | } | ||
| 898 | |||
| 740 | int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | 899 | int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) |
| 741 | { | 900 | { |
| 742 | struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; | 901 | struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; |
| @@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
| 781 | if (!vcpu_e500->g2h_tlb1_map) | 940 | if (!vcpu_e500->g2h_tlb1_map) |
| 782 | goto err; | 941 | goto err; |
| 783 | 942 | ||
| 784 | /* Init TLB configuration register */ | 943 | vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params); |
| 785 | vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & | ||
| 786 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
| 787 | vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; | ||
| 788 | vcpu->arch.tlbcfg[0] |= | ||
| 789 | vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; | ||
| 790 | |||
| 791 | vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & | ||
| 792 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
| 793 | vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries; | ||
| 794 | vcpu->arch.tlbcfg[1] |= | ||
| 795 | vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; | ||
| 796 | 944 | ||
| 797 | kvmppc_recalc_tlb1map_range(vcpu_e500); | 945 | kvmppc_recalc_tlb1map_range(vcpu_e500); |
| 798 | return 0; | 946 | return 0; |
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 2f4baa074b2e..753cc99eff2b 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c | |||
| @@ -177,6 +177,8 @@ int kvmppc_core_check_processor_compat(void) | |||
| 177 | r = 0; | 177 | r = 0; |
| 178 | else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) | 178 | else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) |
| 179 | r = 0; | 179 | r = 0; |
| 180 | else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) | ||
| 181 | r = 0; | ||
| 180 | else | 182 | else |
| 181 | r = -ENOTSUPP; | 183 | r = -ENOTSUPP; |
| 182 | 184 | ||
| @@ -260,6 +262,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
| 260 | return kvmppc_set_sregs_ivor(vcpu, sregs); | 262 | return kvmppc_set_sregs_ivor(vcpu, sregs); |
| 261 | } | 263 | } |
| 262 | 264 | ||
| 265 | int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
| 266 | union kvmppc_one_reg *val) | ||
| 267 | { | ||
| 268 | int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); | ||
| 269 | return r; | ||
| 270 | } | ||
| 271 | |||
| 272 | int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
| 273 | union kvmppc_one_reg *val) | ||
| 274 | { | ||
| 275 | int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); | ||
| 276 | return r; | ||
| 277 | } | ||
| 278 | |||
| 263 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | 279 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) |
| 264 | { | 280 | { |
| 265 | struct kvmppc_vcpu_e500 *vcpu_e500; | 281 | struct kvmppc_vcpu_e500 *vcpu_e500; |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 7a73b6f72a8b..631a2650e4e4 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | 38 | ||
| 39 | #define OP_31_XOP_TRAP 4 | 39 | #define OP_31_XOP_TRAP 4 |
| 40 | #define OP_31_XOP_LWZX 23 | 40 | #define OP_31_XOP_LWZX 23 |
| 41 | #define OP_31_XOP_DCBST 54 | ||
| 41 | #define OP_31_XOP_TRAP_64 68 | 42 | #define OP_31_XOP_TRAP_64 68 |
| 42 | #define OP_31_XOP_DCBF 86 | 43 | #define OP_31_XOP_DCBF 86 |
| 43 | #define OP_31_XOP_LBZX 87 | 44 | #define OP_31_XOP_LBZX 87 |
| @@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 370 | emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); | 371 | emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); |
| 371 | break; | 372 | break; |
| 372 | 373 | ||
| 374 | case OP_31_XOP_DCBST: | ||
| 373 | case OP_31_XOP_DCBF: | 375 | case OP_31_XOP_DCBF: |
| 374 | case OP_31_XOP_DCBI: | 376 | case OP_31_XOP_DCBI: |
| 375 | /* Do nothing. The guest is performing dcbi because | 377 | /* Do nothing. The guest is performing dcbi because |
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h new file mode 100644 index 000000000000..5a9a10b90762 --- /dev/null +++ b/arch/powerpc/kvm/irq.h | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | #ifndef __IRQ_H | ||
| 2 | #define __IRQ_H | ||
| 3 | |||
| 4 | #include <linux/kvm_host.h> | ||
| 5 | |||
/*
 * Return nonzero if this VM has an in-kernel interrupt controller
 * (MPIC and/or XICS) instantiated.
 *
 * NOTE(review): the smp_rmb() presumably pairs with a write barrier on
 * the device-creation path so that a reader who sees the device pointer
 * also sees its initialized contents -- confirm against the creators.
 */
static inline int irqchip_in_kernel(struct kvm *kvm)
{
        int ret = 0;

#ifdef CONFIG_KVM_MPIC
        if (kvm->arch.mpic != NULL)
                ret = 1;
#endif
#ifdef CONFIG_KVM_XICS
        if (kvm->arch.xics != NULL)
                ret = 1;
#endif
        smp_rmb();
        return ret;
}
| 19 | |||
| 20 | #endif | ||
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c new file mode 100644 index 000000000000..2861ae9eaae6 --- /dev/null +++ b/arch/powerpc/kvm/mpic.c | |||
| @@ -0,0 +1,1853 @@ | |||
| 1 | /* | ||
| 2 | * OpenPIC emulation | ||
| 3 | * | ||
| 4 | * Copyright (c) 2004 Jocelyn Mayer | ||
| 5 | * 2011 Alexander Graf | ||
| 6 | * | ||
| 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| 8 | * of this software and associated documentation files (the "Software"), to deal | ||
| 9 | * in the Software without restriction, including without limitation the rights | ||
| 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 11 | * copies of the Software, and to permit persons to whom the Software is | ||
| 12 | * furnished to do so, subject to the following conditions: | ||
| 13 | * | ||
| 14 | * The above copyright notice and this permission notice shall be included in | ||
| 15 | * all copies or substantial portions of the Software. | ||
| 16 | * | ||
| 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| 23 | * THE SOFTWARE. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/slab.h> | ||
| 27 | #include <linux/mutex.h> | ||
| 28 | #include <linux/kvm_host.h> | ||
| 29 | #include <linux/errno.h> | ||
| 30 | #include <linux/fs.h> | ||
| 31 | #include <linux/anon_inodes.h> | ||
| 32 | #include <asm/uaccess.h> | ||
| 33 | #include <asm/mpic.h> | ||
| 34 | #include <asm/kvm_para.h> | ||
| 35 | #include <asm/kvm_host.h> | ||
| 36 | #include <asm/kvm_ppc.h> | ||
| 37 | #include "iodev.h" | ||
| 38 | |||
| 39 | #define MAX_CPU 32 | ||
| 40 | #define MAX_SRC 256 | ||
| 41 | #define MAX_TMR 4 | ||
| 42 | #define MAX_IPI 4 | ||
| 43 | #define MAX_MSI 8 | ||
| 44 | #define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR) | ||
| 45 | #define VID 0x03 /* MPIC version ID */ | ||
| 46 | |||
| 47 | /* OpenPIC capability flags */ | ||
| 48 | #define OPENPIC_FLAG_IDR_CRIT (1 << 0) | ||
| 49 | #define OPENPIC_FLAG_ILR (2 << 0) | ||
| 50 | |||
| 51 | /* OpenPIC address map */ | ||
| 52 | #define OPENPIC_REG_SIZE 0x40000 | ||
| 53 | #define OPENPIC_GLB_REG_START 0x0 | ||
| 54 | #define OPENPIC_GLB_REG_SIZE 0x10F0 | ||
| 55 | #define OPENPIC_TMR_REG_START 0x10F0 | ||
| 56 | #define OPENPIC_TMR_REG_SIZE 0x220 | ||
| 57 | #define OPENPIC_MSI_REG_START 0x1600 | ||
| 58 | #define OPENPIC_MSI_REG_SIZE 0x200 | ||
| 59 | #define OPENPIC_SUMMARY_REG_START 0x3800 | ||
| 60 | #define OPENPIC_SUMMARY_REG_SIZE 0x800 | ||
| 61 | #define OPENPIC_SRC_REG_START 0x10000 | ||
| 62 | #define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20) | ||
| 63 | #define OPENPIC_CPU_REG_START 0x20000 | ||
| 64 | #define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000)) | ||
| 65 | |||
/* Per-model Freescale MPIC parameters. */
struct fsl_mpic_info {
        int max_ext;    /* number of external interrupt sources */
};

/* FSL MPIC v2.0: 12 external interrupts */
static struct fsl_mpic_info fsl_mpic_20 = {
        .max_ext = 12,
};

/* FSL MPIC v4.2: 12 external interrupts */
static struct fsl_mpic_info fsl_mpic_42 = {
        .max_ext = 12,
};
| 77 | |||
| 78 | #define FRR_NIRQ_SHIFT 16 | ||
| 79 | #define FRR_NCPU_SHIFT 8 | ||
| 80 | #define FRR_VID_SHIFT 0 | ||
| 81 | |||
| 82 | #define VID_REVISION_1_2 2 | ||
| 83 | #define VID_REVISION_1_3 3 | ||
| 84 | |||
| 85 | #define VIR_GENERIC 0x00000000 /* Generic Vendor ID */ | ||
| 86 | |||
| 87 | #define GCR_RESET 0x80000000 | ||
| 88 | #define GCR_MODE_PASS 0x00000000 | ||
| 89 | #define GCR_MODE_MIXED 0x20000000 | ||
| 90 | #define GCR_MODE_PROXY 0x60000000 | ||
| 91 | |||
| 92 | #define TBCR_CI 0x80000000 /* count inhibit */ | ||
| 93 | #define TCCR_TOG 0x80000000 /* toggles when decrement to zero */ | ||
| 94 | |||
| 95 | #define IDR_EP_SHIFT 31 | ||
| 96 | #define IDR_EP_MASK (1 << IDR_EP_SHIFT) | ||
| 97 | #define IDR_CI0_SHIFT 30 | ||
| 98 | #define IDR_CI1_SHIFT 29 | ||
| 99 | #define IDR_P1_SHIFT 1 | ||
| 100 | #define IDR_P0_SHIFT 0 | ||
| 101 | |||
| 102 | #define ILR_INTTGT_MASK 0x000000ff | ||
| 103 | #define ILR_INTTGT_INT 0x00 | ||
| 104 | #define ILR_INTTGT_CINT 0x01 /* critical */ | ||
| 105 | #define ILR_INTTGT_MCP 0x02 /* machine check */ | ||
| 106 | #define NUM_OUTPUTS 3 | ||
| 107 | |||
| 108 | #define MSIIR_OFFSET 0x140 | ||
| 109 | #define MSIIR_SRS_SHIFT 29 | ||
| 110 | #define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT) | ||
| 111 | #define MSIIR_IBS_SHIFT 24 | ||
| 112 | #define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT) | ||
| 113 | |||
/*
 * MPIC CPU index of the vcpu running on this host thread, or -1 when
 * no vcpu is loaded (or when the configuration provides no way to tell).
 */
static int get_current_cpu(void)
{
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
        struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;

        if (!vcpu)
                return -1;
        return vcpu->arch.irq_cpu_id;
#else
        /* XXX */
        return -1;
#endif
}
| 124 | |||
| 125 | static int openpic_cpu_write_internal(void *opaque, gpa_t addr, | ||
| 126 | u32 val, int idx); | ||
| 127 | static int openpic_cpu_read_internal(void *opaque, gpa_t addr, | ||
| 128 | u32 *ptr, int idx); | ||
| 129 | |||
/* Classification of an IRQ source; selects how the IVPR sense/polarity
 * bits are interpreted (see write_IRQreg_ivpr() and openpic_reset()).
 */
enum irq_type {
        IRQ_TYPE_NORMAL = 0,    /* sense/polarity taken from IVPR */
        IRQ_TYPE_FSLINT,        /* FSL internal interrupt -- level only */
        IRQ_TYPE_FSLSPECIAL,    /* FSL timer/IPI interrupt, edge, no polarity */
};
| 135 | |||
/* Bitmap-based priority queue of IRQs for one destination CPU.
 * 'next' caches the highest-priority queued IRQ (-1 if empty) and
 * 'priority' its IVPR priority; both are recomputed by IRQ_check().
 */
struct irq_queue {
        /* Round up to the nearest 64 IRQs so that the queue length
         * won't change when moving between 32 and 64 bit hosts.
         */
        unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
        int next;
        int priority;
};
| 144 | |||
/* State of one interrupt source.  destmask/output/nomask are cached
 * values derived from idr (see write_IRQreg_idr()).
 */
struct irq_source {
        uint32_t ivpr;          /* IRQ vector/priority register */
        uint32_t idr;           /* IRQ destination register */
        uint32_t destmask;      /* bitmap of CPU destinations */
        int last_cpu;           /* last CPU served in distributed mode */
        int output;             /* IRQ level, e.g. ILR_INTTGT_INT */
        int pending;            /* TRUE if IRQ is pending */
        enum irq_type type;
        bool level:1;           /* level-triggered */
        bool nomask:1;  /* critical interrupts ignore mask on some FSL MPICs */
};
| 156 | |||
| 157 | #define IVPR_MASK_SHIFT 31 | ||
| 158 | #define IVPR_MASK_MASK (1 << IVPR_MASK_SHIFT) | ||
| 159 | #define IVPR_ACTIVITY_SHIFT 30 | ||
| 160 | #define IVPR_ACTIVITY_MASK (1 << IVPR_ACTIVITY_SHIFT) | ||
| 161 | #define IVPR_MODE_SHIFT 29 | ||
| 162 | #define IVPR_MODE_MASK (1 << IVPR_MODE_SHIFT) | ||
| 163 | #define IVPR_POLARITY_SHIFT 23 | ||
| 164 | #define IVPR_POLARITY_MASK (1 << IVPR_POLARITY_SHIFT) | ||
| 165 | #define IVPR_SENSE_SHIFT 22 | ||
| 166 | #define IVPR_SENSE_MASK (1 << IVPR_SENSE_SHIFT) | ||
| 167 | |||
| 168 | #define IVPR_PRIORITY_MASK (0xF << 16) | ||
| 169 | #define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16)) | ||
| 170 | #define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask) | ||
| 171 | |||
| 172 | /* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */ | ||
| 173 | #define IDR_EP 0x80000000 /* external pin */ | ||
| 174 | #define IDR_CI 0x40000000 /* critical interrupt */ | ||
| 175 | |||
/* Per-destination-CPU delivery state. */
struct irq_dest {
        struct kvm_vcpu *vcpu;

        int32_t ctpr;   /* CPU current task priority */
        struct irq_queue raised;        /* asserted, not yet taken */
        struct irq_queue servicing;     /* in service (pre-EOI, presumably) */

        /* Count of IRQ sources asserting on non-INT outputs */
        uint32_t outputs_active[NUM_OUTPUTS];
};
| 186 | |||
| 187 | #define MAX_MMIO_REGIONS 10 | ||
| 188 | |||
/* Complete state of one emulated OpenPIC/MPIC instance. */
struct openpic {
        struct kvm *kvm;
        struct kvm_device *dev;
        struct kvm_io_device mmio;
        const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
        int num_mmio_regions;

        gpa_t reg_base;         /* guest-physical base of the register block */
        spinlock_t lock;

        /* Behavior control */
        struct fsl_mpic_info *fsl;
        uint32_t model;
        uint32_t flags;         /* OPENPIC_FLAG_* */
        uint32_t nb_irqs;
        uint32_t vid;
        uint32_t vir;           /* Vendor identification register */
        uint32_t vector_mask;
        uint32_t tfrr_reset;
        uint32_t ivpr_reset;
        uint32_t idr_reset;
        uint32_t brr1;
        uint32_t mpic_mode_mask;

        /* Global registers */
        uint32_t frr;           /* Feature reporting register */
        uint32_t gcr;           /* Global configuration register */
        uint32_t pir;           /* Processor initialization register */
        uint32_t spve;          /* Spurious vector register */
        uint32_t tfrr;          /* Timer frequency reporting register */
        /* Source registers */
        struct irq_source src[MAX_IRQ];
        /* Local registers per output pin */
        struct irq_dest dst[MAX_CPU];
        uint32_t nb_cpus;
        /* Timer registers */
        struct {
                uint32_t tccr;  /* Global timer current count register */
                uint32_t tbcr;  /* Global timer base count register */
        } timers[MAX_TMR];
        /* Shared MSI registers */
        struct {
                uint32_t msir;  /* Shared Message Signaled Interrupt Register */
        } msi[MAX_MSI];
        uint32_t max_irq;
        /* First IRQ numbers of the IPI, timer and MSI source ranges */
        uint32_t irq_ipi0;
        uint32_t irq_tim0;
        uint32_t irq_msi;
};
| 238 | |||
| 239 | |||
| 240 | static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst, | ||
| 241 | int output) | ||
| 242 | { | ||
| 243 | struct kvm_interrupt irq = { | ||
| 244 | .irq = KVM_INTERRUPT_SET_LEVEL, | ||
| 245 | }; | ||
| 246 | |||
| 247 | if (!dst->vcpu) { | ||
| 248 | pr_debug("%s: destination cpu %d does not exist\n", | ||
| 249 | __func__, (int)(dst - &opp->dst[0])); | ||
| 250 | return; | ||
| 251 | } | ||
| 252 | |||
| 253 | pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, | ||
| 254 | output); | ||
| 255 | |||
| 256 | if (output != ILR_INTTGT_INT) /* TODO */ | ||
| 257 | return; | ||
| 258 | |||
| 259 | kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq); | ||
| 260 | } | ||
| 261 | |||
| 262 | static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst, | ||
| 263 | int output) | ||
| 264 | { | ||
| 265 | if (!dst->vcpu) { | ||
| 266 | pr_debug("%s: destination cpu %d does not exist\n", | ||
| 267 | __func__, (int)(dst - &opp->dst[0])); | ||
| 268 | return; | ||
| 269 | } | ||
| 270 | |||
| 271 | pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, | ||
| 272 | output); | ||
| 273 | |||
| 274 | if (output != ILR_INTTGT_INT) /* TODO */ | ||
| 275 | return; | ||
| 276 | |||
| 277 | kvmppc_core_dequeue_external(dst->vcpu); | ||
| 278 | } | ||
| 279 | |||
/* Mark IRQ n_IRQ as queued in q. */
static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
{
        set_bit(n_IRQ, q->queue);
}

/* Remove IRQ n_IRQ from q. */
static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
{
        clear_bit(n_IRQ, q->queue);
}

/* Nonzero if IRQ n_IRQ is queued in q. */
static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
{
        return test_bit(n_IRQ, q->queue);
}
| 294 | |||
| 295 | static void IRQ_check(struct openpic *opp, struct irq_queue *q) | ||
| 296 | { | ||
| 297 | int irq = -1; | ||
| 298 | int next = -1; | ||
| 299 | int priority = -1; | ||
| 300 | |||
| 301 | for (;;) { | ||
| 302 | irq = find_next_bit(q->queue, opp->max_irq, irq + 1); | ||
| 303 | if (irq == opp->max_irq) | ||
| 304 | break; | ||
| 305 | |||
| 306 | pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n", | ||
| 307 | irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority); | ||
| 308 | |||
| 309 | if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) { | ||
| 310 | next = irq; | ||
| 311 | priority = IVPR_PRIORITY(opp->src[irq].ivpr); | ||
| 312 | } | ||
| 313 | } | ||
| 314 | |||
| 315 | q->next = next; | ||
| 316 | q->priority = priority; | ||
| 317 | } | ||
| 318 | |||
| 319 | static int IRQ_get_next(struct openpic *opp, struct irq_queue *q) | ||
| 320 | { | ||
| 321 | /* XXX: optimize */ | ||
| 322 | IRQ_check(opp, q); | ||
| 323 | |||
| 324 | return q->next; | ||
| 325 | } | ||
| 326 | |||
/*
 * Propagate a state change of source n_IRQ to destination CPU n_CPU.
 *
 * @active:     new asserted state of the source
 * @was_active: previous state (from the IVPR ACTIVITY bit)
 *
 * Non-INT outputs (critical/machine check) bypass priority arbitration
 * entirely and are reference-counted per output pin.  INT delivery goes
 * through the raised queue and is gated by the CPU's current task
 * priority (ctpr) and by any interrupt currently being serviced.
 */
static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
                           bool active, bool was_active)
{
        struct irq_dest *dst;
        struct irq_source *src;
        int priority;

        dst = &opp->dst[n_CPU];
        src = &opp->src[n_IRQ];

        pr_debug("%s: IRQ %d active %d was %d\n",
                 __func__, n_IRQ, active, was_active);

        if (src->output != ILR_INTTGT_INT) {
                pr_debug("%s: output %d irq %d active %d was %d count %d\n",
                         __func__, src->output, n_IRQ, active, was_active,
                         dst->outputs_active[src->output]);

                /* On Freescale MPIC, critical interrupts ignore priority,
                 * IACK, EOI, etc. Before MPIC v4.1 they also ignore
                 * masking.
                 */
                if (active) {
                        /* raise the pin only on the 0 -> 1 transition */
                        if (!was_active &&
                            dst->outputs_active[src->output]++ == 0) {
                                pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
                                         __func__, src->output, n_CPU, n_IRQ);
                                mpic_irq_raise(opp, dst, src->output);
                        }
                } else {
                        /* lower the pin when the last source deasserts */
                        if (was_active &&
                            --dst->outputs_active[src->output] == 0) {
                                pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
                                         __func__, src->output, n_CPU, n_IRQ);
                                mpic_irq_lower(opp, dst, src->output);
                        }
                }

                return;
        }

        priority = IVPR_PRIORITY(src->ivpr);

        /* Even if the interrupt doesn't have enough priority,
         * it is still raised, in case ctpr is lowered later.
         */
        if (active)
                IRQ_setbit(&dst->raised, n_IRQ);
        else
                IRQ_resetbit(&dst->raised, n_IRQ);

        IRQ_check(opp, &dst->raised);

        /* priority must exceed ctpr to be delivered */
        if (active && priority <= dst->ctpr) {
                pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
                         __func__, n_IRQ, priority, dst->ctpr, n_CPU);
                active = 0;
        }

        if (active) {
                /* a higher/equal-priority in-service IRQ hides this one */
                if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
                    priority <= dst->servicing.priority) {
                        pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
                                 __func__, n_IRQ, dst->servicing.next, n_CPU);
                } else {
                        pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
                                 __func__, n_CPU, n_IRQ, dst->raised.next);
                        mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
                }
        } else {
                IRQ_get_next(opp, &dst->servicing);
                if (dst->raised.priority > dst->ctpr &&
                    dst->raised.priority > dst->servicing.priority) {
                        pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
                                 __func__, n_IRQ, dst->raised.next,
                                 dst->raised.priority, dst->ctpr,
                                 dst->servicing.priority, n_CPU);
                        /* IRQ line stays asserted */
                } else {
                        pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
                                 __func__, n_IRQ, dst->ctpr,
                                 dst->servicing.priority, n_CPU);
                        mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
                }
        }
}
| 413 | |||
/* update pic state because registers for n_IRQ have changed value */
static void openpic_update_irq(struct openpic *opp, int n_IRQ)
{
        struct irq_source *src;
        bool active, was_active;
        int i;

        src = &opp->src[n_IRQ];
        active = src->pending;

        /* masked sources are inactive unless nomask (critical) is set */
        if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
                /* Interrupt source is disabled */
                pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
                active = false;
        }

        was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);

        /*
         * We don't have a similar check for already-active because
         * ctpr may have changed and we need to withdraw the interrupt.
         */
        if (!active && !was_active) {
                pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
                return;
        }

        /* record the new state in the (read-only to guests) ACTIVITY bit */
        if (active)
                src->ivpr |= IVPR_ACTIVITY_MASK;
        else
                src->ivpr &= ~IVPR_ACTIVITY_MASK;

        if (src->destmask == 0) {
                /* No target */
                pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
                return;
        }

        if (src->destmask == (1 << src->last_cpu)) {
                /* Only one CPU is allowed to receive this IRQ */
                IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
        } else if (!(src->ivpr & IVPR_MODE_MASK)) {
                /* Directed delivery mode: deliver to every CPU in destmask */
                for (i = 0; i < opp->nb_cpus; i++) {
                        if (src->destmask & (1 << i)) {
                                IRQ_local_pipe(opp, i, n_IRQ, active,
                                               was_active);
                        }
                }
        } else {
                /* Distributed delivery mode: round-robin, starting after
                 * the CPU that was served last.
                 */
                for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
                        if (i == opp->nb_cpus)
                                i = 0;

                        if (src->destmask & (1 << i)) {
                                IRQ_local_pipe(opp, i, n_IRQ, active,
                                               was_active);
                                src->last_cpu = i;
                                break;
                        }
                }
        }
}
| 478 | |||
/* Drive IRQ input line n_IRQ to 'level'.  Level-sensitive sources
 * track the line directly; edge-sensitive sources latch pending on a
 * rising edge only.
 */
static void openpic_set_irq(void *opaque, int n_IRQ, int level)
{
        struct openpic *opp = opaque;
        struct irq_source *src;

        if (n_IRQ >= MAX_IRQ) {
                WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
                return;
        }

        src = &opp->src[n_IRQ];
        pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
                 n_IRQ, level, src->ivpr);
        if (src->level) {
                /* level-sensitive irq */
                src->pending = level;
                openpic_update_irq(opp, n_IRQ);
        } else {
                /* edge-sensitive irq */
                if (level) {
                        src->pending = 1;
                        openpic_update_irq(opp, n_IRQ);
                }

                if (src->output != ILR_INTTGT_INT) {
                        /* Edge-triggered interrupts shouldn't be used
                         * with non-INT delivery, but just in case,
                         * try to make it do something sane rather than
                         * cause an interrupt storm. This is close to
                         * what you'd probably see happen in real hardware.
                         */
                        src->pending = 0;
                        openpic_update_irq(opp, n_IRQ);
                }
        }
}
| 515 | |||
/* Reset the controller to its power-on state.  GCR_RESET is held set
 * while state is being reinitialized and cleared on completion.
 */
static void openpic_reset(struct openpic *opp)
{
        int i;

        opp->gcr = GCR_RESET;
        /* Initialise controller registers */
        opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
            (opp->vid << FRR_VID_SHIFT);

        opp->pir = 0;
        /* spurious vector: all ones within the vector field */
        opp->spve = -1 & opp->vector_mask;
        opp->tfrr = opp->tfrr_reset;
        /* Initialise IRQ sources */
        for (i = 0; i < opp->max_irq; i++) {
                opp->src[i].ivpr = opp->ivpr_reset;
                opp->src[i].idr = opp->idr_reset;

                switch (opp->src[i].type) {
                case IRQ_TYPE_NORMAL:
                        /* sense bit in the reset IVPR selects level mode */
                        opp->src[i].level =
                            !!(opp->ivpr_reset & IVPR_SENSE_MASK);
                        break;

                case IRQ_TYPE_FSLINT:
                        opp->src[i].ivpr |= IVPR_POLARITY_MASK;
                        break;

                case IRQ_TYPE_FSLSPECIAL:
                        break;
                }
        }
        /* Initialise IRQ destinations */
        for (i = 0; i < MAX_CPU; i++) {
                /* ctpr 15 = maximum task priority: INT delivery blocked */
                opp->dst[i].ctpr = 15;
                memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
                opp->dst[i].raised.next = -1;
                memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
                opp->dst[i].servicing.next = -1;
        }
        /* Initialise timers */
        for (i = 0; i < MAX_TMR; i++) {
                opp->timers[i].tccr = 0;
                opp->timers[i].tbcr = TBCR_CI;  /* counting inhibited */
        }
        /* Go out of RESET state */
        opp->gcr = 0;
}
| 563 | |||
| 564 | static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ) | ||
| 565 | { | ||
| 566 | return opp->src[n_IRQ].idr; | ||
| 567 | } | ||
| 568 | |||
| 569 | static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ) | ||
| 570 | { | ||
| 571 | if (opp->flags & OPENPIC_FLAG_ILR) | ||
| 572 | return opp->src[n_IRQ].output; | ||
| 573 | |||
| 574 | return 0xffffffff; | ||
| 575 | } | ||
| 576 | |||
| 577 | static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ) | ||
| 578 | { | ||
| 579 | return opp->src[n_IRQ].ivpr; | ||
| 580 | } | ||
| 581 | |||
/* Write a source's destination register (IDR) and recompute the cached
 * destmask/output/nomask fields from it.
 *
 * With OPENPIC_FLAG_IDR_CRIT, the IDR carries per-CPU critical-
 * interrupt bits (just below the EP bit) in addition to the normal
 * per-CPU INT bits; any set CI bit routes the source to the critical
 * output instead of INT.
 */
static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
                                    uint32_t val)
{
        struct irq_source *src = &opp->src[n_IRQ];
        uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
        uint32_t crit_mask = 0;
        uint32_t mask = normal_mask;
        int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
        int i;

        if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
                crit_mask = mask << crit_shift;
                mask |= crit_mask | IDR_EP;
        }

        /* bits outside the implemented mask are discarded */
        src->idr = val & mask;
        pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);

        if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
                if (src->idr & crit_mask) {
                        if (src->idr & normal_mask) {
                                pr_debug("%s: IRQ configured for multiple output types, using critical\n",
                                         __func__);
                        }

                        src->output = ILR_INTTGT_CINT;
                        src->nomask = true;     /* critical ignores IVPR mask */
                        src->destmask = 0;

                        /* translate per-CPU CI bits into a destination mask */
                        for (i = 0; i < opp->nb_cpus; i++) {
                                int n_ci = IDR_CI0_SHIFT - i;

                                if (src->idr & (1UL << n_ci))
                                        src->destmask |= 1UL << i;
                        }
                } else {
                        src->output = ILR_INTTGT_INT;
                        src->nomask = false;
                        src->destmask = src->idr & normal_mask;
                }
        } else {
                src->destmask = src->idr;
        }
}
| 626 | |||
| 627 | static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ, | ||
| 628 | uint32_t val) | ||
| 629 | { | ||
| 630 | if (opp->flags & OPENPIC_FLAG_ILR) { | ||
| 631 | struct irq_source *src = &opp->src[n_IRQ]; | ||
| 632 | |||
| 633 | src->output = val & ILR_INTTGT_MASK; | ||
| 634 | pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr, | ||
| 635 | src->output); | ||
| 636 | |||
| 637 | /* TODO: on MPIC v4.0 only, set nomask for non-INT */ | ||
| 638 | } | ||
| 639 | } | ||
| 640 | |||
/* Write a source's vector/priority register (IVPR), enforcing per-type
 * restrictions, then re-evaluate the source's delivery state.
 */
static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
                                     uint32_t val)
{
        uint32_t mask;

        /* NOTE when implementing newer FSL MPIC models: starting with v4.0,
         * the polarity bit is read-only on internal interrupts.
         */
        mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
            IVPR_POLARITY_MASK | opp->vector_mask;

        /* ACTIVITY bit is read-only */
        opp->src[n_IRQ].ivpr =
            (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);

        /* For FSL internal interrupts, The sense bit is reserved and zero,
         * and the interrupt is always level-triggered.  Timers and IPIs
         * have no sense or polarity bits, and are edge-triggered.
         */
        switch (opp->src[n_IRQ].type) {
        case IRQ_TYPE_NORMAL:
                opp->src[n_IRQ].level =
                    !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
                break;

        case IRQ_TYPE_FSLINT:
                opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
                break;

        case IRQ_TYPE_FSLSPECIAL:
                opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
                break;
        }

        /* mask/priority may have changed -- recompute delivery */
        openpic_update_irq(opp, n_IRQ);
        pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
                 opp->src[n_IRQ].ivpr);
}
| 679 | |||
| 680 | static void openpic_gcr_write(struct openpic *opp, uint64_t val) | ||
| 681 | { | ||
| 682 | if (val & GCR_RESET) { | ||
| 683 | openpic_reset(opp); | ||
| 684 | return; | ||
| 685 | } | ||
| 686 | |||
| 687 | opp->gcr &= ~opp->mpic_mode_mask; | ||
| 688 | opp->gcr |= val & opp->mpic_mode_mask; | ||
| 689 | } | ||
| 690 | |||
/* MMIO write handler for the global register block.
 * Returns 0 on success (unknown/read-only registers are silently
 * ignored) or -ENXIO for registers that must be handled in userspace.
 */
static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
{
        struct openpic *opp = opaque;
        int err = 0;

        pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
        if (addr & 0xF)
                return 0;       /* only aligned 32-bit accesses */

        switch (addr) {
        case 0x00:      /* Block Revision Register1 (BRR1) is Readonly */
                break;
        /* 0x40..0xB0: forwarded to the current CPU's private registers */
        case 0x40:
        case 0x50:
        case 0x60:
        case 0x70:
        case 0x80:
        case 0x90:
        case 0xA0:
        case 0xB0:
                err = openpic_cpu_write_internal(opp, addr, val,
                                                 get_current_cpu());
                break;
        case 0x1000:    /* FRR */
                break;  /* read-only */
        case 0x1020:    /* GCR */
                openpic_gcr_write(opp, val);
                break;
        case 0x1080:    /* VIR */
                break;  /* read-only */
        case 0x1090:    /* PIR */
                /*
                 * This register is used to reset a CPU core --
                 * let userspace handle it.
                 */
                err = -ENXIO;
                break;
        case 0x10A0:    /* IPI_IVPR */
        case 0x10B0:
        case 0x10C0:
        case 0x10D0: {
                int idx;
                idx = (addr - 0x10A0) >> 4;     /* one IVPR per 0x10 */
                write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
                break;
        }
        case 0x10E0:    /* SPVE */
                opp->spve = val & opp->vector_mask;
                break;
        default:
                break;
        }

        return err;
}
| 746 | |||
/* MMIO read handler for the global register block.  Unknown or
 * unaligned reads return all-ones.  Result is stored through *ptr;
 * returns 0 or an error from the per-CPU forwarding path.
 */
static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
{
        struct openpic *opp = opaque;
        u32 retval;
        int err = 0;

        pr_debug("%s: addr %#llx\n", __func__, addr);
        retval = 0xFFFFFFFF;
        if (addr & 0xF)
                goto out;       /* only aligned 32-bit accesses */

        switch (addr) {
        case 0x1000:    /* FRR */
                retval = opp->frr;
                /* CPU count is reported dynamically */
                retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
                break;
        case 0x1020:    /* GCR */
                retval = opp->gcr;
                break;
        case 0x1080:    /* VIR */
                retval = opp->vir;
                break;
        case 0x1090:    /* PIR */
                retval = 0x00000000;
                break;
        case 0x00:      /* Block Revision Register1 (BRR1) */
                retval = opp->brr1;
                break;
        /* 0x40..0xB0: forwarded to the current CPU's private registers */
        case 0x40:
        case 0x50:
        case 0x60:
        case 0x70:
        case 0x80:
        case 0x90:
        case 0xA0:
        case 0xB0:
                err = openpic_cpu_read_internal(opp, addr,
                                                &retval, get_current_cpu());
                break;
        case 0x10A0:    /* IPI_IVPR */
        case 0x10B0:
        case 0x10C0:
        case 0x10D0:
                {
                        int idx;
                        idx = (addr - 0x10A0) >> 4;     /* one IVPR per 0x10 */
                        retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
                }
                break;
        case 0x10E0:    /* SPVE */
                retval = opp->spve;
                break;
        default:
                break;
        }

out:
        pr_debug("%s: => 0x%08x\n", __func__, retval);
        *ptr = retval;
        return err;
}
| 808 | |||
| 809 | static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val) | ||
| 810 | { | ||
| 811 | struct openpic *opp = opaque; | ||
| 812 | int idx; | ||
| 813 | |||
| 814 | addr += 0x10f0; | ||
| 815 | |||
| 816 | pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); | ||
| 817 | if (addr & 0xF) | ||
| 818 | return 0; | ||
| 819 | |||
| 820 | if (addr == 0x10f0) { | ||
| 821 | /* TFRR */ | ||
| 822 | opp->tfrr = val; | ||
| 823 | return 0; | ||
| 824 | } | ||
| 825 | |||
| 826 | idx = (addr >> 6) & 0x3; | ||
| 827 | addr = addr & 0x30; | ||
| 828 | |||
| 829 | switch (addr & 0x30) { | ||
| 830 | case 0x00: /* TCCR */ | ||
| 831 | break; | ||
| 832 | case 0x10: /* TBCR */ | ||
| 833 | if ((opp->timers[idx].tccr & TCCR_TOG) != 0 && | ||
| 834 | (val & TBCR_CI) == 0 && | ||
| 835 | (opp->timers[idx].tbcr & TBCR_CI) != 0) | ||
| 836 | opp->timers[idx].tccr &= ~TCCR_TOG; | ||
| 837 | |||
| 838 | opp->timers[idx].tbcr = val; | ||
| 839 | break; | ||
| 840 | case 0x20: /* TVPR */ | ||
| 841 | write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val); | ||
| 842 | break; | ||
| 843 | case 0x30: /* TDR */ | ||
| 844 | write_IRQreg_idr(opp, opp->irq_tim0 + idx, val); | ||
| 845 | break; | ||
| 846 | } | ||
| 847 | |||
| 848 | return 0; | ||
| 849 | } | ||
| 850 | |||
| 851 | static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr) | ||
| 852 | { | ||
| 853 | struct openpic *opp = opaque; | ||
| 854 | uint32_t retval = -1; | ||
| 855 | int idx; | ||
| 856 | |||
| 857 | pr_debug("%s: addr %#llx\n", __func__, addr); | ||
| 858 | if (addr & 0xF) | ||
| 859 | goto out; | ||
| 860 | |||
| 861 | idx = (addr >> 6) & 0x3; | ||
| 862 | if (addr == 0x0) { | ||
| 863 | /* TFRR */ | ||
| 864 | retval = opp->tfrr; | ||
| 865 | goto out; | ||
| 866 | } | ||
| 867 | |||
| 868 | switch (addr & 0x30) { | ||
| 869 | case 0x00: /* TCCR */ | ||
| 870 | retval = opp->timers[idx].tccr; | ||
| 871 | break; | ||
| 872 | case 0x10: /* TBCR */ | ||
| 873 | retval = opp->timers[idx].tbcr; | ||
| 874 | break; | ||
| 875 | case 0x20: /* TIPV */ | ||
| 876 | retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx); | ||
| 877 | break; | ||
| 878 | case 0x30: /* TIDE (TIDR) */ | ||
| 879 | retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx); | ||
| 880 | break; | ||
| 881 | } | ||
| 882 | |||
| 883 | out: | ||
| 884 | pr_debug("%s: => 0x%08x\n", __func__, retval); | ||
| 885 | *ptr = retval; | ||
| 886 | return 0; | ||
| 887 | } | ||
| 888 | |||
| 889 | static int openpic_src_write(void *opaque, gpa_t addr, u32 val) | ||
| 890 | { | ||
| 891 | struct openpic *opp = opaque; | ||
| 892 | int idx; | ||
| 893 | |||
| 894 | pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); | ||
| 895 | |||
| 896 | addr = addr & 0xffff; | ||
| 897 | idx = addr >> 5; | ||
| 898 | |||
| 899 | switch (addr & 0x1f) { | ||
| 900 | case 0x00: | ||
| 901 | write_IRQreg_ivpr(opp, idx, val); | ||
| 902 | break; | ||
| 903 | case 0x10: | ||
| 904 | write_IRQreg_idr(opp, idx, val); | ||
| 905 | break; | ||
| 906 | case 0x18: | ||
| 907 | write_IRQreg_ilr(opp, idx, val); | ||
| 908 | break; | ||
| 909 | } | ||
| 910 | |||
| 911 | return 0; | ||
| 912 | } | ||
| 913 | |||
| 914 | static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr) | ||
| 915 | { | ||
| 916 | struct openpic *opp = opaque; | ||
| 917 | uint32_t retval; | ||
| 918 | int idx; | ||
| 919 | |||
| 920 | pr_debug("%s: addr %#llx\n", __func__, addr); | ||
| 921 | retval = 0xFFFFFFFF; | ||
| 922 | |||
| 923 | addr = addr & 0xffff; | ||
| 924 | idx = addr >> 5; | ||
| 925 | |||
| 926 | switch (addr & 0x1f) { | ||
| 927 | case 0x00: | ||
| 928 | retval = read_IRQreg_ivpr(opp, idx); | ||
| 929 | break; | ||
| 930 | case 0x10: | ||
| 931 | retval = read_IRQreg_idr(opp, idx); | ||
| 932 | break; | ||
| 933 | case 0x18: | ||
| 934 | retval = read_IRQreg_ilr(opp, idx); | ||
| 935 | break; | ||
| 936 | } | ||
| 937 | |||
| 938 | pr_debug("%s: => 0x%08x\n", __func__, retval); | ||
| 939 | *ptr = retval; | ||
| 940 | return 0; | ||
| 941 | } | ||
| 942 | |||
| 943 | static int openpic_msi_write(void *opaque, gpa_t addr, u32 val) | ||
| 944 | { | ||
| 945 | struct openpic *opp = opaque; | ||
| 946 | int idx = opp->irq_msi; | ||
| 947 | int srs, ibs; | ||
| 948 | |||
| 949 | pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); | ||
| 950 | if (addr & 0xF) | ||
| 951 | return 0; | ||
| 952 | |||
| 953 | switch (addr) { | ||
| 954 | case MSIIR_OFFSET: | ||
| 955 | srs = val >> MSIIR_SRS_SHIFT; | ||
| 956 | idx += srs; | ||
| 957 | ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT; | ||
| 958 | opp->msi[srs].msir |= 1 << ibs; | ||
| 959 | openpic_set_irq(opp, idx, 1); | ||
| 960 | break; | ||
| 961 | default: | ||
| 962 | /* most registers are read-only, thus ignored */ | ||
| 963 | break; | ||
| 964 | } | ||
| 965 | |||
| 966 | return 0; | ||
| 967 | } | ||
| 968 | |||
/*
 * MMIO read handler for the MSI register region.
 *
 * @opaque: struct openpic pointer
 * @addr:   offset within the MSI region
 * @ptr:    receives the register value
 *
 * Reading an MSIR is destructive: it clears the register and lowers
 * the corresponding MSI interrupt (clear-on-read semantics).
 *
 * Returns 0 on success, -ENXIO for unaligned accesses.
 */
static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
{
	struct openpic *opp = opaque;
	uint32_t r = 0;
	int i, srs;

	pr_debug("%s: addr %#llx\n", __func__, addr);
	if (addr & 0xF)
		return -ENXIO;

	/* MSIR banks are 16 bytes apart, so the bank index is addr >> 4. */
	srs = addr >> 4;

	switch (addr) {
	case 0x00:
	case 0x10:
	case 0x20:
	case 0x30:
	case 0x40:
	case 0x50:
	case 0x60:
	case 0x70:	/* MSIRs */
		r = opp->msi[srs].msir;
		/* Clear on read */
		opp->msi[srs].msir = 0;
		openpic_set_irq(opp, opp->irq_msi + srs, 0);
		break;
	case 0x120:	/* MSISR */
		/* Summary register: one bit per bank with a pending MSI. */
		for (i = 0; i < MAX_MSI; i++)
			r |= (opp->msi[i].msir ? 1 : 0) << i;
		break;
	}

	pr_debug("%s: => 0x%08x\n", __func__, r);
	*ptr = r;
	return 0;
}
| 1005 | |||
| 1006 | static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr) | ||
| 1007 | { | ||
| 1008 | uint32_t r = 0; | ||
| 1009 | |||
| 1010 | pr_debug("%s: addr %#llx\n", __func__, addr); | ||
| 1011 | |||
| 1012 | /* TODO: EISR/EIMR */ | ||
| 1013 | |||
| 1014 | *ptr = r; | ||
| 1015 | return 0; | ||
| 1016 | } | ||
| 1017 | |||
/* MMIO write handler for the error-interrupt summary region (stub). */
static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
{
	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);

	/* TODO: EISR/EIMR */
	return 0;
}
| 1025 | |||
| 1026 | static int openpic_cpu_write_internal(void *opaque, gpa_t addr, | ||
| 1027 | u32 val, int idx) | ||
| 1028 | { | ||
| 1029 | struct openpic *opp = opaque; | ||
| 1030 | struct irq_source *src; | ||
| 1031 | struct irq_dest *dst; | ||
| 1032 | int s_IRQ, n_IRQ; | ||
| 1033 | |||
| 1034 | pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx, | ||
| 1035 | addr, val); | ||
| 1036 | |||
| 1037 | if (idx < 0) | ||
| 1038 | return 0; | ||
| 1039 | |||
| 1040 | if (addr & 0xF) | ||
| 1041 | return 0; | ||
| 1042 | |||
| 1043 | dst = &opp->dst[idx]; | ||
| 1044 | addr &= 0xFF0; | ||
| 1045 | switch (addr) { | ||
| 1046 | case 0x40: /* IPIDR */ | ||
| 1047 | case 0x50: | ||
| 1048 | case 0x60: | ||
| 1049 | case 0x70: | ||
| 1050 | idx = (addr - 0x40) >> 4; | ||
| 1051 | /* we use IDE as mask which CPUs to deliver the IPI to still. */ | ||
| 1052 | opp->src[opp->irq_ipi0 + idx].destmask |= val; | ||
| 1053 | openpic_set_irq(opp, opp->irq_ipi0 + idx, 1); | ||
| 1054 | openpic_set_irq(opp, opp->irq_ipi0 + idx, 0); | ||
| 1055 | break; | ||
| 1056 | case 0x80: /* CTPR */ | ||
| 1057 | dst->ctpr = val & 0x0000000F; | ||
| 1058 | |||
| 1059 | pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n", | ||
| 1060 | __func__, idx, dst->ctpr, dst->raised.priority, | ||
| 1061 | dst->servicing.priority); | ||
| 1062 | |||
| 1063 | if (dst->raised.priority <= dst->ctpr) { | ||
| 1064 | pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n", | ||
| 1065 | __func__, idx); | ||
| 1066 | mpic_irq_lower(opp, dst, ILR_INTTGT_INT); | ||
| 1067 | } else if (dst->raised.priority > dst->servicing.priority) { | ||
| 1068 | pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n", | ||
| 1069 | __func__, idx, dst->raised.next); | ||
| 1070 | mpic_irq_raise(opp, dst, ILR_INTTGT_INT); | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | break; | ||
| 1074 | case 0x90: /* WHOAMI */ | ||
| 1075 | /* Read-only register */ | ||
| 1076 | break; | ||
| 1077 | case 0xA0: /* IACK */ | ||
| 1078 | /* Read-only register */ | ||
| 1079 | break; | ||
| 1080 | case 0xB0: { /* EOI */ | ||
| 1081 | int notify_eoi; | ||
| 1082 | |||
| 1083 | pr_debug("EOI\n"); | ||
| 1084 | s_IRQ = IRQ_get_next(opp, &dst->servicing); | ||
| 1085 | |||
| 1086 | if (s_IRQ < 0) { | ||
| 1087 | pr_debug("%s: EOI with no interrupt in service\n", | ||
| 1088 | __func__); | ||
| 1089 | break; | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | IRQ_resetbit(&dst->servicing, s_IRQ); | ||
| 1093 | /* Notify listeners that the IRQ is over */ | ||
| 1094 | notify_eoi = s_IRQ; | ||
| 1095 | /* Set up next servicing IRQ */ | ||
| 1096 | s_IRQ = IRQ_get_next(opp, &dst->servicing); | ||
| 1097 | /* Check queued interrupts. */ | ||
| 1098 | n_IRQ = IRQ_get_next(opp, &dst->raised); | ||
| 1099 | src = &opp->src[n_IRQ]; | ||
| 1100 | if (n_IRQ != -1 && | ||
| 1101 | (s_IRQ == -1 || | ||
| 1102 | IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) { | ||
| 1103 | pr_debug("Raise OpenPIC INT output cpu %d irq %d\n", | ||
| 1104 | idx, n_IRQ); | ||
| 1105 | mpic_irq_raise(opp, dst, ILR_INTTGT_INT); | ||
| 1106 | } | ||
| 1107 | |||
| 1108 | spin_unlock(&opp->lock); | ||
| 1109 | kvm_notify_acked_irq(opp->kvm, 0, notify_eoi); | ||
| 1110 | spin_lock(&opp->lock); | ||
| 1111 | |||
| 1112 | break; | ||
| 1113 | } | ||
| 1114 | default: | ||
| 1115 | break; | ||
| 1116 | } | ||
| 1117 | |||
| 1118 | return 0; | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val) | ||
| 1122 | { | ||
| 1123 | struct openpic *opp = opaque; | ||
| 1124 | |||
| 1125 | return openpic_cpu_write_internal(opp, addr, val, | ||
| 1126 | (addr & 0x1f000) >> 12); | ||
| 1127 | } | ||
| 1128 | |||
/*
 * Perform an interrupt-acknowledge cycle for destination @dst (CPU @cpu).
 *
 * Lowers the INT output, dequeues the highest-priority raised
 * interrupt and moves it into the servicing queue, returning its
 * vector.  Returns the spurious vector (SPVE) if nothing is pending
 * or the raised interrupt fails the activity/priority sanity check.
 *
 * NOTE(review): assumes opp->lock is held by the caller — all visible
 * callers take it; confirm for any new call site.
 */
static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
			     int cpu)
{
	struct irq_source *src;
	int retval, irq;

	pr_debug("Lower OpenPIC INT output\n");
	mpic_irq_lower(opp, dst, ILR_INTTGT_INT);

	irq = IRQ_get_next(opp, &dst->raised);
	pr_debug("IACK: irq=%d\n", irq);

	if (irq == -1)
		/* No more interrupt pending */
		return opp->spve;

	src = &opp->src[irq];
	/* A raised IRQ must be active and above the current task priority. */
	if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
	    !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
		pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
		       __func__, irq, dst->ctpr, src->ivpr);
		openpic_update_irq(opp, irq);
		retval = opp->spve;
	} else {
		/* IRQ enter servicing state */
		IRQ_setbit(&dst->servicing, irq);
		retval = IVPR_VECTOR(opp, src->ivpr);
	}

	if (!src->level) {
		/* edge-sensitive IRQ */
		src->ivpr &= ~IVPR_ACTIVITY_MASK;
		src->pending = 0;
		IRQ_resetbit(&dst->raised, irq);
	}

	/* IPIs stay pending until every CPU in destmask has acked. */
	if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
		src->destmask &= ~(1 << cpu);
		if (src->destmask && !src->level) {
			/* trigger on CPUs that didn't know about it yet */
			openpic_set_irq(opp, irq, 1);
			openpic_set_irq(opp, irq, 0);
			/* if all CPUs knew about it, set active bit again */
			src->ivpr |= IVPR_ACTIVITY_MASK;
		}
	}

	return retval;
}
| 1178 | |||
| 1179 | void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) | ||
| 1180 | { | ||
| 1181 | struct openpic *opp = vcpu->arch.mpic; | ||
| 1182 | int cpu = vcpu->arch.irq_cpu_id; | ||
| 1183 | unsigned long flags; | ||
| 1184 | |||
| 1185 | spin_lock_irqsave(&opp->lock, flags); | ||
| 1186 | |||
| 1187 | if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY) | ||
| 1188 | kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu)); | ||
| 1189 | |||
| 1190 | spin_unlock_irqrestore(&opp->lock, flags); | ||
| 1191 | } | ||
| 1192 | |||
/*
 * Handle a read from the per-CPU register region on behalf of CPU @idx.
 *
 * @opaque: struct openpic pointer
 * @addr:   offset within the per-CPU region
 * @ptr:    receives the register value (0xFFFFFFFF for bad cpu/offset)
 * @idx:    destination CPU number; negative means "no CPU"
 *
 * Note that reading IACK is destructive (it acknowledges the pending
 * interrupt via openpic_iack()).  Always returns 0.
 */
static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
				     u32 *ptr, int idx)
{
	struct openpic *opp = opaque;
	struct irq_dest *dst;
	uint32_t retval;

	pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
	retval = 0xFFFFFFFF;

	if (idx < 0)
		goto out;

	/* Registers are 16-byte aligned. */
	if (addr & 0xF)
		goto out;

	dst = &opp->dst[idx];
	addr &= 0xFF0;
	switch (addr) {
	case 0x80:		/* CTPR */
		retval = dst->ctpr;
		break;
	case 0x90:		/* WHOAMI */
		retval = idx;
		break;
	case 0xA0:		/* IACK */
		retval = openpic_iack(opp, dst, idx);
		break;
	case 0xB0:		/* EOI */
		retval = 0;
		break;
	default:
		break;
	}
	pr_debug("%s: => 0x%08x\n", __func__, retval);

out:
	*ptr = retval;
	return 0;
}
| 1233 | |||
| 1234 | static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr) | ||
| 1235 | { | ||
| 1236 | struct openpic *opp = opaque; | ||
| 1237 | |||
| 1238 | return openpic_cpu_read_internal(opp, addr, ptr, | ||
| 1239 | (addr & 0x1f000) >> 12); | ||
| 1240 | } | ||
| 1241 | |||
/*
 * One sub-region of the emulated register space: read/write dispatch
 * callbacks plus the [start_addr, start_addr + size) window they
 * serve.  Handlers receive addresses relative to start_addr.
 */
struct mem_reg {
	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
	int (*write)(void *opaque, gpa_t addr, u32 val);
	gpa_t start_addr;
	int size;
};
| 1248 | |||
| 1249 | static const struct mem_reg openpic_gbl_mmio = { | ||
| 1250 | .write = openpic_gbl_write, | ||
| 1251 | .read = openpic_gbl_read, | ||
| 1252 | .start_addr = OPENPIC_GLB_REG_START, | ||
| 1253 | .size = OPENPIC_GLB_REG_SIZE, | ||
| 1254 | }; | ||
| 1255 | |||
| 1256 | static const struct mem_reg openpic_tmr_mmio = { | ||
| 1257 | .write = openpic_tmr_write, | ||
| 1258 | .read = openpic_tmr_read, | ||
| 1259 | .start_addr = OPENPIC_TMR_REG_START, | ||
| 1260 | .size = OPENPIC_TMR_REG_SIZE, | ||
| 1261 | }; | ||
| 1262 | |||
| 1263 | static const struct mem_reg openpic_cpu_mmio = { | ||
| 1264 | .write = openpic_cpu_write, | ||
| 1265 | .read = openpic_cpu_read, | ||
| 1266 | .start_addr = OPENPIC_CPU_REG_START, | ||
| 1267 | .size = OPENPIC_CPU_REG_SIZE, | ||
| 1268 | }; | ||
| 1269 | |||
| 1270 | static const struct mem_reg openpic_src_mmio = { | ||
| 1271 | .write = openpic_src_write, | ||
| 1272 | .read = openpic_src_read, | ||
| 1273 | .start_addr = OPENPIC_SRC_REG_START, | ||
| 1274 | .size = OPENPIC_SRC_REG_SIZE, | ||
| 1275 | }; | ||
| 1276 | |||
| 1277 | static const struct mem_reg openpic_msi_mmio = { | ||
| 1278 | .read = openpic_msi_read, | ||
| 1279 | .write = openpic_msi_write, | ||
| 1280 | .start_addr = OPENPIC_MSI_REG_START, | ||
| 1281 | .size = OPENPIC_MSI_REG_SIZE, | ||
| 1282 | }; | ||
| 1283 | |||
| 1284 | static const struct mem_reg openpic_summary_mmio = { | ||
| 1285 | .read = openpic_summary_read, | ||
| 1286 | .write = openpic_summary_write, | ||
| 1287 | .start_addr = OPENPIC_SUMMARY_REG_START, | ||
| 1288 | .size = OPENPIC_SUMMARY_REG_SIZE, | ||
| 1289 | }; | ||
| 1290 | |||
| 1291 | static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr) | ||
| 1292 | { | ||
| 1293 | if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) { | ||
| 1294 | WARN(1, "kvm mpic: too many mmio regions\n"); | ||
| 1295 | return; | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | opp->mmio_regions[opp->num_mmio_regions++] = mr; | ||
| 1299 | } | ||
| 1300 | |||
/*
 * Shared initialization for the Freescale MPIC variants: registers
 * the MSI and summary MMIO regions, sets reset defaults, and lays out
 * the virtual IRQ space (regular sources first, then IPIs, then timers).
 */
static void fsl_common_init(struct openpic *opp)
{
	int i;
	int virq = MAX_SRC;

	add_mmio_region(opp, &openpic_msi_mmio);
	add_mmio_region(opp, &openpic_summary_mmio);

	opp->vid = VID_REVISION_1_2;
	opp->vir = VIR_GENERIC;
	opp->vector_mask = 0xFFFF;
	opp->tfrr_reset = 0;
	opp->ivpr_reset = IVPR_MASK_MASK;
	opp->idr_reset = 1 << 0;
	opp->max_irq = MAX_IRQ;

	/* IPIs and timers are allocated above the regular sources. */
	opp->irq_ipi0 = virq;
	virq += MAX_IPI;
	opp->irq_tim0 = virq;
	virq += MAX_TMR;

	BUG_ON(virq > MAX_IRQ);

	/* 224 is presumably the FSL MSI source base — TODO confirm
	 * against the hardware reference manual. */
	opp->irq_msi = 224;

	/* External sources default to edge triggering (level = false). */
	for (i = 0; i < opp->fsl->max_ext; i++)
		opp->src[i].level = false;

	/* Internal interrupts, including message and MSI */
	for (i = 16; i < MAX_SRC; i++) {
		opp->src[i].type = IRQ_TYPE_FSLINT;
		opp->src[i].level = true;
	}

	/* timers and IPIs */
	for (i = MAX_SRC; i < virq; i++) {
		opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
		opp->src[i].level = false;
	}
}
| 1341 | |||
| 1342 | static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr) | ||
| 1343 | { | ||
| 1344 | int i; | ||
| 1345 | |||
| 1346 | for (i = 0; i < opp->num_mmio_regions; i++) { | ||
| 1347 | const struct mem_reg *mr = opp->mmio_regions[i]; | ||
| 1348 | |||
| 1349 | if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) | ||
| 1350 | continue; | ||
| 1351 | |||
| 1352 | return mr->read(opp, addr - mr->start_addr, ptr); | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | return -ENXIO; | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) | ||
| 1359 | { | ||
| 1360 | int i; | ||
| 1361 | |||
| 1362 | for (i = 0; i < opp->num_mmio_regions; i++) { | ||
| 1363 | const struct mem_reg *mr = opp->mmio_regions[i]; | ||
| 1364 | |||
| 1365 | if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) | ||
| 1366 | continue; | ||
| 1367 | |||
| 1368 | return mr->write(opp, addr - mr->start_addr, val); | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | return -ENXIO; | ||
| 1372 | } | ||
| 1373 | |||
/*
 * kvm_io_device read callback for the MPIC MMIO window.
 *
 * Only 32-bit accesses are architecturally valid, but 8-bit reads are
 * tolerated: the containing word is fetched and the addressed byte
 * extracted.  Returns 0 or the region handler's error; -EINVAL for
 * bad alignment or length.
 */
static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
			 int len, void *ptr)
{
	struct openpic *opp = container_of(this, struct openpic, mmio);
	int ret;
	union {
		u32 val;
		u8 bytes[4];
	} u;

	if (addr & (len - 1)) {
		pr_debug("%s: bad alignment %llx/%d\n",
			 __func__, addr, len);
		return -EINVAL;
	}

	spin_lock_irq(&opp->lock);
	ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
	spin_unlock_irq(&opp->lock);

	/*
	 * Technically only 32-bit accesses are allowed, but be nice to
	 * people dumping registers a byte at a time -- it works in real
	 * hardware (reads only, not writes).
	 */
	if (len == 4) {
		*(u32 *)ptr = u.val;
		pr_debug("%s: addr %llx ret %d len 4 val %x\n",
			 __func__, addr, ret, u.val);
	} else if (len == 1) {
		/* Pick the addressed byte out of the 32-bit word. */
		*(u8 *)ptr = u.bytes[addr & 3];
		pr_debug("%s: addr %llx ret %d len 1 val %x\n",
			 __func__, addr, ret, u.bytes[addr & 3]);
	} else {
		pr_debug("%s: bad length %d\n", __func__, len);
		return -EINVAL;
	}

	return ret;
}
| 1414 | |||
| 1415 | static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, | ||
| 1416 | int len, const void *ptr) | ||
| 1417 | { | ||
| 1418 | struct openpic *opp = container_of(this, struct openpic, mmio); | ||
| 1419 | int ret; | ||
| 1420 | |||
| 1421 | if (len != 4) { | ||
| 1422 | pr_debug("%s: bad length %d\n", __func__, len); | ||
| 1423 | return -EOPNOTSUPP; | ||
| 1424 | } | ||
| 1425 | if (addr & 3) { | ||
| 1426 | pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len); | ||
| 1427 | return -EOPNOTSUPP; | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | spin_lock_irq(&opp->lock); | ||
| 1431 | ret = kvm_mpic_write_internal(opp, addr - opp->reg_base, | ||
| 1432 | *(const u32 *)ptr); | ||
| 1433 | spin_unlock_irq(&opp->lock); | ||
| 1434 | |||
| 1435 | pr_debug("%s: addr %llx ret %d val %x\n", | ||
| 1436 | __func__, addr, ret, *(const u32 *)ptr); | ||
| 1437 | |||
| 1438 | return ret; | ||
| 1439 | } | ||
| 1440 | |||
/* kvm_io_bus callbacks for the emulated MPIC register window. */
static const struct kvm_io_device_ops mpic_mmio_ops = {
	.read = kvm_mpic_read,
	.write = kvm_mpic_write,
};
| 1445 | |||
/*
 * Register the MPIC's MMIO window at opp->reg_base on the VM's MMIO bus.
 *
 * NOTE(review): the return value of kvm_io_bus_register_dev() is
 * dropped, so a registration failure leaves the MPIC silently
 * unmapped — consider propagating it.
 */
static void map_mmio(struct openpic *opp)
{
	kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);

	kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
				opp->reg_base, OPENPIC_REG_SIZE,
				&opp->mmio);
}
| 1454 | |||
/* Remove the MPIC's MMIO window from the VM's MMIO bus. */
static void unmap_mmio(struct openpic *opp)
{
	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
}
| 1459 | |||
/*
 * Handle the KVM_DEV_MPIC_BASE_ADDR attribute: move, or with base 0
 * disable, the MPIC's guest-physical MMIO window.
 *
 * The base must be 256 KiB aligned.  Returns 0 on success, -EFAULT or
 * -EINVAL on bad user input.
 */
static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
{
	u64 base;

	if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
		return -EFAULT;

	/* 0x3ffff: the window must be 256 KiB aligned. */
	if (base & 0x3ffff) {
		pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
			 __func__, base);
		return -EINVAL;
	}

	if (base == opp->reg_base)
		return 0;

	/* slots_lock serializes remap against other io-bus updates. */
	mutex_lock(&opp->kvm->slots_lock);

	unmap_mmio(opp);
	opp->reg_base = base;

	pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
		 __func__, base);

	/* A base of zero means "leave the MPIC unmapped". */
	if (base == 0)
		goto out;

	map_mmio(opp);

out:
	mutex_unlock(&opp->kvm->slots_lock);
	return 0;
}
| 1493 | |||
/* Direction selector for access_reg(): write (SET) vs. read (GET). */
#define ATTR_SET 0
#define ATTR_GET 1
| 1496 | |||
| 1497 | static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type) | ||
| 1498 | { | ||
| 1499 | int ret; | ||
| 1500 | |||
| 1501 | if (addr & 3) | ||
| 1502 | return -ENXIO; | ||
| 1503 | |||
| 1504 | spin_lock_irq(&opp->lock); | ||
| 1505 | |||
| 1506 | if (type == ATTR_SET) | ||
| 1507 | ret = kvm_mpic_write_internal(opp, addr, *val); | ||
| 1508 | else | ||
| 1509 | ret = kvm_mpic_read_internal(opp, addr, val); | ||
| 1510 | |||
| 1511 | spin_unlock_irq(&opp->lock); | ||
| 1512 | |||
| 1513 | pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val); | ||
| 1514 | |||
| 1515 | return ret; | ||
| 1516 | } | ||
| 1517 | |||
/*
 * KVM_SET_DEVICE_ATTR handler.
 *
 * Groups: MISC/BASE_ADDR moves the MMIO window; REGISTER writes a
 * register as if from the guest; IRQ_ACTIVE asserts (1) or deasserts
 * (0) an interrupt source.  Returns 0 on success, -EFAULT/-EINVAL on
 * bad user input, -ENXIO for an unknown group or attribute.
 */
static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct openpic *opp = dev->private;
	u32 attr32;

	switch (attr->group) {
	case KVM_DEV_MPIC_GRP_MISC:
		switch (attr->attr) {
		case KVM_DEV_MPIC_BASE_ADDR:
			return set_base_addr(opp, attr);
		}

		break;

	case KVM_DEV_MPIC_GRP_REGISTER:
		if (get_user(attr32, (u32 __user *)(long)attr->addr))
			return -EFAULT;

		return access_reg(opp, attr->attr, &attr32, ATTR_SET);

	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
		/* NOTE(review): this bound admits attr == MAX_SRC (the
		 * first IPI slot); verify whether it should be >= MAX_SRC. */
		if (attr->attr > MAX_SRC)
			return -EINVAL;

		if (get_user(attr32, (u32 __user *)(long)attr->addr))
			return -EFAULT;

		/* Level must be exactly 0 or 1. */
		if (attr32 != 0 && attr32 != 1)
			return -EINVAL;

		spin_lock_irq(&opp->lock);
		openpic_set_irq(opp, attr->attr, attr32);
		spin_unlock_irq(&opp->lock);
		return 0;
	}

	return -ENXIO;
}
| 1556 | |||
/*
 * KVM_GET_DEVICE_ATTR handler.
 *
 * Groups: MISC/BASE_ADDR returns the current MMIO base; REGISTER
 * reads a register as if from the guest; IRQ_ACTIVE returns a
 * source's pending state.  Returns 0 on success, -EFAULT/-EINVAL on
 * bad user input, -ENXIO for an unknown group or attribute.
 */
static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct openpic *opp = dev->private;
	u64 attr64;
	u32 attr32;
	int ret;

	switch (attr->group) {
	case KVM_DEV_MPIC_GRP_MISC:
		switch (attr->attr) {
		case KVM_DEV_MPIC_BASE_ADDR:
			/* Snapshot reg_base under the same lock that
			 * set_base_addr() updates it with. */
			mutex_lock(&opp->kvm->slots_lock);
			attr64 = opp->reg_base;
			mutex_unlock(&opp->kvm->slots_lock);

			if (copy_to_user((u64 __user *)(long)attr->addr,
					 &attr64, sizeof(u64)))
				return -EFAULT;

			return 0;
		}

		break;

	case KVM_DEV_MPIC_GRP_REGISTER:
		ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
		if (ret)
			return ret;

		if (put_user(attr32, (u32 __user *)(long)attr->addr))
			return -EFAULT;

		return 0;

	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
		/* NOTE(review): bound admits attr == MAX_SRC; see
		 * matching check in mpic_set_attr(). */
		if (attr->attr > MAX_SRC)
			return -EINVAL;

		spin_lock_irq(&opp->lock);
		attr32 = opp->src[attr->attr].pending;
		spin_unlock_irq(&opp->lock);

		if (put_user(attr32, (u32 __user *)(long)attr->addr))
			return -EFAULT;

		return 0;
	}

	return -ENXIO;
}
| 1607 | |||
| 1608 | static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
| 1609 | { | ||
| 1610 | switch (attr->group) { | ||
| 1611 | case KVM_DEV_MPIC_GRP_MISC: | ||
| 1612 | switch (attr->attr) { | ||
| 1613 | case KVM_DEV_MPIC_BASE_ADDR: | ||
| 1614 | return 0; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | break; | ||
| 1618 | |||
| 1619 | case KVM_DEV_MPIC_GRP_REGISTER: | ||
| 1620 | return 0; | ||
| 1621 | |||
| 1622 | case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: | ||
| 1623 | if (attr->attr > MAX_SRC) | ||
| 1624 | break; | ||
| 1625 | |||
| 1626 | return 0; | ||
| 1627 | } | ||
| 1628 | |||
| 1629 | return -ENXIO; | ||
| 1630 | } | ||
| 1631 | |||
/*
 * kvm_device destroy callback: unpublish the MPIC from the VM and
 * free it.  Vcpus must already be disconnected (see
 * kvmppc_mpic_disconnect_vcpu()).
 */
static void mpic_destroy(struct kvm_device *dev)
{
	struct openpic *opp = dev->private;

	dev->kvm->arch.mpic = NULL;
	kfree(opp);
}
| 1639 | |||
| 1640 | static int mpic_set_default_irq_routing(struct openpic *opp) | ||
| 1641 | { | ||
| 1642 | struct kvm_irq_routing_entry *routing; | ||
| 1643 | |||
| 1644 | /* Create a nop default map, so that dereferencing it still works */ | ||
| 1645 | routing = kzalloc((sizeof(*routing)), GFP_KERNEL); | ||
| 1646 | if (!routing) | ||
| 1647 | return -ENOMEM; | ||
| 1648 | |||
| 1649 | kvm_set_irq_routing(opp->kvm, routing, 0, 0); | ||
| 1650 | |||
| 1651 | kfree(routing); | ||
| 1652 | return 0; | ||
| 1653 | } | ||
| 1654 | |||
/*
 * kvm_device create callback: allocate and initialize the emulated
 * MPIC, register its MMIO sub-regions, and publish it in kvm->arch.
 *
 * @type selects the Freescale MPIC variant being modeled.  Returns 0
 * on success; -EINVAL if the VM already has an MPIC, -ENOMEM on
 * allocation failure, -ENODEV for an unknown model.
 */
static int mpic_create(struct kvm_device *dev, u32 type)
{
	struct openpic *opp;
	int ret;

	/* We only support one MPIC at a time for now */
	if (dev->kvm->arch.mpic)
		return -EINVAL;

	opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
	if (!opp)
		return -ENOMEM;

	dev->private = opp;
	opp->kvm = dev->kvm;
	opp->dev = dev;
	opp->model = type;
	spin_lock_init(&opp->lock);

	/* Common register windows; MSI/summary are added per-model. */
	add_mmio_region(opp, &openpic_gbl_mmio);
	add_mmio_region(opp, &openpic_tmr_mmio);
	add_mmio_region(opp, &openpic_src_mmio);
	add_mmio_region(opp, &openpic_cpu_mmio);

	switch (opp->model) {
	case KVM_DEV_TYPE_FSL_MPIC_20:
		opp->fsl = &fsl_mpic_20;
		opp->brr1 = 0x00400200;
		opp->flags |= OPENPIC_FLAG_IDR_CRIT;
		opp->nb_irqs = 80;
		opp->mpic_mode_mask = GCR_MODE_MIXED;

		fsl_common_init(opp);

		break;

	case KVM_DEV_TYPE_FSL_MPIC_42:
		opp->fsl = &fsl_mpic_42;
		opp->brr1 = 0x00400402;
		opp->flags |= OPENPIC_FLAG_ILR;
		opp->nb_irqs = 196;
		opp->mpic_mode_mask = GCR_MODE_PROXY;

		fsl_common_init(opp);

		break;

	default:
		ret = -ENODEV;
		goto err;
	}

	ret = mpic_set_default_irq_routing(opp);
	if (ret)
		goto err;

	openpic_reset(opp);

	/* Publish only after the MPIC is fully initialized. */
	smp_wmb();
	dev->kvm->arch.mpic = opp;

	return 0;

err:
	kfree(opp);
	return ret;
}
| 1722 | |||
/* Device ops exposed to the generic KVM device-control framework. */
struct kvm_device_ops kvm_mpic_ops = {
	.name = "kvm-mpic",
	.create = mpic_create,
	.destroy = mpic_destroy,
	.set_attr = mpic_set_attr,
	.get_attr = mpic_get_attr,
	.has_attr = mpic_has_attr,
};
| 1731 | |||
| 1732 | int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, | ||
| 1733 | u32 cpu) | ||
| 1734 | { | ||
| 1735 | struct openpic *opp = dev->private; | ||
| 1736 | int ret = 0; | ||
| 1737 | |||
| 1738 | if (dev->ops != &kvm_mpic_ops) | ||
| 1739 | return -EPERM; | ||
| 1740 | if (opp->kvm != vcpu->kvm) | ||
| 1741 | return -EPERM; | ||
| 1742 | if (cpu < 0 || cpu >= MAX_CPU) | ||
| 1743 | return -EPERM; | ||
| 1744 | |||
| 1745 | spin_lock_irq(&opp->lock); | ||
| 1746 | |||
| 1747 | if (opp->dst[cpu].vcpu) { | ||
| 1748 | ret = -EEXIST; | ||
| 1749 | goto out; | ||
| 1750 | } | ||
| 1751 | if (vcpu->arch.irq_type) { | ||
| 1752 | ret = -EBUSY; | ||
| 1753 | goto out; | ||
| 1754 | } | ||
| 1755 | |||
| 1756 | opp->dst[cpu].vcpu = vcpu; | ||
| 1757 | opp->nb_cpus = max(opp->nb_cpus, cpu + 1); | ||
| 1758 | |||
| 1759 | vcpu->arch.mpic = opp; | ||
| 1760 | vcpu->arch.irq_cpu_id = cpu; | ||
| 1761 | vcpu->arch.irq_type = KVMPPC_IRQ_MPIC; | ||
| 1762 | |||
| 1763 | /* This might need to be changed if GCR gets extended */ | ||
| 1764 | if (opp->mpic_mode_mask == GCR_MODE_PROXY) | ||
| 1765 | vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL; | ||
| 1766 | |||
| 1767 | out: | ||
| 1768 | spin_unlock_irq(&opp->lock); | ||
| 1769 | return ret; | ||
| 1770 | } | ||
| 1771 | |||
| 1772 | /* | ||
| 1773 | * This should only happen immediately before the mpic is destroyed, | ||
| 1774 | * so we shouldn't need to worry about anything still trying to | ||
| 1775 | * access the vcpu pointer. | ||
| 1776 | */ | ||
| 1777 | void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu) | ||
| 1778 | { | ||
| 1779 | BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu); | ||
| 1780 | |||
| 1781 | opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL; | ||
| 1782 | } | ||
| 1783 | |||
| 1784 | /* | ||
| 1785 | * Return value: | ||
| 1786 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | ||
| 1787 | * = 0 Interrupt was coalesced (previous irq is still pending) | ||
| 1788 | * > 0 Number of CPUs interrupt was delivered to | ||
| 1789 | */ | ||
| 1790 | static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e, | ||
| 1791 | struct kvm *kvm, int irq_source_id, int level, | ||
| 1792 | bool line_status) | ||
| 1793 | { | ||
| 1794 | u32 irq = e->irqchip.pin; | ||
| 1795 | struct openpic *opp = kvm->arch.mpic; | ||
| 1796 | unsigned long flags; | ||
| 1797 | |||
| 1798 | spin_lock_irqsave(&opp->lock, flags); | ||
| 1799 | openpic_set_irq(opp, irq, level); | ||
| 1800 | spin_unlock_irqrestore(&opp->lock, flags); | ||
| 1801 | |||
| 1802 | /* All code paths we care about don't check for the return value */ | ||
| 1803 | return 0; | ||
| 1804 | } | ||
| 1805 | |||
| 1806 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | ||
| 1807 | struct kvm *kvm, int irq_source_id, int level, bool line_status) | ||
| 1808 | { | ||
| 1809 | struct openpic *opp = kvm->arch.mpic; | ||
| 1810 | unsigned long flags; | ||
| 1811 | |||
| 1812 | spin_lock_irqsave(&opp->lock, flags); | ||
| 1813 | |||
| 1814 | /* | ||
| 1815 | * XXX We ignore the target address for now, as we only support | ||
| 1816 | * a single MSI bank. | ||
| 1817 | */ | ||
| 1818 | openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data); | ||
| 1819 | spin_unlock_irqrestore(&opp->lock, flags); | ||
| 1820 | |||
| 1821 | /* All code paths we care about don't check for the return value */ | ||
| 1822 | return 0; | ||
| 1823 | } | ||
| 1824 | |||
| 1825 | int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, | ||
| 1826 | struct kvm_kernel_irq_routing_entry *e, | ||
| 1827 | const struct kvm_irq_routing_entry *ue) | ||
| 1828 | { | ||
| 1829 | int r = -EINVAL; | ||
| 1830 | |||
| 1831 | switch (ue->type) { | ||
| 1832 | case KVM_IRQ_ROUTING_IRQCHIP: | ||
| 1833 | e->set = mpic_set_irq; | ||
| 1834 | e->irqchip.irqchip = ue->u.irqchip.irqchip; | ||
| 1835 | e->irqchip.pin = ue->u.irqchip.pin; | ||
| 1836 | if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) | ||
| 1837 | goto out; | ||
| 1838 | rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; | ||
| 1839 | break; | ||
| 1840 | case KVM_IRQ_ROUTING_MSI: | ||
| 1841 | e->set = kvm_set_msi; | ||
| 1842 | e->msi.address_lo = ue->u.msi.address_lo; | ||
| 1843 | e->msi.address_hi = ue->u.msi.address_hi; | ||
| 1844 | e->msi.data = ue->u.msi.data; | ||
| 1845 | break; | ||
| 1846 | default: | ||
| 1847 | goto out; | ||
| 1848 | } | ||
| 1849 | |||
| 1850 | r = 0; | ||
| 1851 | out: | ||
| 1852 | return r; | ||
| 1853 | } | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 934413cd3a1b..6316ee336e88 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/hrtimer.h> | 25 | #include <linux/hrtimer.h> |
| 26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
| 27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
| 28 | #include <linux/file.h> | ||
| 28 | #include <asm/cputable.h> | 29 | #include <asm/cputable.h> |
| 29 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
| 30 | #include <asm/kvm_ppc.h> | 31 | #include <asm/kvm_ppc.h> |
| @@ -32,6 +33,7 @@ | |||
| 32 | #include <asm/cputhreads.h> | 33 | #include <asm/cputhreads.h> |
| 33 | #include <asm/irqflags.h> | 34 | #include <asm/irqflags.h> |
| 34 | #include "timing.h" | 35 | #include "timing.h" |
| 36 | #include "irq.h" | ||
| 35 | #include "../mm/mmu_decl.h" | 37 | #include "../mm/mmu_decl.h" |
| 36 | 38 | ||
| 37 | #define CREATE_TRACE_POINTS | 39 | #define CREATE_TRACE_POINTS |
| @@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 317 | case KVM_CAP_ENABLE_CAP: | 319 | case KVM_CAP_ENABLE_CAP: |
| 318 | case KVM_CAP_ONE_REG: | 320 | case KVM_CAP_ONE_REG: |
| 319 | case KVM_CAP_IOEVENTFD: | 321 | case KVM_CAP_IOEVENTFD: |
| 322 | case KVM_CAP_DEVICE_CTRL: | ||
| 320 | r = 1; | 323 | r = 1; |
| 321 | break; | 324 | break; |
| 322 | #ifndef CONFIG_KVM_BOOK3S_64_HV | 325 | #ifndef CONFIG_KVM_BOOK3S_64_HV |
| @@ -326,6 +329,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 326 | #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) | 329 | #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) |
| 327 | case KVM_CAP_SW_TLB: | 330 | case KVM_CAP_SW_TLB: |
| 328 | #endif | 331 | #endif |
| 332 | #ifdef CONFIG_KVM_MPIC | ||
| 333 | case KVM_CAP_IRQ_MPIC: | ||
| 334 | #endif | ||
| 329 | r = 1; | 335 | r = 1; |
| 330 | break; | 336 | break; |
| 331 | case KVM_CAP_COALESCED_MMIO: | 337 | case KVM_CAP_COALESCED_MMIO: |
| @@ -335,6 +341,10 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 335 | #ifdef CONFIG_PPC_BOOK3S_64 | 341 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 336 | case KVM_CAP_SPAPR_TCE: | 342 | case KVM_CAP_SPAPR_TCE: |
| 337 | case KVM_CAP_PPC_ALLOC_HTAB: | 343 | case KVM_CAP_PPC_ALLOC_HTAB: |
| 344 | case KVM_CAP_PPC_RTAS: | ||
| 345 | #ifdef CONFIG_KVM_XICS | ||
| 346 | case KVM_CAP_IRQ_XICS: | ||
| 347 | #endif | ||
| 338 | r = 1; | 348 | r = 1; |
| 339 | break; | 349 | break; |
| 340 | #endif /* CONFIG_PPC_BOOK3S_64 */ | 350 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
| @@ -411,18 +421,17 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
| 411 | } | 421 | } |
| 412 | 422 | ||
| 413 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 423 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
| 414 | struct kvm_memory_slot *memslot, | 424 | struct kvm_memory_slot *memslot, |
| 415 | struct kvm_memory_slot old, | 425 | struct kvm_userspace_memory_region *mem, |
| 416 | struct kvm_userspace_memory_region *mem, | 426 | enum kvm_mr_change change) |
| 417 | bool user_alloc) | ||
| 418 | { | 427 | { |
| 419 | return kvmppc_core_prepare_memory_region(kvm, memslot, mem); | 428 | return kvmppc_core_prepare_memory_region(kvm, memslot, mem); |
| 420 | } | 429 | } |
| 421 | 430 | ||
| 422 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 431 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
| 423 | struct kvm_userspace_memory_region *mem, | 432 | struct kvm_userspace_memory_region *mem, |
| 424 | struct kvm_memory_slot old, | 433 | const struct kvm_memory_slot *old, |
| 425 | bool user_alloc) | 434 | enum kvm_mr_change change) |
| 426 | { | 435 | { |
| 427 | kvmppc_core_commit_memory_region(kvm, mem, old); | 436 | kvmppc_core_commit_memory_region(kvm, mem, old); |
| 428 | } | 437 | } |
| @@ -460,6 +469,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
| 460 | tasklet_kill(&vcpu->arch.tasklet); | 469 | tasklet_kill(&vcpu->arch.tasklet); |
| 461 | 470 | ||
| 462 | kvmppc_remove_vcpu_debugfs(vcpu); | 471 | kvmppc_remove_vcpu_debugfs(vcpu); |
| 472 | |||
| 473 | switch (vcpu->arch.irq_type) { | ||
| 474 | case KVMPPC_IRQ_MPIC: | ||
| 475 | kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); | ||
| 476 | break; | ||
| 477 | case KVMPPC_IRQ_XICS: | ||
| 478 | kvmppc_xics_free_icp(vcpu); | ||
| 479 | break; | ||
| 480 | } | ||
| 481 | |||
| 463 | kvmppc_core_vcpu_free(vcpu); | 482 | kvmppc_core_vcpu_free(vcpu); |
| 464 | } | 483 | } |
| 465 | 484 | ||
| @@ -532,12 +551,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 532 | #endif | 551 | #endif |
| 533 | } | 552 | } |
| 534 | 553 | ||
| 535 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | ||
| 536 | struct kvm_guest_debug *dbg) | ||
| 537 | { | ||
| 538 | return -EINVAL; | ||
| 539 | } | ||
| 540 | |||
| 541 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | 554 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, |
| 542 | struct kvm_run *run) | 555 | struct kvm_run *run) |
| 543 | { | 556 | { |
| @@ -612,6 +625,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
| 612 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | 625 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| 613 | unsigned int rt, unsigned int bytes, int is_bigendian) | 626 | unsigned int rt, unsigned int bytes, int is_bigendian) |
| 614 | { | 627 | { |
| 628 | int idx, ret; | ||
| 629 | |||
| 615 | if (bytes > sizeof(run->mmio.data)) { | 630 | if (bytes > sizeof(run->mmio.data)) { |
| 616 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, | 631 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, |
| 617 | run->mmio.len); | 632 | run->mmio.len); |
| @@ -627,8 +642,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 627 | vcpu->mmio_is_write = 0; | 642 | vcpu->mmio_is_write = 0; |
| 628 | vcpu->arch.mmio_sign_extend = 0; | 643 | vcpu->arch.mmio_sign_extend = 0; |
| 629 | 644 | ||
| 630 | if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, | 645 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 631 | bytes, &run->mmio.data)) { | 646 | |
| 647 | ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, | ||
| 648 | bytes, &run->mmio.data); | ||
| 649 | |||
| 650 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
| 651 | |||
| 652 | if (!ret) { | ||
| 632 | kvmppc_complete_mmio_load(vcpu, run); | 653 | kvmppc_complete_mmio_load(vcpu, run); |
| 633 | vcpu->mmio_needed = 0; | 654 | vcpu->mmio_needed = 0; |
| 634 | return EMULATE_DONE; | 655 | return EMULATE_DONE; |
| @@ -653,6 +674,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 653 | u64 val, unsigned int bytes, int is_bigendian) | 674 | u64 val, unsigned int bytes, int is_bigendian) |
| 654 | { | 675 | { |
| 655 | void *data = run->mmio.data; | 676 | void *data = run->mmio.data; |
| 677 | int idx, ret; | ||
| 656 | 678 | ||
| 657 | if (bytes > sizeof(run->mmio.data)) { | 679 | if (bytes > sizeof(run->mmio.data)) { |
| 658 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, | 680 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, |
| @@ -682,9 +704,14 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 682 | } | 704 | } |
| 683 | } | 705 | } |
| 684 | 706 | ||
| 685 | if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, | 707 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 686 | bytes, &run->mmio.data)) { | 708 | |
| 687 | kvmppc_complete_mmio_load(vcpu, run); | 709 | ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, |
| 710 | bytes, &run->mmio.data); | ||
| 711 | |||
| 712 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
| 713 | |||
| 714 | if (!ret) { | ||
| 688 | vcpu->mmio_needed = 0; | 715 | vcpu->mmio_needed = 0; |
| 689 | return EMULATE_DONE; | 716 | return EMULATE_DONE; |
| 690 | } | 717 | } |
| @@ -740,7 +767,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 740 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | 767 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) |
| 741 | { | 768 | { |
| 742 | if (irq->irq == KVM_INTERRUPT_UNSET) { | 769 | if (irq->irq == KVM_INTERRUPT_UNSET) { |
| 743 | kvmppc_core_dequeue_external(vcpu, irq); | 770 | kvmppc_core_dequeue_external(vcpu); |
| 744 | return 0; | 771 | return 0; |
| 745 | } | 772 | } |
| 746 | 773 | ||
| @@ -770,7 +797,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
| 770 | break; | 797 | break; |
| 771 | case KVM_CAP_PPC_EPR: | 798 | case KVM_CAP_PPC_EPR: |
| 772 | r = 0; | 799 | r = 0; |
| 773 | vcpu->arch.epr_enabled = cap->args[0]; | 800 | if (cap->args[0]) |
| 801 | vcpu->arch.epr_flags |= KVMPPC_EPR_USER; | ||
| 802 | else | ||
| 803 | vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER; | ||
| 774 | break; | 804 | break; |
| 775 | #ifdef CONFIG_BOOKE | 805 | #ifdef CONFIG_BOOKE |
| 776 | case KVM_CAP_PPC_BOOKE_WATCHDOG: | 806 | case KVM_CAP_PPC_BOOKE_WATCHDOG: |
| @@ -791,6 +821,44 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
| 791 | break; | 821 | break; |
| 792 | } | 822 | } |
| 793 | #endif | 823 | #endif |
| 824 | #ifdef CONFIG_KVM_MPIC | ||
| 825 | case KVM_CAP_IRQ_MPIC: { | ||
| 826 | struct file *filp; | ||
| 827 | struct kvm_device *dev; | ||
| 828 | |||
| 829 | r = -EBADF; | ||
| 830 | filp = fget(cap->args[0]); | ||
| 831 | if (!filp) | ||
| 832 | break; | ||
| 833 | |||
| 834 | r = -EPERM; | ||
| 835 | dev = kvm_device_from_filp(filp); | ||
| 836 | if (dev) | ||
| 837 | r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]); | ||
| 838 | |||
| 839 | fput(filp); | ||
| 840 | break; | ||
| 841 | } | ||
| 842 | #endif | ||
| 843 | #ifdef CONFIG_KVM_XICS | ||
| 844 | case KVM_CAP_IRQ_XICS: { | ||
| 845 | struct file *filp; | ||
| 846 | struct kvm_device *dev; | ||
| 847 | |||
| 848 | r = -EBADF; | ||
| 849 | filp = fget(cap->args[0]); | ||
| 850 | if (!filp) | ||
| 851 | break; | ||
| 852 | |||
| 853 | r = -EPERM; | ||
| 854 | dev = kvm_device_from_filp(filp); | ||
| 855 | if (dev) | ||
| 856 | r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); | ||
| 857 | |||
| 858 | fput(filp); | ||
| 859 | break; | ||
| 860 | } | ||
| 861 | #endif /* CONFIG_KVM_XICS */ | ||
| 794 | default: | 862 | default: |
| 795 | r = -EINVAL; | 863 | r = -EINVAL; |
| 796 | break; | 864 | break; |
| @@ -913,9 +981,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) | |||
| 913 | return 0; | 981 | return 0; |
| 914 | } | 982 | } |
| 915 | 983 | ||
| 984 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, | ||
| 985 | bool line_status) | ||
| 986 | { | ||
| 987 | if (!irqchip_in_kernel(kvm)) | ||
| 988 | return -ENXIO; | ||
| 989 | |||
| 990 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | ||
| 991 | irq_event->irq, irq_event->level, | ||
| 992 | line_status); | ||
| 993 | return 0; | ||
| 994 | } | ||
| 995 | |||
| 916 | long kvm_arch_vm_ioctl(struct file *filp, | 996 | long kvm_arch_vm_ioctl(struct file *filp, |
| 917 | unsigned int ioctl, unsigned long arg) | 997 | unsigned int ioctl, unsigned long arg) |
| 918 | { | 998 | { |
| 999 | struct kvm *kvm __maybe_unused = filp->private_data; | ||
| 919 | void __user *argp = (void __user *)arg; | 1000 | void __user *argp = (void __user *)arg; |
| 920 | long r; | 1001 | long r; |
| 921 | 1002 | ||
| @@ -934,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 934 | #ifdef CONFIG_PPC_BOOK3S_64 | 1015 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 935 | case KVM_CREATE_SPAPR_TCE: { | 1016 | case KVM_CREATE_SPAPR_TCE: { |
| 936 | struct kvm_create_spapr_tce create_tce; | 1017 | struct kvm_create_spapr_tce create_tce; |
| 937 | struct kvm *kvm = filp->private_data; | ||
| 938 | 1018 | ||
| 939 | r = -EFAULT; | 1019 | r = -EFAULT; |
| 940 | if (copy_from_user(&create_tce, argp, sizeof(create_tce))) | 1020 | if (copy_from_user(&create_tce, argp, sizeof(create_tce))) |
| @@ -946,8 +1026,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 946 | 1026 | ||
| 947 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 1027 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
| 948 | case KVM_ALLOCATE_RMA: { | 1028 | case KVM_ALLOCATE_RMA: { |
| 949 | struct kvm *kvm = filp->private_data; | ||
| 950 | struct kvm_allocate_rma rma; | 1029 | struct kvm_allocate_rma rma; |
| 1030 | struct kvm *kvm = filp->private_data; | ||
| 951 | 1031 | ||
| 952 | r = kvm_vm_ioctl_allocate_rma(kvm, &rma); | 1032 | r = kvm_vm_ioctl_allocate_rma(kvm, &rma); |
| 953 | if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) | 1033 | if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) |
| @@ -956,7 +1036,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 956 | } | 1036 | } |
| 957 | 1037 | ||
| 958 | case KVM_PPC_ALLOCATE_HTAB: { | 1038 | case KVM_PPC_ALLOCATE_HTAB: { |
| 959 | struct kvm *kvm = filp->private_data; | ||
| 960 | u32 htab_order; | 1039 | u32 htab_order; |
| 961 | 1040 | ||
| 962 | r = -EFAULT; | 1041 | r = -EFAULT; |
| @@ -973,7 +1052,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 973 | } | 1052 | } |
| 974 | 1053 | ||
| 975 | case KVM_PPC_GET_HTAB_FD: { | 1054 | case KVM_PPC_GET_HTAB_FD: { |
| 976 | struct kvm *kvm = filp->private_data; | ||
| 977 | struct kvm_get_htab_fd ghf; | 1055 | struct kvm_get_htab_fd ghf; |
| 978 | 1056 | ||
| 979 | r = -EFAULT; | 1057 | r = -EFAULT; |
| @@ -986,7 +1064,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 986 | 1064 | ||
| 987 | #ifdef CONFIG_PPC_BOOK3S_64 | 1065 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 988 | case KVM_PPC_GET_SMMU_INFO: { | 1066 | case KVM_PPC_GET_SMMU_INFO: { |
| 989 | struct kvm *kvm = filp->private_data; | ||
| 990 | struct kvm_ppc_smmu_info info; | 1067 | struct kvm_ppc_smmu_info info; |
| 991 | 1068 | ||
| 992 | memset(&info, 0, sizeof(info)); | 1069 | memset(&info, 0, sizeof(info)); |
| @@ -995,6 +1072,12 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 995 | r = -EFAULT; | 1072 | r = -EFAULT; |
| 996 | break; | 1073 | break; |
| 997 | } | 1074 | } |
| 1075 | case KVM_PPC_RTAS_DEFINE_TOKEN: { | ||
| 1076 | struct kvm *kvm = filp->private_data; | ||
| 1077 | |||
| 1078 | r = kvm_vm_ioctl_rtas_define_token(kvm, argp); | ||
| 1079 | break; | ||
| 1080 | } | ||
| 998 | #endif /* CONFIG_PPC_BOOK3S_64 */ | 1081 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
| 999 | default: | 1082 | default: |
| 1000 | r = -ENOTTY; | 1083 | r = -ENOTTY; |
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 89db29d17c25..7cd728b3b5e4 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c | |||
| @@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS]; | |||
| 51 | static inline unsigned int icp_native_get_xirr(void) | 51 | static inline unsigned int icp_native_get_xirr(void) |
| 52 | { | 52 | { |
| 53 | int cpu = smp_processor_id(); | 53 | int cpu = smp_processor_id(); |
| 54 | unsigned int xirr; | ||
| 55 | |||
| 56 | /* Handled an interrupt latched by KVM */ | ||
| 57 | xirr = kvmppc_get_xics_latch(); | ||
| 58 | if (xirr) | ||
| 59 | return xirr; | ||
| 54 | 60 | ||
| 55 | return in_be32(&icp_native_regs[cpu]->xirr.word); | 61 | return in_be32(&icp_native_regs[cpu]->xirr.word); |
| 56 | } | 62 | } |
| @@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void) | |||
| 138 | 144 | ||
| 139 | static void icp_native_cause_ipi(int cpu, unsigned long data) | 145 | static void icp_native_cause_ipi(int cpu, unsigned long data) |
| 140 | { | 146 | { |
| 147 | kvmppc_set_host_ipi(cpu, 1); | ||
| 141 | icp_native_set_qirr(cpu, IPI_PRIORITY); | 148 | icp_native_set_qirr(cpu, IPI_PRIORITY); |
| 142 | } | 149 | } |
| 143 | 150 | ||
| @@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) | |||
| 151 | { | 158 | { |
| 152 | int cpu = smp_processor_id(); | 159 | int cpu = smp_processor_id(); |
| 153 | 160 | ||
| 161 | kvmppc_set_host_ipi(cpu, 0); | ||
| 154 | icp_native_set_qirr(cpu, 0xff); | 162 | icp_native_set_qirr(cpu, 0xff); |
| 155 | 163 | ||
| 156 | return smp_ipi_demux(); | 164 | return smp_ipi_demux(); |
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 7bf68fff7c5d..9ccd1905bdad 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild | |||
| @@ -44,5 +44,6 @@ header-y += termios.h | |||
| 44 | header-y += types.h | 44 | header-y += types.h |
| 45 | header-y += ucontext.h | 45 | header-y += ucontext.h |
| 46 | header-y += unistd.h | 46 | header-y += unistd.h |
| 47 | header-y += virtio-ccw.h | ||
| 47 | header-y += vtoc.h | 48 | header-y += vtoc.h |
| 48 | header-y += zcrypt.h | 49 | header-y += zcrypt.h |
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h new file mode 100644 index 000000000000..a9a4ebf79fa7 --- /dev/null +++ b/arch/s390/include/uapi/asm/virtio-ccw.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * Definitions for virtio-ccw devices. | ||
| 3 | * | ||
| 4 | * Copyright IBM Corp. 2013 | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License (version 2 only) | ||
| 8 | * as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> | ||
| 11 | */ | ||
| 12 | #ifndef __KVM_VIRTIO_CCW_H | ||
| 13 | #define __KVM_VIRTIO_CCW_H | ||
| 14 | |||
| 15 | /* Alignment of vring buffers. */ | ||
| 16 | #define KVM_VIRTIO_CCW_RING_ALIGN 4096 | ||
| 17 | |||
| 18 | /* Subcode for diagnose 500 (virtio hypercall). */ | ||
| 19 | #define KVM_S390_VIRTIO_CCW_NOTIFY 3 | ||
| 20 | |||
| 21 | #endif | ||
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 60f9f8ae0fc8..70b46eacf8e1 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig | |||
| @@ -22,6 +22,7 @@ config KVM | |||
| 22 | select PREEMPT_NOTIFIERS | 22 | select PREEMPT_NOTIFIERS |
| 23 | select ANON_INODES | 23 | select ANON_INODES |
| 24 | select HAVE_KVM_CPU_RELAX_INTERCEPT | 24 | select HAVE_KVM_CPU_RELAX_INTERCEPT |
| 25 | select HAVE_KVM_EVENTFD | ||
| 25 | ---help--- | 26 | ---help--- |
| 26 | Support hosting paravirtualized guest machines using the SIE | 27 | Support hosting paravirtualized guest machines using the SIE |
| 27 | virtualization capability on the mainframe. This should work | 28 | virtualization capability on the mainframe. This should work |
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 3975722bb19d..8fe9d65a4585 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | # it under the terms of the GNU General Public License (version 2 only) | 6 | # it under the terms of the GNU General Public License (version 2 only) |
| 7 | # as published by the Free Software Foundation. | 7 | # as published by the Free Software Foundation. |
| 8 | 8 | ||
| 9 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) | 9 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) |
| 10 | 10 | ||
| 11 | ccflags-y := -Ivirt/kvm -Iarch/s390/kvm | 11 | ccflags-y := -Ivirt/kvm -Iarch/s390/kvm |
| 12 | 12 | ||
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a390687feb13..1c01a9912989 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | #include <linux/kvm.h> | 14 | #include <linux/kvm.h> |
| 15 | #include <linux/kvm_host.h> | 15 | #include <linux/kvm_host.h> |
| 16 | #include <asm/virtio-ccw.h> | ||
| 16 | #include "kvm-s390.h" | 17 | #include "kvm-s390.h" |
| 17 | #include "trace.h" | 18 | #include "trace.h" |
| 18 | #include "trace-s390.h" | 19 | #include "trace-s390.h" |
| @@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) | |||
| 104 | return -EREMOTE; | 105 | return -EREMOTE; |
| 105 | } | 106 | } |
| 106 | 107 | ||
| 108 | static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) | ||
| 109 | { | ||
| 110 | int ret, idx; | ||
| 111 | |||
| 112 | /* No virtio-ccw notification? Get out quickly. */ | ||
| 113 | if (!vcpu->kvm->arch.css_support || | ||
| 114 | (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) | ||
| 115 | return -EOPNOTSUPP; | ||
| 116 | |||
| 117 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
| 118 | /* | ||
| 119 | * The layout is as follows: | ||
| 120 | * - gpr 2 contains the subchannel id (passed as addr) | ||
| 121 | * - gpr 3 contains the virtqueue index (passed as datamatch) | ||
| 122 | */ | ||
| 123 | ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, | ||
| 124 | vcpu->run->s.regs.gprs[2], | ||
| 125 | 8, &vcpu->run->s.regs.gprs[3]); | ||
| 126 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
| 127 | /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ | ||
| 128 | return ret < 0 ? ret : 0; | ||
| 129 | } | ||
| 130 | |||
| 107 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) | 131 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) |
| 108 | { | 132 | { |
| 109 | int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; | 133 | int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; |
| @@ -118,6 +142,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) | |||
| 118 | return __diag_time_slice_end_directed(vcpu); | 142 | return __diag_time_slice_end_directed(vcpu); |
| 119 | case 0x308: | 143 | case 0x308: |
| 120 | return __diag_ipl_functions(vcpu); | 144 | return __diag_ipl_functions(vcpu); |
| 145 | case 0x500: | ||
| 146 | return __diag_virtio_hypercall(vcpu); | ||
| 121 | default: | 147 | default: |
| 122 | return -EOPNOTSUPP; | 148 | return -EOPNOTSUPP; |
| 123 | } | 149 | } |
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 4703f129e95e..302e0e52b009 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h | |||
| @@ -18,369 +18,86 @@ | |||
| 18 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
| 19 | #include "kvm-s390.h" | 19 | #include "kvm-s390.h" |
| 20 | 20 | ||
| 21 | static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, | 21 | static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, |
| 22 | unsigned long guestaddr) | 22 | void __user *gptr, |
| 23 | int prefixing) | ||
| 23 | { | 24 | { |
| 24 | unsigned long prefix = vcpu->arch.sie_block->prefix; | 25 | unsigned long prefix = vcpu->arch.sie_block->prefix; |
| 25 | 26 | unsigned long gaddr = (unsigned long) gptr; | |
| 26 | if (guestaddr < 2 * PAGE_SIZE) | 27 | unsigned long uaddr; |
| 27 | guestaddr += prefix; | 28 | |
| 28 | else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) | 29 | if (prefixing) { |
| 29 | guestaddr -= prefix; | 30 | if (gaddr < 2 * PAGE_SIZE) |
| 30 | 31 | gaddr += prefix; | |
| 31 | return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap); | 32 | else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) |
| 32 | } | 33 | gaddr -= prefix; |
| 33 | |||
| 34 | static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
| 35 | u64 *result) | ||
| 36 | { | ||
| 37 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
| 38 | |||
| 39 | BUG_ON(guestaddr & 7); | ||
| 40 | |||
| 41 | if (IS_ERR((void __force *) uptr)) | ||
| 42 | return PTR_ERR((void __force *) uptr); | ||
| 43 | |||
| 44 | return get_user(*result, (unsigned long __user *) uptr); | ||
| 45 | } | ||
| 46 | |||
| 47 | static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
| 48 | u32 *result) | ||
| 49 | { | ||
| 50 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
| 51 | |||
| 52 | BUG_ON(guestaddr & 3); | ||
| 53 | |||
| 54 | if (IS_ERR((void __force *) uptr)) | ||
| 55 | return PTR_ERR((void __force *) uptr); | ||
| 56 | |||
| 57 | return get_user(*result, (u32 __user *) uptr); | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
| 61 | u16 *result) | ||
| 62 | { | ||
| 63 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
| 64 | |||
| 65 | BUG_ON(guestaddr & 1); | ||
| 66 | |||
| 67 | if (IS_ERR(uptr)) | ||
| 68 | return PTR_ERR(uptr); | ||
| 69 | |||
| 70 | return get_user(*result, (u16 __user *) uptr); | ||
| 71 | } | ||
| 72 | |||
| 73 | static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
| 74 | u8 *result) | ||
| 75 | { | ||
| 76 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
| 77 | |||
| 78 | if (IS_ERR((void __force *) uptr)) | ||
| 79 | return PTR_ERR((void __force *) uptr); | ||
| 80 | |||
| 81 | return get_user(*result, (u8 __user *) uptr); | ||
| 82 | } | ||
| 83 | |||
| 84 | static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
| 85 | u64 value) | ||
| 86 | { | ||
| 87 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
| 88 | |||
| 89 | BUG_ON(guestaddr & 7); | ||
| 90 | |||
| 91 | if (IS_ERR((void __force *) uptr)) | ||
| 92 | return PTR_ERR((void __force *) uptr); | ||
| 93 | |||
| 94 | return put_user(value, (u64 __user *) uptr); | ||
| 95 | } | ||
| 96 | |||
| 97 | static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
| 98 | u32 value) | ||
| 99 | { | ||
| 100 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
| 101 | |||
| 102 | BUG_ON(guestaddr & 3); | ||
| 103 | |||
| 104 | if (IS_ERR((void __force *) uptr)) | ||
| 105 | return PTR_ERR((void __force *) uptr); | ||
| 106 | |||
| 107 | return put_user(value, (u32 __user *) uptr); | ||
| 108 | } | ||
| 109 | |||
| 110 | static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
| 111 | u16 value) | ||
| 112 | { | ||
| 113 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
| 114 | |||
| 115 | BUG_ON(guestaddr & 1); | ||
| 116 | |||
| 117 | if (IS_ERR((void __force *) uptr)) | ||
| 118 | return PTR_ERR((void __force *) uptr); | ||
| 119 | |||
| 120 | return put_user(value, (u16 __user *) uptr); | ||
| 121 | } | ||
| 122 | |||
| 123 | static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
| 124 | u8 value) | ||
| 125 | { | ||
| 126 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
| 127 | |||
| 128 | if (IS_ERR((void __force *) uptr)) | ||
| 129 | return PTR_ERR((void __force *) uptr); | ||
| 130 | |||
| 131 | return put_user(value, (u8 __user *) uptr); | ||
| 132 | } | ||
| 133 | |||
| 134 | |||
| 135 | static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, | ||
| 136 | unsigned long guestdest, | ||
| 137 | void *from, unsigned long n) | ||
| 138 | { | ||
| 139 | int rc; | ||
| 140 | unsigned long i; | ||
| 141 | u8 *data = from; | ||
| 142 | |||
| 143 | for (i = 0; i < n; i++) { | ||
| 144 | rc = put_guest_u8(vcpu, guestdest++, *(data++)); | ||
| 145 | if (rc < 0) | ||
| 146 | return rc; | ||
| 147 | } | 34 | } |
| 148 | return 0; | 35 | uaddr = gmap_fault(gaddr, vcpu->arch.gmap); |
| 149 | } | 36 | if (IS_ERR_VALUE(uaddr)) |
| 150 | 37 | uaddr = -EFAULT; | |
| 151 | static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu, | 38 | return (void __user *)uaddr; |
| 152 | unsigned long guestdest, | 39 | } |
| 153 | void *from, unsigned long n) | 40 | |
| 154 | { | 41 | #define get_guest(vcpu, x, gptr) \ |
| 155 | int r; | 42 | ({ \ |
| 43 | __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ | ||
| 44 | int __mask = sizeof(__typeof__(*(gptr))) - 1; \ | ||
| 45 | int __ret = PTR_RET((void __force *)__uptr); \ | ||
| 46 | \ | ||
| 47 | if (!__ret) { \ | ||
| 48 | BUG_ON((unsigned long)__uptr & __mask); \ | ||
| 49 | __ret = get_user(x, __uptr); \ | ||
| 50 | } \ | ||
| 51 | __ret; \ | ||
| 52 | }) | ||
| 53 | |||
| 54 | #define put_guest(vcpu, x, gptr) \ | ||
| 55 | ({ \ | ||
| 56 | __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ | ||
| 57 | int __mask = sizeof(__typeof__(*(gptr))) - 1; \ | ||
| 58 | int __ret = PTR_RET((void __force *)__uptr); \ | ||
| 59 | \ | ||
| 60 | if (!__ret) { \ | ||
| 61 | BUG_ON((unsigned long)__uptr & __mask); \ | ||
| 62 | __ret = put_user(x, __uptr); \ | ||
| 63 | } \ | ||
| 64 | __ret; \ | ||
| 65 | }) | ||
| 66 | |||
| 67 | static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to, | ||
| 68 | unsigned long from, unsigned long len, | ||
| 69 | int to_guest, int prefixing) | ||
| 70 | { | ||
| 71 | unsigned long _len, rc; | ||
| 156 | void __user *uptr; | 72 | void __user *uptr; |
| 157 | unsigned long size; | ||
| 158 | |||
| 159 | if (guestdest + n < guestdest) | ||
| 160 | return -EFAULT; | ||
| 161 | |||
| 162 | /* simple case: all within one segment table entry? */ | ||
| 163 | if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) { | ||
| 164 | uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap); | ||
| 165 | |||
| 166 | if (IS_ERR((void __force *) uptr)) | ||
| 167 | return PTR_ERR((void __force *) uptr); | ||
| 168 | |||
| 169 | r = copy_to_user(uptr, from, n); | ||
| 170 | |||
| 171 | if (r) | ||
| 172 | r = -EFAULT; | ||
| 173 | |||
| 174 | goto out; | ||
| 175 | } | ||
| 176 | |||
| 177 | /* copy first segment */ | ||
| 178 | uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); | ||
| 179 | |||
| 180 | if (IS_ERR((void __force *) uptr)) | ||
| 181 | return PTR_ERR((void __force *) uptr); | ||
| 182 | 73 | ||
| 183 | size = PMD_SIZE - (guestdest & ~PMD_MASK); | 74 | while (len) { |
| 184 | 75 | uptr = to_guest ? (void __user *)to : (void __user *)from; | |
| 185 | r = copy_to_user(uptr, from, size); | 76 | uptr = __gptr_to_uptr(vcpu, uptr, prefixing); |
| 186 | 77 | if (IS_ERR((void __force *)uptr)) | |
| 187 | if (r) { | 78 | return -EFAULT; |
| 188 | r = -EFAULT; | 79 | _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1)); |
| 189 | goto out; | 80 | _len = min(_len, len); |
| 190 | } | 81 | if (to_guest) |
| 191 | from += size; | 82 | rc = copy_to_user((void __user *) uptr, (void *)from, _len); |
| 192 | n -= size; | 83 | else |
| 193 | guestdest += size; | 84 | rc = copy_from_user((void *)to, (void __user *)uptr, _len); |
| 194 | 85 | if (rc) | |
| 195 | /* copy full segments */ | 86 | return -EFAULT; |
| 196 | while (n >= PMD_SIZE) { | 87 | len -= _len; |
| 197 | uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); | 88 | from += _len; |
| 198 | 89 | to += _len; | |
| 199 | if (IS_ERR((void __force *) uptr)) | ||
| 200 | return PTR_ERR((void __force *) uptr); | ||
| 201 | |||
| 202 | r = copy_to_user(uptr, from, PMD_SIZE); | ||
| 203 | |||
| 204 | if (r) { | ||
| 205 | r = -EFAULT; | ||
| 206 | goto out; | ||
| 207 | } | ||
| 208 | from += PMD_SIZE; | ||
| 209 | n -= PMD_SIZE; | ||
| 210 | guestdest += PMD_SIZE; | ||
| 211 | } | ||
| 212 | |||
| 213 | /* copy the tail segment */ | ||
| 214 | if (n) { | ||
| 215 | uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); | ||
| 216 | |||
| 217 | if (IS_ERR((void __force *) uptr)) | ||
| 218 | return PTR_ERR((void __force *) uptr); | ||
| 219 | |||
| 220 | r = copy_to_user(uptr, from, n); | ||
| 221 | |||
| 222 | if (r) | ||
| 223 | r = -EFAULT; | ||
| 224 | } | ||
| 225 | out: | ||
| 226 | return r; | ||
| 227 | } | ||
| 228 | |||
| 229 | static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, | ||
| 230 | unsigned long guestdest, | ||
| 231 | void *from, unsigned long n) | ||
| 232 | { | ||
| 233 | return __copy_to_guest_fast(vcpu, guestdest, from, n); | ||
| 234 | } | ||
| 235 | |||
| 236 | static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, | ||
| 237 | void *from, unsigned long n) | ||
| 238 | { | ||
| 239 | unsigned long prefix = vcpu->arch.sie_block->prefix; | ||
| 240 | |||
| 241 | if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) | ||
| 242 | goto slowpath; | ||
| 243 | |||
| 244 | if ((guestdest < prefix) && (guestdest + n > prefix)) | ||
| 245 | goto slowpath; | ||
| 246 | |||
| 247 | if ((guestdest < prefix + 2 * PAGE_SIZE) | ||
| 248 | && (guestdest + n > prefix + 2 * PAGE_SIZE)) | ||
| 249 | goto slowpath; | ||
| 250 | |||
| 251 | if (guestdest < 2 * PAGE_SIZE) | ||
| 252 | guestdest += prefix; | ||
| 253 | else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) | ||
| 254 | guestdest -= prefix; | ||
| 255 | |||
| 256 | return __copy_to_guest_fast(vcpu, guestdest, from, n); | ||
| 257 | slowpath: | ||
| 258 | return __copy_to_guest_slow(vcpu, guestdest, from, n); | ||
| 259 | } | ||
| 260 | |||
| 261 | static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, | ||
| 262 | unsigned long guestsrc, | ||
| 263 | unsigned long n) | ||
| 264 | { | ||
| 265 | int rc; | ||
| 266 | unsigned long i; | ||
| 267 | u8 *data = to; | ||
| 268 | |||
| 269 | for (i = 0; i < n; i++) { | ||
| 270 | rc = get_guest_u8(vcpu, guestsrc++, data++); | ||
| 271 | if (rc < 0) | ||
| 272 | return rc; | ||
| 273 | } | 90 | } |
| 274 | return 0; | 91 | return 0; |
| 275 | } | 92 | } |
| 276 | 93 | ||
| 277 | static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to, | 94 | #define copy_to_guest(vcpu, to, from, size) \ |
| 278 | unsigned long guestsrc, | 95 | __copy_guest(vcpu, to, (unsigned long)from, size, 1, 1) |
| 279 | unsigned long n) | 96 | #define copy_from_guest(vcpu, to, from, size) \ |
| 280 | { | 97 | __copy_guest(vcpu, (unsigned long)to, from, size, 0, 1) |
| 281 | int r; | 98 | #define copy_to_guest_absolute(vcpu, to, from, size) \ |
| 282 | void __user *uptr; | 99 | __copy_guest(vcpu, to, (unsigned long)from, size, 1, 0) |
| 283 | unsigned long size; | 100 | #define copy_from_guest_absolute(vcpu, to, from, size) \ |
| 284 | 101 | __copy_guest(vcpu, (unsigned long)to, from, size, 0, 0) | |
| 285 | if (guestsrc + n < guestsrc) | ||
| 286 | return -EFAULT; | ||
| 287 | |||
| 288 | /* simple case: all within one segment table entry? */ | ||
| 289 | if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) { | ||
| 290 | uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap); | ||
| 291 | |||
| 292 | if (IS_ERR((void __force *) uptr)) | ||
| 293 | return PTR_ERR((void __force *) uptr); | ||
| 294 | |||
| 295 | r = copy_from_user(to, uptr, n); | ||
| 296 | |||
| 297 | if (r) | ||
| 298 | r = -EFAULT; | ||
| 299 | |||
| 300 | goto out; | ||
| 301 | } | ||
| 302 | |||
| 303 | /* copy first segment */ | ||
| 304 | uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); | ||
| 305 | |||
| 306 | if (IS_ERR((void __force *) uptr)) | ||
| 307 | return PTR_ERR((void __force *) uptr); | ||
| 308 | |||
| 309 | size = PMD_SIZE - (guestsrc & ~PMD_MASK); | ||
| 310 | |||
| 311 | r = copy_from_user(to, uptr, size); | ||
| 312 | |||
| 313 | if (r) { | ||
| 314 | r = -EFAULT; | ||
| 315 | goto out; | ||
| 316 | } | ||
| 317 | to += size; | ||
| 318 | n -= size; | ||
| 319 | guestsrc += size; | ||
| 320 | |||
| 321 | /* copy full segments */ | ||
| 322 | while (n >= PMD_SIZE) { | ||
| 323 | uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); | ||
| 324 | |||
| 325 | if (IS_ERR((void __force *) uptr)) | ||
| 326 | return PTR_ERR((void __force *) uptr); | ||
| 327 | |||
| 328 | r = copy_from_user(to, uptr, PMD_SIZE); | ||
| 329 | |||
| 330 | if (r) { | ||
| 331 | r = -EFAULT; | ||
| 332 | goto out; | ||
| 333 | } | ||
| 334 | to += PMD_SIZE; | ||
| 335 | n -= PMD_SIZE; | ||
| 336 | guestsrc += PMD_SIZE; | ||
| 337 | } | ||
| 338 | |||
| 339 | /* copy the tail segment */ | ||
| 340 | if (n) { | ||
| 341 | uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); | ||
| 342 | |||
| 343 | if (IS_ERR((void __force *) uptr)) | ||
| 344 | return PTR_ERR((void __force *) uptr); | ||
| 345 | |||
| 346 | r = copy_from_user(to, uptr, n); | ||
| 347 | |||
| 348 | if (r) | ||
| 349 | r = -EFAULT; | ||
| 350 | } | ||
| 351 | out: | ||
| 352 | return r; | ||
| 353 | } | ||
| 354 | |||
| 355 | static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, | ||
| 356 | unsigned long guestsrc, | ||
| 357 | unsigned long n) | ||
| 358 | { | ||
| 359 | return __copy_from_guest_fast(vcpu, to, guestsrc, n); | ||
| 360 | } | ||
| 361 | |||
| 362 | static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, | ||
| 363 | unsigned long guestsrc, unsigned long n) | ||
| 364 | { | ||
| 365 | unsigned long prefix = vcpu->arch.sie_block->prefix; | ||
| 366 | |||
| 367 | if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) | ||
| 368 | goto slowpath; | ||
| 369 | 102 | ||
| 370 | if ((guestsrc < prefix) && (guestsrc + n > prefix)) | 103 | #endif /* __KVM_S390_GACCESS_H */ |
| 371 | goto slowpath; | ||
| 372 | |||
| 373 | if ((guestsrc < prefix + 2 * PAGE_SIZE) | ||
| 374 | && (guestsrc + n > prefix + 2 * PAGE_SIZE)) | ||
| 375 | goto slowpath; | ||
| 376 | |||
| 377 | if (guestsrc < 2 * PAGE_SIZE) | ||
| 378 | guestsrc += prefix; | ||
| 379 | else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) | ||
| 380 | guestsrc -= prefix; | ||
| 381 | |||
| 382 | return __copy_from_guest_fast(vcpu, to, guestsrc, n); | ||
| 383 | slowpath: | ||
| 384 | return __copy_from_guest_slow(vcpu, to, guestsrc, n); | ||
| 385 | } | ||
| 386 | #endif | ||
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index f26ff1e31bdb..b7d1b2edeeb3 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
| @@ -43,12 +43,10 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) | |||
| 43 | trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); | 43 | trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); |
| 44 | 44 | ||
| 45 | do { | 45 | do { |
| 46 | rc = get_guest_u64(vcpu, useraddr, | 46 | rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], |
| 47 | &vcpu->arch.sie_block->gcr[reg]); | 47 | (u64 __user *) useraddr); |
| 48 | if (rc == -EFAULT) { | 48 | if (rc) |
| 49 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 49 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 50 | break; | ||
| 51 | } | ||
| 52 | useraddr += 8; | 50 | useraddr += 8; |
| 53 | if (reg == reg3) | 51 | if (reg == reg3) |
| 54 | break; | 52 | break; |
| @@ -78,11 +76,9 @@ static int handle_lctl(struct kvm_vcpu *vcpu) | |||
| 78 | 76 | ||
| 79 | reg = reg1; | 77 | reg = reg1; |
| 80 | do { | 78 | do { |
| 81 | rc = get_guest_u32(vcpu, useraddr, &val); | 79 | rc = get_guest(vcpu, val, (u32 __user *) useraddr); |
| 82 | if (rc == -EFAULT) { | 80 | if (rc) |
| 83 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 81 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 84 | break; | ||
| 85 | } | ||
| 86 | vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; | 82 | vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; |
| 87 | vcpu->arch.sie_block->gcr[reg] |= val; | 83 | vcpu->arch.sie_block->gcr[reg] |= val; |
| 88 | useraddr += 4; | 84 | useraddr += 4; |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 37116a77cb4b..5c948177529e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
| @@ -180,7 +180,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
| 180 | struct kvm_s390_interrupt_info *inti) | 180 | struct kvm_s390_interrupt_info *inti) |
| 181 | { | 181 | { |
| 182 | const unsigned short table[] = { 2, 4, 4, 6 }; | 182 | const unsigned short table[] = { 2, 4, 4, 6 }; |
| 183 | int rc, exception = 0; | 183 | int rc = 0; |
| 184 | 184 | ||
| 185 | switch (inti->type) { | 185 | switch (inti->type) { |
| 186 | case KVM_S390_INT_EMERGENCY: | 186 | case KVM_S390_INT_EMERGENCY: |
| @@ -188,74 +188,41 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
| 188 | vcpu->stat.deliver_emergency_signal++; | 188 | vcpu->stat.deliver_emergency_signal++; |
| 189 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 189 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
| 190 | inti->emerg.code, 0); | 190 | inti->emerg.code, 0); |
| 191 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); | 191 | rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE); |
| 192 | if (rc == -EFAULT) | 192 | rc |= put_guest(vcpu, inti->emerg.code, |
| 193 | exception = 1; | 193 | (u16 __user *)__LC_EXT_CPU_ADDR); |
| 194 | 194 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | |
| 195 | rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); | 195 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
| 196 | if (rc == -EFAULT) | 196 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
| 197 | exception = 1; | 197 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
| 198 | |||
| 199 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
| 200 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
| 201 | if (rc == -EFAULT) | ||
| 202 | exception = 1; | ||
| 203 | |||
| 204 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 205 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
| 206 | if (rc == -EFAULT) | ||
| 207 | exception = 1; | ||
| 208 | break; | 198 | break; |
| 209 | |||
| 210 | case KVM_S390_INT_EXTERNAL_CALL: | 199 | case KVM_S390_INT_EXTERNAL_CALL: |
| 211 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); | 200 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); |
| 212 | vcpu->stat.deliver_external_call++; | 201 | vcpu->stat.deliver_external_call++; |
| 213 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 202 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
| 214 | inti->extcall.code, 0); | 203 | inti->extcall.code, 0); |
| 215 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); | 204 | rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE); |
| 216 | if (rc == -EFAULT) | 205 | rc |= put_guest(vcpu, inti->extcall.code, |
| 217 | exception = 1; | 206 | (u16 __user *)__LC_EXT_CPU_ADDR); |
| 218 | 207 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | |
| 219 | rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); | 208 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
| 220 | if (rc == -EFAULT) | 209 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
| 221 | exception = 1; | 210 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
| 222 | |||
| 223 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
| 224 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
| 225 | if (rc == -EFAULT) | ||
| 226 | exception = 1; | ||
| 227 | |||
| 228 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 229 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
| 230 | if (rc == -EFAULT) | ||
| 231 | exception = 1; | ||
| 232 | break; | 211 | break; |
| 233 | |||
| 234 | case KVM_S390_INT_SERVICE: | 212 | case KVM_S390_INT_SERVICE: |
| 235 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", | 213 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", |
| 236 | inti->ext.ext_params); | 214 | inti->ext.ext_params); |
| 237 | vcpu->stat.deliver_service_signal++; | 215 | vcpu->stat.deliver_service_signal++; |
| 238 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 216 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
| 239 | inti->ext.ext_params, 0); | 217 | inti->ext.ext_params, 0); |
| 240 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); | 218 | rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE); |
| 241 | if (rc == -EFAULT) | 219 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, |
| 242 | exception = 1; | 220 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
| 243 | 221 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | |
| 244 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | 222 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
| 245 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 223 | rc |= put_guest(vcpu, inti->ext.ext_params, |
| 246 | if (rc == -EFAULT) | 224 | (u32 __user *)__LC_EXT_PARAMS); |
| 247 | exception = 1; | ||
| 248 | |||
| 249 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 250 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
| 251 | if (rc == -EFAULT) | ||
| 252 | exception = 1; | ||
| 253 | |||
| 254 | rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); | ||
| 255 | if (rc == -EFAULT) | ||
| 256 | exception = 1; | ||
| 257 | break; | 225 | break; |
| 258 | |||
| 259 | case KVM_S390_INT_VIRTIO: | 226 | case KVM_S390_INT_VIRTIO: |
| 260 | VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", | 227 | VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", |
| 261 | inti->ext.ext_params, inti->ext.ext_params2); | 228 | inti->ext.ext_params, inti->ext.ext_params2); |
| @@ -263,34 +230,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
| 263 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 230 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
| 264 | inti->ext.ext_params, | 231 | inti->ext.ext_params, |
| 265 | inti->ext.ext_params2); | 232 | inti->ext.ext_params2); |
| 266 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); | 233 | rc = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE); |
| 267 | if (rc == -EFAULT) | 234 | rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR); |
| 268 | exception = 1; | 235 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, |
| 269 | 236 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | |
| 270 | rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); | 237 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
| 271 | if (rc == -EFAULT) | 238 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
| 272 | exception = 1; | 239 | rc |= put_guest(vcpu, inti->ext.ext_params, |
| 273 | 240 | (u32 __user *)__LC_EXT_PARAMS); | |
| 274 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | 241 | rc |= put_guest(vcpu, inti->ext.ext_params2, |
| 275 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 242 | (u64 __user *)__LC_EXT_PARAMS2); |
| 276 | if (rc == -EFAULT) | ||
| 277 | exception = 1; | ||
| 278 | |||
| 279 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 280 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
| 281 | if (rc == -EFAULT) | ||
| 282 | exception = 1; | ||
| 283 | |||
| 284 | rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); | ||
| 285 | if (rc == -EFAULT) | ||
| 286 | exception = 1; | ||
| 287 | |||
| 288 | rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2, | ||
| 289 | inti->ext.ext_params2); | ||
| 290 | if (rc == -EFAULT) | ||
| 291 | exception = 1; | ||
| 292 | break; | 243 | break; |
| 293 | |||
| 294 | case KVM_S390_SIGP_STOP: | 244 | case KVM_S390_SIGP_STOP: |
| 295 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); | 245 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); |
| 296 | vcpu->stat.deliver_stop_signal++; | 246 | vcpu->stat.deliver_stop_signal++; |
| @@ -313,18 +263,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
| 313 | vcpu->stat.deliver_restart_signal++; | 263 | vcpu->stat.deliver_restart_signal++; |
| 314 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 264 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
| 315 | 0, 0); | 265 | 0, 0); |
| 316 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, | 266 | rc = copy_to_guest(vcpu, |
| 317 | restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 267 | offsetof(struct _lowcore, restart_old_psw), |
| 318 | if (rc == -EFAULT) | 268 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
| 319 | exception = 1; | 269 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
| 320 | 270 | offsetof(struct _lowcore, restart_psw), | |
| 321 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | 271 | sizeof(psw_t)); |
| 322 | offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); | ||
| 323 | if (rc == -EFAULT) | ||
| 324 | exception = 1; | ||
| 325 | atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); | 272 | atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); |
| 326 | break; | 273 | break; |
| 327 | |||
| 328 | case KVM_S390_PROGRAM_INT: | 274 | case KVM_S390_PROGRAM_INT: |
| 329 | VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", | 275 | VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", |
| 330 | inti->pgm.code, | 276 | inti->pgm.code, |
| @@ -332,24 +278,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
| 332 | vcpu->stat.deliver_program_int++; | 278 | vcpu->stat.deliver_program_int++; |
| 333 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 279 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
| 334 | inti->pgm.code, 0); | 280 | inti->pgm.code, 0); |
| 335 | rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); | 281 | rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE); |
| 336 | if (rc == -EFAULT) | 282 | rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14], |
| 337 | exception = 1; | 283 | (u16 __user *)__LC_PGM_ILC); |
| 338 | 284 | rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW, | |
| 339 | rc = put_guest_u16(vcpu, __LC_PGM_ILC, | 285 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
| 340 | table[vcpu->arch.sie_block->ipa >> 14]); | 286 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
| 341 | if (rc == -EFAULT) | 287 | __LC_PGM_NEW_PSW, sizeof(psw_t)); |
| 342 | exception = 1; | ||
| 343 | |||
| 344 | rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, | ||
| 345 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
| 346 | if (rc == -EFAULT) | ||
| 347 | exception = 1; | ||
| 348 | |||
| 349 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 350 | __LC_PGM_NEW_PSW, sizeof(psw_t)); | ||
| 351 | if (rc == -EFAULT) | ||
| 352 | exception = 1; | ||
| 353 | break; | 288 | break; |
| 354 | 289 | ||
| 355 | case KVM_S390_MCHK: | 290 | case KVM_S390_MCHK: |
| @@ -358,24 +293,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
| 358 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 293 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
| 359 | inti->mchk.cr14, | 294 | inti->mchk.cr14, |
| 360 | inti->mchk.mcic); | 295 | inti->mchk.mcic); |
| 361 | rc = kvm_s390_vcpu_store_status(vcpu, | 296 | rc = kvm_s390_vcpu_store_status(vcpu, |
| 362 | KVM_S390_STORE_STATUS_PREFIXED); | 297 | KVM_S390_STORE_STATUS_PREFIXED); |
| 363 | if (rc == -EFAULT) | 298 | rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE); |
| 364 | exception = 1; | 299 | rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, |
| 365 | 300 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | |
| 366 | rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); | 301 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
| 367 | if (rc == -EFAULT) | 302 | __LC_MCK_NEW_PSW, sizeof(psw_t)); |
| 368 | exception = 1; | ||
| 369 | |||
| 370 | rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, | ||
| 371 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
| 372 | if (rc == -EFAULT) | ||
| 373 | exception = 1; | ||
| 374 | |||
| 375 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 376 | __LC_MCK_NEW_PSW, sizeof(psw_t)); | ||
| 377 | if (rc == -EFAULT) | ||
| 378 | exception = 1; | ||
| 379 | break; | 303 | break; |
| 380 | 304 | ||
| 381 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | 305 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: |
| @@ -388,67 +312,44 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
| 388 | vcpu->stat.deliver_io_int++; | 312 | vcpu->stat.deliver_io_int++; |
| 389 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 313 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
| 390 | param0, param1); | 314 | param0, param1); |
| 391 | rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, | 315 | rc = put_guest(vcpu, inti->io.subchannel_id, |
| 392 | inti->io.subchannel_id); | 316 | (u16 __user *) __LC_SUBCHANNEL_ID); |
| 393 | if (rc == -EFAULT) | 317 | rc |= put_guest(vcpu, inti->io.subchannel_nr, |
| 394 | exception = 1; | 318 | (u16 __user *) __LC_SUBCHANNEL_NR); |
| 395 | 319 | rc |= put_guest(vcpu, inti->io.io_int_parm, | |
| 396 | rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, | 320 | (u32 __user *) __LC_IO_INT_PARM); |
| 397 | inti->io.subchannel_nr); | 321 | rc |= put_guest(vcpu, inti->io.io_int_word, |
| 398 | if (rc == -EFAULT) | 322 | (u32 __user *) __LC_IO_INT_WORD); |
| 399 | exception = 1; | 323 | rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, |
| 400 | 324 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | |
| 401 | rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, | 325 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
| 402 | inti->io.io_int_parm); | 326 | __LC_IO_NEW_PSW, sizeof(psw_t)); |
| 403 | if (rc == -EFAULT) | ||
| 404 | exception = 1; | ||
| 405 | |||
| 406 | rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, | ||
| 407 | inti->io.io_int_word); | ||
| 408 | if (rc == -EFAULT) | ||
| 409 | exception = 1; | ||
| 410 | |||
| 411 | rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, | ||
| 412 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
| 413 | if (rc == -EFAULT) | ||
| 414 | exception = 1; | ||
| 415 | |||
| 416 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 417 | __LC_IO_NEW_PSW, sizeof(psw_t)); | ||
| 418 | if (rc == -EFAULT) | ||
| 419 | exception = 1; | ||
| 420 | break; | 327 | break; |
| 421 | } | 328 | } |
| 422 | default: | 329 | default: |
| 423 | BUG(); | 330 | BUG(); |
| 424 | } | 331 | } |
| 425 | if (exception) { | 332 | if (rc) { |
| 426 | printk("kvm: The guest lowcore is not mapped during interrupt " | 333 | printk("kvm: The guest lowcore is not mapped during interrupt " |
| 427 | "delivery, killing userspace\n"); | 334 | "delivery, killing userspace\n"); |
| 428 | do_exit(SIGKILL); | 335 | do_exit(SIGKILL); |
| 429 | } | 336 | } |
| 430 | } | 337 | } |
| 431 | 338 | ||
| 432 | static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) | 339 | static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) |
| 433 | { | 340 | { |
| 434 | int rc, exception = 0; | 341 | int rc; |
| 435 | 342 | ||
| 436 | if (psw_extint_disabled(vcpu)) | 343 | if (psw_extint_disabled(vcpu)) |
| 437 | return 0; | 344 | return 0; |
| 438 | if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) | 345 | if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) |
| 439 | return 0; | 346 | return 0; |
| 440 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); | 347 | rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); |
| 441 | if (rc == -EFAULT) | 348 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, |
| 442 | exception = 1; | 349 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
| 443 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | 350 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
| 444 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 351 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
| 445 | if (rc == -EFAULT) | 352 | if (rc) { |
| 446 | exception = 1; | ||
| 447 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 448 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
| 449 | if (rc == -EFAULT) | ||
| 450 | exception = 1; | ||
| 451 | if (exception) { | ||
| 452 | printk("kvm: The guest lowcore is not mapped during interrupt " | 353 | printk("kvm: The guest lowcore is not mapped during interrupt " |
| 453 | "delivery, killing userspace\n"); | 354 | "delivery, killing userspace\n"); |
| 454 | do_exit(SIGKILL); | 355 | do_exit(SIGKILL); |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4cf35a0a79e7..c1c7c683fa26 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -142,12 +142,16 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 142 | case KVM_CAP_ONE_REG: | 142 | case KVM_CAP_ONE_REG: |
| 143 | case KVM_CAP_ENABLE_CAP: | 143 | case KVM_CAP_ENABLE_CAP: |
| 144 | case KVM_CAP_S390_CSS_SUPPORT: | 144 | case KVM_CAP_S390_CSS_SUPPORT: |
| 145 | case KVM_CAP_IOEVENTFD: | ||
| 145 | r = 1; | 146 | r = 1; |
| 146 | break; | 147 | break; |
| 147 | case KVM_CAP_NR_VCPUS: | 148 | case KVM_CAP_NR_VCPUS: |
| 148 | case KVM_CAP_MAX_VCPUS: | 149 | case KVM_CAP_MAX_VCPUS: |
| 149 | r = KVM_MAX_VCPUS; | 150 | r = KVM_MAX_VCPUS; |
| 150 | break; | 151 | break; |
| 152 | case KVM_CAP_NR_MEMSLOTS: | ||
| 153 | r = KVM_USER_MEM_SLOTS; | ||
| 154 | break; | ||
| 151 | case KVM_CAP_S390_COW: | 155 | case KVM_CAP_S390_COW: |
| 152 | r = MACHINE_HAS_ESOP; | 156 | r = MACHINE_HAS_ESOP; |
| 153 | break; | 157 | break; |
| @@ -632,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
| 632 | } else { | 636 | } else { |
| 633 | VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); | 637 | VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); |
| 634 | trace_kvm_s390_sie_fault(vcpu); | 638 | trace_kvm_s390_sie_fault(vcpu); |
| 635 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 639 | rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 636 | rc = 0; | ||
| 637 | } | 640 | } |
| 638 | } | 641 | } |
| 639 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", | 642 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", |
| @@ -974,22 +977,13 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
| 974 | /* Section: memory related */ | 977 | /* Section: memory related */ |
| 975 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 978 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
| 976 | struct kvm_memory_slot *memslot, | 979 | struct kvm_memory_slot *memslot, |
| 977 | struct kvm_memory_slot old, | ||
| 978 | struct kvm_userspace_memory_region *mem, | 980 | struct kvm_userspace_memory_region *mem, |
| 979 | bool user_alloc) | 981 | enum kvm_mr_change change) |
| 980 | { | 982 | { |
| 981 | /* A few sanity checks. We can have exactly one memory slot which has | 983 | /* A few sanity checks. We can have memory slots which have to be |
| 982 | to start at guest virtual zero and which has to be located at a | 984 | located/ended at a segment boundary (1MB). The memory in userland is |
| 983 | page boundary in userland and which has to end at a page boundary. | 985 | ok to be fragmented into various different vmas. It is okay to mmap() |
| 984 | The memory in userland is ok to be fragmented into various different | 986 | and munmap() stuff in this slot after doing this call at any time */ |
| 985 | vmas. It is okay to mmap() and munmap() stuff in this slot after | ||
| 986 | doing this call at any time */ | ||
| 987 | |||
| 988 | if (mem->slot) | ||
| 989 | return -EINVAL; | ||
| 990 | |||
| 991 | if (mem->guest_phys_addr) | ||
| 992 | return -EINVAL; | ||
| 993 | 987 | ||
| 994 | if (mem->userspace_addr & 0xffffful) | 988 | if (mem->userspace_addr & 0xffffful) |
| 995 | return -EINVAL; | 989 | return -EINVAL; |
| @@ -997,19 +991,26 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
| 997 | if (mem->memory_size & 0xffffful) | 991 | if (mem->memory_size & 0xffffful) |
| 998 | return -EINVAL; | 992 | return -EINVAL; |
| 999 | 993 | ||
| 1000 | if (!user_alloc) | ||
| 1001 | return -EINVAL; | ||
| 1002 | |||
| 1003 | return 0; | 994 | return 0; |
| 1004 | } | 995 | } |
| 1005 | 996 | ||
| 1006 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 997 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
| 1007 | struct kvm_userspace_memory_region *mem, | 998 | struct kvm_userspace_memory_region *mem, |
| 1008 | struct kvm_memory_slot old, | 999 | const struct kvm_memory_slot *old, |
| 1009 | bool user_alloc) | 1000 | enum kvm_mr_change change) |
| 1010 | { | 1001 | { |
| 1011 | int rc; | 1002 | int rc; |
| 1012 | 1003 | ||
| 1004 | /* If the basics of the memslot do not change, we do not want | ||
| 1005 | * to update the gmap. Every update causes several unnecessary | ||
| 1006 | * segment translation exceptions. This is usually handled just | ||
| 1007 | * fine by the normal fault handler + gmap, but it will also | ||
| 1008 | * cause faults on the prefix page of running guest CPUs. | ||
| 1009 | */ | ||
| 1010 | if (old->userspace_addr == mem->userspace_addr && | ||
| 1011 | old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && | ||
| 1012 | old->npages * PAGE_SIZE == mem->memory_size) | ||
| 1013 | return; | ||
| 1013 | 1014 | ||
| 1014 | rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, | 1015 | rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, |
| 1015 | mem->guest_phys_addr, mem->memory_size); | 1016 | mem->guest_phys_addr, mem->memory_size); |
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4d89d64a8161..efc14f687265 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
| @@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); | |||
| 110 | void kvm_s390_tasklet(unsigned long parm); | 110 | void kvm_s390_tasklet(unsigned long parm); |
| 111 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); | 111 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); |
| 112 | void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); | 112 | void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); |
| 113 | int kvm_s390_inject_vm(struct kvm *kvm, | 113 | int __must_check kvm_s390_inject_vm(struct kvm *kvm, |
| 114 | struct kvm_s390_interrupt *s390int); | 114 | struct kvm_s390_interrupt *s390int); |
| 115 | int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | 115 | int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, |
| 116 | struct kvm_s390_interrupt *s390int); | 116 | struct kvm_s390_interrupt *s390int); |
| 117 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); | 117 | int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); |
| 118 | int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); | 118 | int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); |
| 119 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | 119 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, |
| 120 | u64 cr6, u64 schid); | 120 | u64 cr6, u64 schid); |
| 121 | 121 | ||
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 0ef9894606e5..6bbd7b5a0bbe 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
| @@ -14,6 +14,8 @@ | |||
| 14 | #include <linux/kvm.h> | 14 | #include <linux/kvm.h> |
| 15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
| 16 | #include <linux/errno.h> | 16 | #include <linux/errno.h> |
| 17 | #include <linux/compat.h> | ||
| 18 | #include <asm/asm-offsets.h> | ||
| 17 | #include <asm/current.h> | 19 | #include <asm/current.h> |
| 18 | #include <asm/debug.h> | 20 | #include <asm/debug.h> |
| 19 | #include <asm/ebcdic.h> | 21 | #include <asm/ebcdic.h> |
| @@ -35,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) | |||
| 35 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 37 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
| 36 | 38 | ||
| 37 | /* must be word boundary */ | 39 | /* must be word boundary */ |
| 38 | if (operand2 & 3) { | 40 | if (operand2 & 3) |
| 39 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 41 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 40 | goto out; | ||
| 41 | } | ||
| 42 | 42 | ||
| 43 | /* get the value */ | 43 | /* get the value */ |
| 44 | if (get_guest_u32(vcpu, operand2, &address)) { | 44 | if (get_guest(vcpu, address, (u32 __user *) operand2)) |
| 45 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 45 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 46 | goto out; | ||
| 47 | } | ||
| 48 | 46 | ||
| 49 | address = address & 0x7fffe000u; | 47 | address = address & 0x7fffe000u; |
| 50 | 48 | ||
| 51 | /* make sure that the new value is valid memory */ | 49 | /* make sure that the new value is valid memory */ |
| 52 | if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || | 50 | if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || |
| 53 | (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { | 51 | (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) |
| 54 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 52 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 55 | goto out; | ||
| 56 | } | ||
| 57 | 53 | ||
| 58 | kvm_s390_set_prefix(vcpu, address); | 54 | kvm_s390_set_prefix(vcpu, address); |
| 59 | 55 | ||
| 60 | VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); | 56 | VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); |
| 61 | trace_kvm_s390_handle_prefix(vcpu, 1, address); | 57 | trace_kvm_s390_handle_prefix(vcpu, 1, address); |
| 62 | out: | ||
| 63 | return 0; | 58 | return 0; |
| 64 | } | 59 | } |
| 65 | 60 | ||
| @@ -73,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) | |||
| 73 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 68 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
| 74 | 69 | ||
| 75 | /* must be word boundary */ | 70 | /* must be word boundary */ |
| 76 | if (operand2 & 3) { | 71 | if (operand2 & 3) |
| 77 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 72 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 78 | goto out; | ||
| 79 | } | ||
| 80 | 73 | ||
| 81 | address = vcpu->arch.sie_block->prefix; | 74 | address = vcpu->arch.sie_block->prefix; |
| 82 | address = address & 0x7fffe000u; | 75 | address = address & 0x7fffe000u; |
| 83 | 76 | ||
| 84 | /* get the value */ | 77 | /* get the value */ |
| 85 | if (put_guest_u32(vcpu, operand2, address)) { | 78 | if (put_guest(vcpu, address, (u32 __user *)operand2)) |
| 86 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 79 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 87 | goto out; | ||
| 88 | } | ||
| 89 | 80 | ||
| 90 | VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); | 81 | VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); |
| 91 | trace_kvm_s390_handle_prefix(vcpu, 0, address); | 82 | trace_kvm_s390_handle_prefix(vcpu, 0, address); |
| 92 | out: | ||
| 93 | return 0; | 83 | return 0; |
| 94 | } | 84 | } |
| 95 | 85 | ||
| 96 | static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | 86 | static int handle_store_cpu_address(struct kvm_vcpu *vcpu) |
| 97 | { | 87 | { |
| 98 | u64 useraddr; | 88 | u64 useraddr; |
| 99 | int rc; | ||
| 100 | 89 | ||
| 101 | vcpu->stat.instruction_stap++; | 90 | vcpu->stat.instruction_stap++; |
| 102 | 91 | ||
| 103 | useraddr = kvm_s390_get_base_disp_s(vcpu); | 92 | useraddr = kvm_s390_get_base_disp_s(vcpu); |
| 104 | 93 | ||
| 105 | if (useraddr & 1) { | 94 | if (useraddr & 1) |
| 106 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 95 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 107 | goto out; | ||
| 108 | } | ||
| 109 | 96 | ||
| 110 | rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); | 97 | if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr)) |
| 111 | if (rc == -EFAULT) { | 98 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 112 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
| 113 | goto out; | ||
| 114 | } | ||
| 115 | 99 | ||
| 116 | VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); | 100 | VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); |
| 117 | trace_kvm_s390_handle_stap(vcpu, useraddr); | 101 | trace_kvm_s390_handle_stap(vcpu, useraddr); |
| 118 | out: | ||
| 119 | return 0; | 102 | return 0; |
| 120 | } | 103 | } |
| 121 | 104 | ||
| @@ -129,36 +112,38 @@ static int handle_skey(struct kvm_vcpu *vcpu) | |||
| 129 | 112 | ||
| 130 | static int handle_tpi(struct kvm_vcpu *vcpu) | 113 | static int handle_tpi(struct kvm_vcpu *vcpu) |
| 131 | { | 114 | { |
| 132 | u64 addr; | ||
| 133 | struct kvm_s390_interrupt_info *inti; | 115 | struct kvm_s390_interrupt_info *inti; |
| 116 | u64 addr; | ||
| 134 | int cc; | 117 | int cc; |
| 135 | 118 | ||
| 136 | addr = kvm_s390_get_base_disp_s(vcpu); | 119 | addr = kvm_s390_get_base_disp_s(vcpu); |
| 137 | 120 | if (addr & 3) | |
| 121 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
| 122 | cc = 0; | ||
| 138 | inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); | 123 | inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); |
| 139 | if (inti) { | 124 | if (!inti) |
| 140 | if (addr) { | 125 | goto no_interrupt; |
| 141 | /* | 126 | cc = 1; |
| 142 | * Store the two-word I/O interruption code into the | 127 | if (addr) { |
| 143 | * provided area. | 128 | /* |
| 144 | */ | 129 | * Store the two-word I/O interruption code into the |
| 145 | put_guest_u16(vcpu, addr, inti->io.subchannel_id); | 130 | * provided area. |
| 146 | put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); | 131 | */ |
| 147 | put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); | 132 | put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); |
| 148 | } else { | 133 | put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); |
| 149 | /* | 134 | put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); |
| 150 | * Store the three-word I/O interruption code into | 135 | } else { |
| 151 | * the appropriate lowcore area. | 136 | /* |
| 152 | */ | 137 | * Store the three-word I/O interruption code into |
| 153 | put_guest_u16(vcpu, 184, inti->io.subchannel_id); | 138 | * the appropriate lowcore area. |
| 154 | put_guest_u16(vcpu, 186, inti->io.subchannel_nr); | 139 | */ |
| 155 | put_guest_u32(vcpu, 188, inti->io.io_int_parm); | 140 | put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID); |
| 156 | put_guest_u32(vcpu, 192, inti->io.io_int_word); | 141 | put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR); |
| 157 | } | 142 | put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM); |
| 158 | cc = 1; | 143 | put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD); |
| 159 | } else | 144 | } |
| 160 | cc = 0; | ||
| 161 | kfree(inti); | 145 | kfree(inti); |
| 146 | no_interrupt: | ||
| 162 | /* Set condition code and we're done. */ | 147 | /* Set condition code and we're done. */ |
| 163 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 148 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
| 164 | vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; | 149 | vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; |
| @@ -230,13 +215,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu) | |||
| 230 | 215 | ||
| 231 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), | 216 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), |
| 232 | &facility_list, sizeof(facility_list)); | 217 | &facility_list, sizeof(facility_list)); |
| 233 | if (rc == -EFAULT) | 218 | if (rc) |
| 234 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 219 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 235 | else { | 220 | VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); |
| 236 | VCPU_EVENT(vcpu, 5, "store facility list value %x", | 221 | trace_kvm_s390_handle_stfl(vcpu, facility_list); |
| 237 | facility_list); | ||
| 238 | trace_kvm_s390_handle_stfl(vcpu, facility_list); | ||
| 239 | } | ||
| 240 | return 0; | 222 | return 0; |
| 241 | } | 223 | } |
| 242 | 224 | ||
| @@ -249,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu) | |||
| 249 | 231 | ||
| 250 | #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) | 232 | #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) |
| 251 | #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL | 233 | #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL |
| 252 | #define PSW_ADDR_24 0x00000000000fffffUL | 234 | #define PSW_ADDR_24 0x0000000000ffffffUL |
| 253 | #define PSW_ADDR_31 0x000000007fffffffUL | 235 | #define PSW_ADDR_31 0x000000007fffffffUL |
| 254 | 236 | ||
| 237 | static int is_valid_psw(psw_t *psw) { | ||
| 238 | if (psw->mask & PSW_MASK_UNASSIGNED) | ||
| 239 | return 0; | ||
| 240 | if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { | ||
| 241 | if (psw->addr & ~PSW_ADDR_31) | ||
| 242 | return 0; | ||
| 243 | } | ||
| 244 | if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) | ||
| 245 | return 0; | ||
| 246 | if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) | ||
| 247 | return 0; | ||
| 248 | return 1; | ||
| 249 | } | ||
| 250 | |||
| 255 | int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) | 251 | int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) |
| 256 | { | 252 | { |
| 257 | u64 addr; | 253 | psw_t *gpsw = &vcpu->arch.sie_block->gpsw; |
| 258 | psw_compat_t new_psw; | 254 | psw_compat_t new_psw; |
| 255 | u64 addr; | ||
| 259 | 256 | ||
| 260 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 257 | if (gpsw->mask & PSW_MASK_PSTATE) |
| 261 | return kvm_s390_inject_program_int(vcpu, | 258 | return kvm_s390_inject_program_int(vcpu, |
| 262 | PGM_PRIVILEGED_OPERATION); | 259 | PGM_PRIVILEGED_OPERATION); |
| 263 | |||
| 264 | addr = kvm_s390_get_base_disp_s(vcpu); | 260 | addr = kvm_s390_get_base_disp_s(vcpu); |
| 265 | 261 | if (addr & 7) | |
| 266 | if (addr & 7) { | 262 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 267 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 263 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) |
| 268 | goto out; | 264 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 269 | } | 265 | if (!(new_psw.mask & PSW32_MASK_BASE)) |
| 270 | 266 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | |
| 271 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { | 267 | gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; |
| 272 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 268 | gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; |
| 273 | goto out; | 269 | gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; |
| 274 | } | 270 | if (!is_valid_psw(gpsw)) |
| 275 | 271 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | |
| 276 | if (!(new_psw.mask & PSW32_MASK_BASE)) { | ||
| 277 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
| 278 | goto out; | ||
| 279 | } | ||
| 280 | |||
| 281 | vcpu->arch.sie_block->gpsw.mask = | ||
| 282 | (new_psw.mask & ~PSW32_MASK_BASE) << 32; | ||
| 283 | vcpu->arch.sie_block->gpsw.addr = new_psw.addr; | ||
| 284 | |||
| 285 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || | ||
| 286 | (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && | ||
| 287 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || | ||
| 288 | ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
| 289 | PSW_MASK_EA)) { | ||
| 290 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
| 291 | goto out; | ||
| 292 | } | ||
| 293 | |||
| 294 | handle_new_psw(vcpu); | 272 | handle_new_psw(vcpu); |
| 295 | out: | ||
| 296 | return 0; | 273 | return 0; |
| 297 | } | 274 | } |
| 298 | 275 | ||
| 299 | static int handle_lpswe(struct kvm_vcpu *vcpu) | 276 | static int handle_lpswe(struct kvm_vcpu *vcpu) |
| 300 | { | 277 | { |
| 301 | u64 addr; | ||
| 302 | psw_t new_psw; | 278 | psw_t new_psw; |
| 279 | u64 addr; | ||
| 303 | 280 | ||
| 304 | addr = kvm_s390_get_base_disp_s(vcpu); | 281 | addr = kvm_s390_get_base_disp_s(vcpu); |
| 305 | 282 | if (addr & 7) | |
| 306 | if (addr & 7) { | 283 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 307 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 284 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) |
| 308 | goto out; | 285 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 309 | } | 286 | vcpu->arch.sie_block->gpsw = new_psw; |
| 310 | 287 | if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) | |
| 311 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { | 288 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 312 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
| 313 | goto out; | ||
| 314 | } | ||
| 315 | |||
| 316 | vcpu->arch.sie_block->gpsw.mask = new_psw.mask; | ||
| 317 | vcpu->arch.sie_block->gpsw.addr = new_psw.addr; | ||
| 318 | |||
| 319 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || | ||
| 320 | (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
| 321 | PSW_MASK_BA) && | ||
| 322 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || | ||
| 323 | (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && | ||
| 324 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || | ||
| 325 | ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
| 326 | PSW_MASK_EA)) { | ||
| 327 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
| 328 | goto out; | ||
| 329 | } | ||
| 330 | |||
| 331 | handle_new_psw(vcpu); | 289 | handle_new_psw(vcpu); |
| 332 | out: | ||
| 333 | return 0; | 290 | return 0; |
| 334 | } | 291 | } |
| 335 | 292 | ||
| 336 | static int handle_stidp(struct kvm_vcpu *vcpu) | 293 | static int handle_stidp(struct kvm_vcpu *vcpu) |
| 337 | { | 294 | { |
| 338 | u64 operand2; | 295 | u64 operand2; |
| 339 | int rc; | ||
| 340 | 296 | ||
| 341 | vcpu->stat.instruction_stidp++; | 297 | vcpu->stat.instruction_stidp++; |
| 342 | 298 | ||
| 343 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 299 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
| 344 | 300 | ||
| 345 | if (operand2 & 7) { | 301 | if (operand2 & 7) |
| 346 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 302 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 347 | goto out; | ||
| 348 | } | ||
| 349 | 303 | ||
| 350 | rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); | 304 | if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2)) |
| 351 | if (rc == -EFAULT) { | 305 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 352 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
| 353 | goto out; | ||
| 354 | } | ||
| 355 | 306 | ||
| 356 | VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); | 307 | VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); |
| 357 | out: | ||
| 358 | return 0; | 308 | return 0; |
| 359 | } | 309 | } |
| 360 | 310 | ||
| @@ -394,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
| 394 | int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; | 344 | int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; |
| 395 | int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; | 345 | int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; |
| 396 | int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; | 346 | int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; |
| 347 | unsigned long mem = 0; | ||
| 397 | u64 operand2; | 348 | u64 operand2; |
| 398 | unsigned long mem; | 349 | int rc = 0; |
| 399 | 350 | ||
| 400 | vcpu->stat.instruction_stsi++; | 351 | vcpu->stat.instruction_stsi++; |
| 401 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); | 352 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); |
| @@ -414,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
| 414 | case 2: | 365 | case 2: |
| 415 | mem = get_zeroed_page(GFP_KERNEL); | 366 | mem = get_zeroed_page(GFP_KERNEL); |
| 416 | if (!mem) | 367 | if (!mem) |
| 417 | goto out_fail; | 368 | goto out_no_data; |
| 418 | if (stsi((void *) mem, fc, sel1, sel2)) | 369 | if (stsi((void *) mem, fc, sel1, sel2)) |
| 419 | goto out_mem; | 370 | goto out_no_data; |
| 420 | break; | 371 | break; |
| 421 | case 3: | 372 | case 3: |
| 422 | if (sel1 != 2 || sel2 != 2) | 373 | if (sel1 != 2 || sel2 != 2) |
| 423 | goto out_fail; | 374 | goto out_no_data; |
| 424 | mem = get_zeroed_page(GFP_KERNEL); | 375 | mem = get_zeroed_page(GFP_KERNEL); |
| 425 | if (!mem) | 376 | if (!mem) |
| 426 | goto out_fail; | 377 | goto out_no_data; |
| 427 | handle_stsi_3_2_2(vcpu, (void *) mem); | 378 | handle_stsi_3_2_2(vcpu, (void *) mem); |
| 428 | break; | 379 | break; |
| 429 | default: | 380 | default: |
| 430 | goto out_fail; | 381 | goto out_no_data; |
| 431 | } | 382 | } |
| 432 | 383 | ||
| 433 | if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { | 384 | if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { |
| 434 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 385 | rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
| 435 | goto out_mem; | 386 | goto out_exception; |
| 436 | } | 387 | } |
| 437 | trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); | 388 | trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); |
| 438 | free_page(mem); | 389 | free_page(mem); |
| 439 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 390 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
| 440 | vcpu->run->s.regs.gprs[0] = 0; | 391 | vcpu->run->s.regs.gprs[0] = 0; |
| 441 | return 0; | 392 | return 0; |
| 442 | out_mem: | 393 | out_no_data: |
| 443 | free_page(mem); | ||
| 444 | out_fail: | ||
| 445 | /* condition code 3 */ | 394 | /* condition code 3 */ |
| 446 | vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; | 395 | vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; |
| 447 | return 0; | 396 | out_exception: |
| 397 | free_page(mem); | ||
| 398 | return rc; | ||
| 448 | } | 399 | } |
| 449 | 400 | ||
| 450 | static const intercept_handler_t b2_handlers[256] = { | 401 | static const intercept_handler_t b2_handlers[256] = { |
| @@ -575,20 +526,13 @@ static int handle_tprot(struct kvm_vcpu *vcpu) | |||
| 575 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) | 526 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) |
| 576 | return -EOPNOTSUPP; | 527 | return -EOPNOTSUPP; |
| 577 | 528 | ||
| 578 | |||
| 579 | /* we must resolve the address without holding the mmap semaphore. | ||
| 580 | * This is ok since the userspace hypervisor is not supposed to change | ||
| 581 | * the mapping while the guest queries the memory. Otherwise the guest | ||
| 582 | * might crash or get wrong info anyway. */ | ||
| 583 | user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); | ||
| 584 | |||
| 585 | down_read(¤t->mm->mmap_sem); | 529 | down_read(¤t->mm->mmap_sem); |
| 530 | user_address = __gmap_translate(address1, vcpu->arch.gmap); | ||
| 531 | if (IS_ERR_VALUE(user_address)) | ||
| 532 | goto out_inject; | ||
| 586 | vma = find_vma(current->mm, user_address); | 533 | vma = find_vma(current->mm, user_address); |
| 587 | if (!vma) { | 534 | if (!vma) |
| 588 | up_read(¤t->mm->mmap_sem); | 535 | goto out_inject; |
| 589 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
| 590 | } | ||
| 591 | |||
| 592 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 536 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
| 593 | if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) | 537 | if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) |
| 594 | vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); | 538 | vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); |
| @@ -597,6 +541,10 @@ static int handle_tprot(struct kvm_vcpu *vcpu) | |||
| 597 | 541 | ||
| 598 | up_read(¤t->mm->mmap_sem); | 542 | up_read(¤t->mm->mmap_sem); |
| 599 | return 0; | 543 | return 0; |
| 544 | |||
| 545 | out_inject: | ||
| 546 | up_read(¤t->mm->mmap_sem); | ||
| 547 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
| 600 | } | 548 | } |
| 601 | 549 | ||
| 602 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) | 550 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 40afa0005c69..9bd4ecac72be 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
| @@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | |||
| 19 | 19 | ||
| 20 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) | 20 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) |
| 21 | 21 | ||
| 22 | #ifdef CONFIG_HAVE_KVM | ||
| 23 | BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) | ||
| 24 | #endif | ||
| 25 | |||
| 22 | /* | 26 | /* |
| 23 | * every pentium local APIC has two 'local interrupts', with a | 27 | * every pentium local APIC has two 'local interrupts', with a |
| 24 | * soft-definable vector attached to both interrupts, one of | 28 | * soft-definable vector attached to both interrupts, one of |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 81f04cee5f74..ab0ae1aa6d0a 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
| @@ -12,6 +12,9 @@ typedef struct { | |||
| 12 | unsigned int irq_spurious_count; | 12 | unsigned int irq_spurious_count; |
| 13 | unsigned int icr_read_retry_count; | 13 | unsigned int icr_read_retry_count; |
| 14 | #endif | 14 | #endif |
| 15 | #ifdef CONFIG_HAVE_KVM | ||
| 16 | unsigned int kvm_posted_intr_ipis; | ||
| 17 | #endif | ||
| 15 | unsigned int x86_platform_ipis; /* arch dependent */ | 18 | unsigned int x86_platform_ipis; /* arch dependent */ |
| 16 | unsigned int apic_perf_irqs; | 19 | unsigned int apic_perf_irqs; |
| 17 | unsigned int apic_irq_work_irqs; | 20 | unsigned int apic_irq_work_irqs; |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 10a78c3d3d5a..1da97efad08a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | /* Interrupt handlers registered during init_IRQ */ | 28 | /* Interrupt handlers registered during init_IRQ */ |
| 29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
| 30 | extern void x86_platform_ipi(void); | 30 | extern void x86_platform_ipi(void); |
| 31 | extern void kvm_posted_intr_ipi(void); | ||
| 31 | extern void error_interrupt(void); | 32 | extern void error_interrupt(void); |
| 32 | extern void irq_work_interrupt(void); | 33 | extern void irq_work_interrupt(void); |
| 33 | 34 | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index aac5fa62a86c..5702d7e3111d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
| @@ -102,6 +102,11 @@ | |||
| 102 | */ | 102 | */ |
| 103 | #define X86_PLATFORM_IPI_VECTOR 0xf7 | 103 | #define X86_PLATFORM_IPI_VECTOR 0xf7 |
| 104 | 104 | ||
| 105 | /* Vector for KVM to deliver posted interrupt IPI */ | ||
| 106 | #ifdef CONFIG_HAVE_KVM | ||
| 107 | #define POSTED_INTR_VECTOR 0xf2 | ||
| 108 | #endif | ||
| 109 | |||
| 105 | /* | 110 | /* |
| 106 | * IRQ work vector: | 111 | * IRQ work vector: |
| 107 | */ | 112 | */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4979778cc7fb..3741c653767c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #include <asm/msr-index.h> | 31 | #include <asm/msr-index.h> |
| 32 | #include <asm/asm.h> | 32 | #include <asm/asm.h> |
| 33 | 33 | ||
| 34 | #define KVM_MAX_VCPUS 254 | 34 | #define KVM_MAX_VCPUS 255 |
| 35 | #define KVM_SOFT_MAX_VCPUS 160 | 35 | #define KVM_SOFT_MAX_VCPUS 160 |
| 36 | #define KVM_USER_MEM_SLOTS 125 | 36 | #define KVM_USER_MEM_SLOTS 125 |
| 37 | /* memory slots that are not exposed to userspace */ | 37 | /* memory slots that are not exposed to userspace */ |
| @@ -43,6 +43,8 @@ | |||
| 43 | #define KVM_PIO_PAGE_OFFSET 1 | 43 | #define KVM_PIO_PAGE_OFFSET 1 |
| 44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 | 44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 |
| 45 | 45 | ||
| 46 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS | ||
| 47 | |||
| 46 | #define CR0_RESERVED_BITS \ | 48 | #define CR0_RESERVED_BITS \ |
| 47 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | 49 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ |
| 48 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | 50 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ |
| @@ -94,9 +96,6 @@ | |||
| 94 | 96 | ||
| 95 | #define ASYNC_PF_PER_VCPU 64 | 97 | #define ASYNC_PF_PER_VCPU 64 |
| 96 | 98 | ||
| 97 | extern raw_spinlock_t kvm_lock; | ||
| 98 | extern struct list_head vm_list; | ||
| 99 | |||
| 100 | struct kvm_vcpu; | 99 | struct kvm_vcpu; |
| 101 | struct kvm; | 100 | struct kvm; |
| 102 | struct kvm_async_pf; | 101 | struct kvm_async_pf; |
| @@ -230,6 +229,7 @@ struct kvm_mmu_page { | |||
| 230 | #endif | 229 | #endif |
| 231 | 230 | ||
| 232 | int write_flooding_count; | 231 | int write_flooding_count; |
| 232 | bool mmio_cached; | ||
| 233 | }; | 233 | }; |
| 234 | 234 | ||
| 235 | struct kvm_pio_request { | 235 | struct kvm_pio_request { |
| @@ -345,7 +345,6 @@ struct kvm_vcpu_arch { | |||
| 345 | unsigned long apic_attention; | 345 | unsigned long apic_attention; |
| 346 | int32_t apic_arb_prio; | 346 | int32_t apic_arb_prio; |
| 347 | int mp_state; | 347 | int mp_state; |
| 348 | int sipi_vector; | ||
| 349 | u64 ia32_misc_enable_msr; | 348 | u64 ia32_misc_enable_msr; |
| 350 | bool tpr_access_reporting; | 349 | bool tpr_access_reporting; |
| 351 | 350 | ||
| @@ -643,7 +642,7 @@ struct kvm_x86_ops { | |||
| 643 | /* Create, but do not attach this VCPU */ | 642 | /* Create, but do not attach this VCPU */ |
| 644 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); | 643 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); |
| 645 | void (*vcpu_free)(struct kvm_vcpu *vcpu); | 644 | void (*vcpu_free)(struct kvm_vcpu *vcpu); |
| 646 | int (*vcpu_reset)(struct kvm_vcpu *vcpu); | 645 | void (*vcpu_reset)(struct kvm_vcpu *vcpu); |
| 647 | 646 | ||
| 648 | void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); | 647 | void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); |
| 649 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | 648 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
| @@ -696,14 +695,16 @@ struct kvm_x86_ops { | |||
| 696 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 695 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
| 697 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | 696 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
| 698 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | 697 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); |
| 699 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 698 | int (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
| 700 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 699 | int (*enable_irq_window)(struct kvm_vcpu *vcpu); |
| 701 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 700 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
| 702 | int (*vm_has_apicv)(struct kvm *kvm); | 701 | int (*vm_has_apicv)(struct kvm *kvm); |
| 703 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); | 702 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); |
| 704 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); | 703 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); |
| 705 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); | 704 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
| 706 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | 705 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); |
| 706 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | ||
| 707 | void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); | ||
| 707 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 708 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
| 708 | int (*get_tdp_level)(void); | 709 | int (*get_tdp_level)(void); |
| 709 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 710 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
| @@ -730,6 +731,7 @@ struct kvm_x86_ops { | |||
| 730 | int (*check_intercept)(struct kvm_vcpu *vcpu, | 731 | int (*check_intercept)(struct kvm_vcpu *vcpu, |
| 731 | struct x86_instruction_info *info, | 732 | struct x86_instruction_info *info, |
| 732 | enum x86_intercept_stage stage); | 733 | enum x86_intercept_stage stage); |
| 734 | void (*handle_external_intr)(struct kvm_vcpu *vcpu); | ||
| 733 | }; | 735 | }; |
| 734 | 736 | ||
| 735 | struct kvm_arch_async_pf { | 737 | struct kvm_arch_async_pf { |
| @@ -767,6 +769,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
| 767 | struct kvm_memory_slot *slot, | 769 | struct kvm_memory_slot *slot, |
| 768 | gfn_t gfn_offset, unsigned long mask); | 770 | gfn_t gfn_offset, unsigned long mask); |
| 769 | void kvm_mmu_zap_all(struct kvm *kvm); | 771 | void kvm_mmu_zap_all(struct kvm *kvm); |
| 772 | void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); | ||
| 770 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 773 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
| 771 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 774 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
| 772 | 775 | ||
| @@ -797,6 +800,7 @@ enum emulation_result { | |||
| 797 | #define EMULTYPE_TRAP_UD (1 << 1) | 800 | #define EMULTYPE_TRAP_UD (1 << 1) |
| 798 | #define EMULTYPE_SKIP (1 << 2) | 801 | #define EMULTYPE_SKIP (1 << 2) |
| 799 | #define EMULTYPE_RETRY (1 << 3) | 802 | #define EMULTYPE_RETRY (1 << 3) |
| 803 | #define EMULTYPE_NO_REEXECUTE (1 << 4) | ||
| 800 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, | 804 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, |
| 801 | int emulation_type, void *insn, int insn_len); | 805 | int emulation_type, void *insn, int insn_len); |
| 802 | 806 | ||
| @@ -807,6 +811,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 807 | } | 811 | } |
| 808 | 812 | ||
| 809 | void kvm_enable_efer_bits(u64); | 813 | void kvm_enable_efer_bits(u64); |
| 814 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); | ||
| 810 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 815 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
| 811 | int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); | 816 | int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); |
| 812 | 817 | ||
| @@ -819,6 +824,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); | |||
| 819 | 824 | ||
| 820 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 825 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
| 821 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 826 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
| 827 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); | ||
| 822 | 828 | ||
| 823 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, | 829 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
| 824 | int reason, bool has_error_code, u32 error_code); | 830 | int reason, bool has_error_code, u32 error_code); |
| @@ -973,7 +979,6 @@ enum { | |||
| 973 | * Trap the fault and ignore the instruction if that happens. | 979 | * Trap the fault and ignore the instruction if that happens. |
| 974 | */ | 980 | */ |
| 975 | asmlinkage void kvm_spurious_fault(void); | 981 | asmlinkage void kvm_spurious_fault(void); |
| 976 | extern bool kvm_rebooting; | ||
| 977 | 982 | ||
| 978 | #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ | 983 | #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ |
| 979 | "666: " insn "\n\t" \ | 984 | "666: " insn "\n\t" \ |
| @@ -1002,6 +1007,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); | |||
| 1002 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 1007 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
| 1003 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 1008 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
| 1004 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 1009 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
| 1010 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu); | ||
| 1005 | 1011 | ||
| 1006 | void kvm_define_shared_msr(unsigned index, u32 msr); | 1012 | void kvm_define_shared_msr(unsigned index, u32 msr); |
| 1007 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 1013 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
| @@ -1027,7 +1033,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); | |||
| 1027 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); | 1033 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); |
| 1028 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); | 1034 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); |
| 1029 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | 1035 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); |
| 1030 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 1036 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); |
| 1031 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); | 1037 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); |
| 1032 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); | 1038 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); |
| 1033 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu); | 1039 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b6fbf860e398..f3e01a2cbaa1 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
| @@ -65,11 +65,16 @@ | |||
| 65 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 | 65 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 |
| 66 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 66 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
| 67 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | 67 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 |
| 68 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | ||
| 68 | 69 | ||
| 69 | 70 | ||
| 70 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 71 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
| 71 | #define PIN_BASED_NMI_EXITING 0x00000008 | 72 | #define PIN_BASED_NMI_EXITING 0x00000008 |
| 72 | #define PIN_BASED_VIRTUAL_NMIS 0x00000020 | 73 | #define PIN_BASED_VIRTUAL_NMIS 0x00000020 |
| 74 | #define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 | ||
| 75 | #define PIN_BASED_POSTED_INTR 0x00000080 | ||
| 76 | |||
| 77 | #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 | ||
| 73 | 78 | ||
| 74 | #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 | 79 | #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 |
| 75 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 | 80 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 |
| @@ -81,6 +86,8 @@ | |||
| 81 | #define VM_EXIT_LOAD_IA32_EFER 0x00200000 | 86 | #define VM_EXIT_LOAD_IA32_EFER 0x00200000 |
| 82 | #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 | 87 | #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 |
| 83 | 88 | ||
| 89 | #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff | ||
| 90 | |||
| 84 | #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 | 91 | #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 |
| 85 | #define VM_ENTRY_IA32E_MODE 0x00000200 | 92 | #define VM_ENTRY_IA32E_MODE 0x00000200 |
| 86 | #define VM_ENTRY_SMM 0x00000400 | 93 | #define VM_ENTRY_SMM 0x00000400 |
| @@ -89,9 +96,15 @@ | |||
| 89 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 | 96 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 |
| 90 | #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 | 97 | #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 |
| 91 | 98 | ||
| 99 | #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff | ||
| 100 | |||
| 101 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f | ||
| 102 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 | ||
| 103 | |||
| 92 | /* VMCS Encodings */ | 104 | /* VMCS Encodings */ |
| 93 | enum vmcs_field { | 105 | enum vmcs_field { |
| 94 | VIRTUAL_PROCESSOR_ID = 0x00000000, | 106 | VIRTUAL_PROCESSOR_ID = 0x00000000, |
| 107 | POSTED_INTR_NV = 0x00000002, | ||
| 95 | GUEST_ES_SELECTOR = 0x00000800, | 108 | GUEST_ES_SELECTOR = 0x00000800, |
| 96 | GUEST_CS_SELECTOR = 0x00000802, | 109 | GUEST_CS_SELECTOR = 0x00000802, |
| 97 | GUEST_SS_SELECTOR = 0x00000804, | 110 | GUEST_SS_SELECTOR = 0x00000804, |
| @@ -126,6 +139,8 @@ enum vmcs_field { | |||
| 126 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, | 139 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, |
| 127 | APIC_ACCESS_ADDR = 0x00002014, | 140 | APIC_ACCESS_ADDR = 0x00002014, |
| 128 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | 141 | APIC_ACCESS_ADDR_HIGH = 0x00002015, |
| 142 | POSTED_INTR_DESC_ADDR = 0x00002016, | ||
| 143 | POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, | ||
| 129 | EPT_POINTER = 0x0000201a, | 144 | EPT_POINTER = 0x0000201a, |
| 130 | EPT_POINTER_HIGH = 0x0000201b, | 145 | EPT_POINTER_HIGH = 0x0000201b, |
| 131 | EOI_EXIT_BITMAP0 = 0x0000201c, | 146 | EOI_EXIT_BITMAP0 = 0x0000201c, |
| @@ -136,6 +151,8 @@ enum vmcs_field { | |||
| 136 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, | 151 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, |
| 137 | EOI_EXIT_BITMAP3 = 0x00002022, | 152 | EOI_EXIT_BITMAP3 = 0x00002022, |
| 138 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, | 153 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, |
| 154 | VMREAD_BITMAP = 0x00002026, | ||
| 155 | VMWRITE_BITMAP = 0x00002028, | ||
| 139 | GUEST_PHYSICAL_ADDRESS = 0x00002400, | 156 | GUEST_PHYSICAL_ADDRESS = 0x00002400, |
| 140 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, | 157 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, |
| 141 | VMCS_LINK_POINTER = 0x00002800, | 158 | VMCS_LINK_POINTER = 0x00002800, |
| @@ -209,6 +226,7 @@ enum vmcs_field { | |||
| 209 | GUEST_INTERRUPTIBILITY_INFO = 0x00004824, | 226 | GUEST_INTERRUPTIBILITY_INFO = 0x00004824, |
| 210 | GUEST_ACTIVITY_STATE = 0X00004826, | 227 | GUEST_ACTIVITY_STATE = 0X00004826, |
| 211 | GUEST_SYSENTER_CS = 0x0000482A, | 228 | GUEST_SYSENTER_CS = 0x0000482A, |
| 229 | VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, | ||
| 212 | HOST_IA32_SYSENTER_CS = 0x00004c00, | 230 | HOST_IA32_SYSENTER_CS = 0x00004c00, |
| 213 | CR0_GUEST_HOST_MASK = 0x00006000, | 231 | CR0_GUEST_HOST_MASK = 0x00006000, |
| 214 | CR4_GUEST_HOST_MASK = 0x00006002, | 232 | CR4_GUEST_HOST_MASK = 0x00006002, |
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a65ec29e6ffb..5d9a3033b3d7 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h | |||
| @@ -29,7 +29,6 @@ | |||
| 29 | #define __KVM_HAVE_PIT | 29 | #define __KVM_HAVE_PIT |
| 30 | #define __KVM_HAVE_IOAPIC | 30 | #define __KVM_HAVE_IOAPIC |
| 31 | #define __KVM_HAVE_IRQ_LINE | 31 | #define __KVM_HAVE_IRQ_LINE |
| 32 | #define __KVM_HAVE_DEVICE_ASSIGNMENT | ||
| 33 | #define __KVM_HAVE_MSI | 32 | #define __KVM_HAVE_MSI |
| 34 | #define __KVM_HAVE_USER_NMI | 33 | #define __KVM_HAVE_USER_NMI |
| 35 | #define __KVM_HAVE_GUEST_DEBUG | 34 | #define __KVM_HAVE_GUEST_DEBUG |
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index b5757885d7a4..b3a4866661c5 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
| @@ -528,6 +528,8 @@ | |||
| 528 | #define VMX_BASIC_MEM_TYPE_WB 6LLU | 528 | #define VMX_BASIC_MEM_TYPE_WB 6LLU |
| 529 | #define VMX_BASIC_INOUT 0x0040000000000000LLU | 529 | #define VMX_BASIC_INOUT 0x0040000000000000LLU |
| 530 | 530 | ||
| 531 | /* MSR_IA32_VMX_MISC bits */ | ||
| 532 | #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) | ||
| 531 | /* AMD-V MSRs */ | 533 | /* AMD-V MSRs */ |
| 532 | 534 | ||
| 533 | #define MSR_VM_CR 0xc0010114 | 535 | #define MSR_VM_CR 0xc0010114 |
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 2871fccfee68..d651082c7cf7 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
| @@ -65,6 +65,7 @@ | |||
| 65 | #define EXIT_REASON_EOI_INDUCED 45 | 65 | #define EXIT_REASON_EOI_INDUCED 45 |
| 66 | #define EXIT_REASON_EPT_VIOLATION 48 | 66 | #define EXIT_REASON_EPT_VIOLATION 48 |
| 67 | #define EXIT_REASON_EPT_MISCONFIG 49 | 67 | #define EXIT_REASON_EPT_MISCONFIG 49 |
| 68 | #define EXIT_REASON_PREEMPTION_TIMER 52 | ||
| 68 | #define EXIT_REASON_WBINVD 54 | 69 | #define EXIT_REASON_WBINVD 54 |
| 69 | #define EXIT_REASON_XSETBV 55 | 70 | #define EXIT_REASON_XSETBV 55 |
| 70 | #define EXIT_REASON_APIC_WRITE 56 | 71 | #define EXIT_REASON_APIC_WRITE 56 |
| @@ -110,7 +111,7 @@ | |||
| 110 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | 111 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ |
| 111 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | 112 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ |
| 112 | { EXIT_REASON_INVD, "INVD" }, \ | 113 | { EXIT_REASON_INVD, "INVD" }, \ |
| 113 | { EXIT_REASON_INVPCID, "INVPCID" } | 114 | { EXIT_REASON_INVPCID, "INVPCID" }, \ |
| 114 | 115 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } | |
| 115 | 116 | ||
| 116 | #endif /* _UAPIVMX_H */ | 117 | #endif /* _UAPIVMX_H */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c1d01e6ca790..727208941030 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ | |||
| 1166 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ | 1166 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
| 1167 | x86_platform_ipi smp_x86_platform_ipi | 1167 | x86_platform_ipi smp_x86_platform_ipi |
| 1168 | 1168 | ||
| 1169 | #ifdef CONFIG_HAVE_KVM | ||
| 1170 | apicinterrupt POSTED_INTR_VECTOR \ | ||
| 1171 | kvm_posted_intr_ipi smp_kvm_posted_intr_ipi | ||
| 1172 | #endif | ||
| 1173 | |||
| 1169 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1174 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
| 1170 | threshold_interrupt smp_threshold_interrupt | 1175 | threshold_interrupt smp_threshold_interrupt |
| 1171 | apicinterrupt THERMAL_APIC_VECTOR \ | 1176 | apicinterrupt THERMAL_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 84b778962c66..ac0631d8996f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
| @@ -224,6 +224,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs) | |||
| 224 | set_irq_regs(old_regs); | 224 | set_irq_regs(old_regs); |
| 225 | } | 225 | } |
| 226 | 226 | ||
| 227 | #ifdef CONFIG_HAVE_KVM | ||
| 228 | /* | ||
| 229 | * Handler for POSTED_INTERRUPT_VECTOR. | ||
| 230 | */ | ||
| 231 | void smp_kvm_posted_intr_ipi(struct pt_regs *regs) | ||
| 232 | { | ||
| 233 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
| 234 | |||
| 235 | ack_APIC_irq(); | ||
| 236 | |||
| 237 | irq_enter(); | ||
| 238 | |||
| 239 | exit_idle(); | ||
| 240 | |||
| 241 | inc_irq_stat(kvm_posted_intr_ipis); | ||
| 242 | |||
| 243 | irq_exit(); | ||
| 244 | |||
| 245 | set_irq_regs(old_regs); | ||
| 246 | } | ||
| 247 | #endif | ||
| 248 | |||
| 227 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 249 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
| 228 | 250 | ||
| 229 | #ifdef CONFIG_HOTPLUG_CPU | 251 | #ifdef CONFIG_HOTPLUG_CPU |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7dc4e459c2b3..a2a1fbc594ff 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -172,6 +172,10 @@ static void __init apic_intr_init(void) | |||
| 172 | 172 | ||
| 173 | /* IPI for X86 platform specific use */ | 173 | /* IPI for X86 platform specific use */ |
| 174 | alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); | 174 | alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); |
| 175 | #ifdef CONFIG_HAVE_KVM | ||
| 176 | /* IPI for KVM to deliver posted interrupt */ | ||
| 177 | alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); | ||
| 178 | #endif | ||
| 175 | 179 | ||
| 176 | /* IPI vectors for APIC spurious and error interrupts */ | 180 | /* IPI vectors for APIC spurious and error interrupts */ |
| 177 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 181 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 0732f0089a3d..d2c381280e3c 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
| @@ -160,8 +160,12 @@ int kvm_register_clock(char *txt) | |||
| 160 | { | 160 | { |
| 161 | int cpu = smp_processor_id(); | 161 | int cpu = smp_processor_id(); |
| 162 | int low, high, ret; | 162 | int low, high, ret; |
| 163 | struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; | 163 | struct pvclock_vcpu_time_info *src; |
| 164 | |||
| 165 | if (!hv_clock) | ||
| 166 | return 0; | ||
| 164 | 167 | ||
| 168 | src = &hv_clock[cpu].pvti; | ||
| 165 | low = (int)slow_virt_to_phys(src) | 1; | 169 | low = (int)slow_virt_to_phys(src) | 1; |
| 166 | high = ((u64)slow_virt_to_phys(src) >> 32); | 170 | high = ((u64)slow_virt_to_phys(src) >> 32); |
| 167 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); | 171 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); |
| @@ -276,6 +280,9 @@ int __init kvm_setup_vsyscall_timeinfo(void) | |||
| 276 | struct pvclock_vcpu_time_info *vcpu_time; | 280 | struct pvclock_vcpu_time_info *vcpu_time; |
| 277 | unsigned int size; | 281 | unsigned int size; |
| 278 | 282 | ||
| 283 | if (!hv_clock) | ||
| 284 | return 0; | ||
| 285 | |||
| 279 | size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); | 286 | size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); |
| 280 | 287 | ||
| 281 | preempt_disable(); | 288 | preempt_disable(); |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 586f00059805..a47a3e54b964 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
| @@ -21,14 +21,13 @@ config KVM | |||
| 21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
| 22 | depends on HAVE_KVM | 22 | depends on HAVE_KVM |
| 23 | depends on HIGH_RES_TIMERS | 23 | depends on HIGH_RES_TIMERS |
| 24 | # for device assignment: | ||
| 25 | depends on PCI | ||
| 26 | # for TASKSTATS/TASK_DELAY_ACCT: | 24 | # for TASKSTATS/TASK_DELAY_ACCT: |
| 27 | depends on NET | 25 | depends on NET |
| 28 | select PREEMPT_NOTIFIERS | 26 | select PREEMPT_NOTIFIERS |
| 29 | select MMU_NOTIFIER | 27 | select MMU_NOTIFIER |
| 30 | select ANON_INODES | 28 | select ANON_INODES |
| 31 | select HAVE_KVM_IRQCHIP | 29 | select HAVE_KVM_IRQCHIP |
| 30 | select HAVE_KVM_IRQ_ROUTING | ||
| 32 | select HAVE_KVM_EVENTFD | 31 | select HAVE_KVM_EVENTFD |
| 33 | select KVM_APIC_ARCHITECTURE | 32 | select KVM_APIC_ARCHITECTURE |
| 34 | select KVM_ASYNC_PF | 33 | select KVM_ASYNC_PF |
| @@ -82,6 +81,17 @@ config KVM_MMU_AUDIT | |||
| 82 | This option adds a R/W kVM module parameter 'mmu_audit', which allows | 81 | This option adds a R/W kVM module parameter 'mmu_audit', which allows |
| 83 | audit KVM MMU at runtime. | 82 | audit KVM MMU at runtime. |
| 84 | 83 | ||
| 84 | config KVM_DEVICE_ASSIGNMENT | ||
| 85 | bool "KVM legacy PCI device assignment support" | ||
| 86 | depends on KVM && PCI && IOMMU_API | ||
| 87 | default y | ||
| 88 | ---help--- | ||
| 89 | Provide support for legacy PCI device assignment through KVM. The | ||
| 90 | kernel now also supports a full featured userspace device driver | ||
| 91 | framework through VFIO, which supersedes much of this support. | ||
| 92 | |||
| 93 | If unsure, say Y. | ||
| 94 | |||
| 85 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 95 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
| 86 | # the virtualization menu. | 96 | # the virtualization menu. |
| 87 | source drivers/vhost/Kconfig | 97 | source drivers/vhost/Kconfig |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 04d30401c5cb..d609e1d84048 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
| @@ -7,8 +7,9 @@ CFLAGS_vmx.o := -I. | |||
| 7 | 7 | ||
| 8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
| 9 | coalesced_mmio.o irq_comm.o eventfd.o \ | 9 | coalesced_mmio.o irq_comm.o eventfd.o \ |
| 10 | assigned-dev.o) | 10 | irqchip.o) |
| 11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | 11 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ |
| 12 | assigned-dev.o iommu.o) | ||
| 12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | 13 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) |
| 13 | 14 | ||
| 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 15 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a335cc6cde72..8e517bba6a7c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -132,8 +132,9 @@ | |||
| 132 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | 132 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ |
| 133 | #define No64 (1<<28) | 133 | #define No64 (1<<28) |
| 134 | #define PageTable (1 << 29) /* instruction used to write page table */ | 134 | #define PageTable (1 << 29) /* instruction used to write page table */ |
| 135 | #define NotImpl (1 << 30) /* instruction is not implemented */ | ||
| 135 | /* Source 2 operand type */ | 136 | /* Source 2 operand type */ |
| 136 | #define Src2Shift (30) | 137 | #define Src2Shift (31) |
| 137 | #define Src2None (OpNone << Src2Shift) | 138 | #define Src2None (OpNone << Src2Shift) |
| 138 | #define Src2CL (OpCL << Src2Shift) | 139 | #define Src2CL (OpCL << Src2Shift) |
| 139 | #define Src2ImmByte (OpImmByte << Src2Shift) | 140 | #define Src2ImmByte (OpImmByte << Src2Shift) |
| @@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
| 1578 | 1579 | ||
| 1579 | memset(&seg_desc, 0, sizeof seg_desc); | 1580 | memset(&seg_desc, 0, sizeof seg_desc); |
| 1580 | 1581 | ||
| 1581 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | 1582 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
| 1582 | || ctxt->mode == X86EMUL_MODE_REAL) { | 1583 | /* set real mode segment descriptor (keep limit etc. for |
| 1583 | /* set real mode segment descriptor */ | 1584 | * unreal mode) */ |
| 1584 | ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); | 1585 | ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); |
| 1585 | set_desc_base(&seg_desc, selector << 4); | 1586 | set_desc_base(&seg_desc, selector << 4); |
| 1586 | goto load; | 1587 | goto load; |
| 1588 | } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) { | ||
| 1589 | /* VM86 needs a clean new segment descriptor */ | ||
| 1590 | set_desc_base(&seg_desc, selector << 4); | ||
| 1591 | set_desc_limit(&seg_desc, 0xffff); | ||
| 1592 | seg_desc.type = 3; | ||
| 1593 | seg_desc.p = 1; | ||
| 1594 | seg_desc.s = 1; | ||
| 1595 | seg_desc.dpl = 3; | ||
| 1596 | goto load; | ||
| 1587 | } | 1597 | } |
| 1588 | 1598 | ||
| 1589 | rpl = selector & 3; | 1599 | rpl = selector & 3; |
| @@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
| 3615 | #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } | 3625 | #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } |
| 3616 | #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ | 3626 | #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ |
| 3617 | .check_perm = (_p) } | 3627 | .check_perm = (_p) } |
| 3618 | #define N D(0) | 3628 | #define N D(NotImpl) |
| 3619 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } | 3629 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } |
| 3620 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } | 3630 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } |
| 3621 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } | 3631 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } |
| @@ -3713,7 +3723,7 @@ static const struct opcode group5[] = { | |||
| 3713 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), | 3723 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), |
| 3714 | I(SrcMem | Stack, em_grp45), | 3724 | I(SrcMem | Stack, em_grp45), |
| 3715 | I(SrcMemFAddr | ImplicitOps, em_grp45), | 3725 | I(SrcMemFAddr | ImplicitOps, em_grp45), |
| 3716 | I(SrcMem | Stack, em_grp45), N, | 3726 | I(SrcMem | Stack, em_grp45), D(Undefined), |
| 3717 | }; | 3727 | }; |
| 3718 | 3728 | ||
| 3719 | static const struct opcode group6[] = { | 3729 | static const struct opcode group6[] = { |
| @@ -4162,6 +4172,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
| 4162 | break; | 4172 | break; |
| 4163 | case OpMem8: | 4173 | case OpMem8: |
| 4164 | ctxt->memop.bytes = 1; | 4174 | ctxt->memop.bytes = 1; |
| 4175 | if (ctxt->memop.type == OP_REG) { | ||
| 4176 | ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); | ||
| 4177 | fetch_register_operand(&ctxt->memop); | ||
| 4178 | } | ||
| 4165 | goto mem_common; | 4179 | goto mem_common; |
| 4166 | case OpMem16: | 4180 | case OpMem16: |
| 4167 | ctxt->memop.bytes = 2; | 4181 | ctxt->memop.bytes = 2; |
| @@ -4373,7 +4387,7 @@ done_prefixes: | |||
| 4373 | ctxt->intercept = opcode.intercept; | 4387 | ctxt->intercept = opcode.intercept; |
| 4374 | 4388 | ||
| 4375 | /* Unrecognised? */ | 4389 | /* Unrecognised? */ |
| 4376 | if (ctxt->d == 0 || (ctxt->d & Undefined)) | 4390 | if (ctxt->d == 0 || (ctxt->d & NotImpl)) |
| 4377 | return EMULATION_FAILED; | 4391 | return EMULATION_FAILED; |
| 4378 | 4392 | ||
| 4379 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) | 4393 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) |
| @@ -4511,7 +4525,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
| 4511 | 4525 | ||
| 4512 | ctxt->mem_read.pos = 0; | 4526 | ctxt->mem_read.pos = 0; |
| 4513 | 4527 | ||
| 4514 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) { | 4528 | if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || |
| 4529 | (ctxt->d & Undefined)) { | ||
| 4515 | rc = emulate_ud(ctxt); | 4530 | rc = emulate_ud(ctxt); |
| 4516 | goto done; | 4531 | goto done; |
| 4517 | } | 4532 | } |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c1d30b2fc9bb..412a5aa0ef94 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work) | |||
| 290 | } | 290 | } |
| 291 | spin_unlock(&ps->inject_lock); | 291 | spin_unlock(&ps->inject_lock); |
| 292 | if (inject) { | 292 | if (inject) { |
| 293 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | 293 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false); |
| 294 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | 294 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false); |
| 295 | 295 | ||
| 296 | /* | 296 | /* |
| 297 | * Provides NMI watchdog support via Virtual Wire mode. | 297 | * Provides NMI watchdog support via Virtual Wire mode. |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f77df1c5de6e..e1adbb4aca75 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap) | |||
| 94 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 94 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) | ||
| 98 | { | ||
| 99 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 100 | |||
| 101 | return apic_test_vector(vector, apic->regs + APIC_ISR) || | ||
| 102 | apic_test_vector(vector, apic->regs + APIC_IRR); | ||
| 103 | } | ||
| 104 | |||
| 97 | static inline void apic_set_vector(int vec, void *bitmap) | 105 | static inline void apic_set_vector(int vec, void *bitmap) |
| 98 | { | 106 | { |
| 99 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 107 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
| @@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) | |||
| 145 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; | 153 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; |
| 146 | } | 154 | } |
| 147 | 155 | ||
| 148 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | ||
| 149 | struct kvm_lapic_irq *irq, | ||
| 150 | u64 *eoi_exit_bitmap) | ||
| 151 | { | ||
| 152 | struct kvm_lapic **dst; | ||
| 153 | struct kvm_apic_map *map; | ||
| 154 | unsigned long bitmap = 1; | ||
| 155 | int i; | ||
| 156 | |||
| 157 | rcu_read_lock(); | ||
| 158 | map = rcu_dereference(vcpu->kvm->arch.apic_map); | ||
| 159 | |||
| 160 | if (unlikely(!map)) { | ||
| 161 | __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); | ||
| 162 | goto out; | ||
| 163 | } | ||
| 164 | |||
| 165 | if (irq->dest_mode == 0) { /* physical mode */ | ||
| 166 | if (irq->delivery_mode == APIC_DM_LOWEST || | ||
| 167 | irq->dest_id == 0xff) { | ||
| 168 | __set_bit(irq->vector, | ||
| 169 | (unsigned long *)eoi_exit_bitmap); | ||
| 170 | goto out; | ||
| 171 | } | ||
| 172 | dst = &map->phys_map[irq->dest_id & 0xff]; | ||
| 173 | } else { | ||
| 174 | u32 mda = irq->dest_id << (32 - map->ldr_bits); | ||
| 175 | |||
| 176 | dst = map->logical_map[apic_cluster_id(map, mda)]; | ||
| 177 | |||
| 178 | bitmap = apic_logical_id(map, mda); | ||
| 179 | } | ||
| 180 | |||
| 181 | for_each_set_bit(i, &bitmap, 16) { | ||
| 182 | if (!dst[i]) | ||
| 183 | continue; | ||
| 184 | if (dst[i]->vcpu == vcpu) { | ||
| 185 | __set_bit(irq->vector, | ||
| 186 | (unsigned long *)eoi_exit_bitmap); | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | out: | ||
| 192 | rcu_read_unlock(); | ||
| 193 | } | ||
| 194 | |||
| 195 | static void recalculate_apic_map(struct kvm *kvm) | 156 | static void recalculate_apic_map(struct kvm *kvm) |
| 196 | { | 157 | { |
| 197 | struct kvm_apic_map *new, *old = NULL; | 158 | struct kvm_apic_map *new, *old = NULL; |
| @@ -256,7 +217,7 @@ out: | |||
| 256 | if (old) | 217 | if (old) |
| 257 | kfree_rcu(old, rcu); | 218 | kfree_rcu(old, rcu); |
| 258 | 219 | ||
| 259 | kvm_ioapic_make_eoibitmap_request(kvm); | 220 | kvm_vcpu_request_scan_ioapic(kvm); |
| 260 | } | 221 | } |
| 261 | 222 | ||
| 262 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) | 223 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) |
| @@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap) | |||
| 357 | return count; | 318 | return count; |
| 358 | } | 319 | } |
| 359 | 320 | ||
| 321 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | ||
| 322 | { | ||
| 323 | u32 i, pir_val; | ||
| 324 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 325 | |||
| 326 | for (i = 0; i <= 7; i++) { | ||
| 327 | pir_val = xchg(&pir[i], 0); | ||
| 328 | if (pir_val) | ||
| 329 | *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; | ||
| 330 | } | ||
| 331 | } | ||
| 332 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | ||
| 333 | |||
| 360 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | 334 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) |
| 361 | { | 335 | { |
| 362 | apic->irr_pending = true; | 336 | apic->irr_pending = true; |
| @@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) | |||
| 379 | if (!apic->irr_pending) | 353 | if (!apic->irr_pending) |
| 380 | return -1; | 354 | return -1; |
| 381 | 355 | ||
| 356 | kvm_x86_ops->sync_pir_to_irr(apic->vcpu); | ||
| 382 | result = apic_search_irr(apic); | 357 | result = apic_search_irr(apic); |
| 383 | ASSERT(result == -1 || result >= 16); | 358 | ASSERT(result == -1 || result >= 16); |
| 384 | 359 | ||
| @@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | |||
| 431 | } | 406 | } |
| 432 | 407 | ||
| 433 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 408 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
| 434 | int vector, int level, int trig_mode); | 409 | int vector, int level, int trig_mode, |
| 410 | unsigned long *dest_map); | ||
| 435 | 411 | ||
| 436 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | 412 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
| 413 | unsigned long *dest_map) | ||
| 437 | { | 414 | { |
| 438 | struct kvm_lapic *apic = vcpu->arch.apic; | 415 | struct kvm_lapic *apic = vcpu->arch.apic; |
| 439 | 416 | ||
| 440 | return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, | 417 | return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, |
| 441 | irq->level, irq->trig_mode); | 418 | irq->level, irq->trig_mode, dest_map); |
| 442 | } | 419 | } |
| 443 | 420 | ||
| 444 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) | 421 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) |
| @@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) | |||
| 505 | return result; | 482 | return result; |
| 506 | } | 483 | } |
| 507 | 484 | ||
| 485 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) | ||
| 486 | { | ||
| 487 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 488 | int i; | ||
| 489 | |||
| 490 | for (i = 0; i < 8; i++) | ||
| 491 | apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]); | ||
| 492 | } | ||
| 493 | |||
| 508 | static void apic_update_ppr(struct kvm_lapic *apic) | 494 | static void apic_update_ppr(struct kvm_lapic *apic) |
| 509 | { | 495 | { |
| 510 | u32 tpr, isrv, ppr, old_ppr; | 496 | u32 tpr, isrv, ppr, old_ppr; |
| @@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
| 611 | } | 597 | } |
| 612 | 598 | ||
| 613 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | 599 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, |
| 614 | struct kvm_lapic_irq *irq, int *r) | 600 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) |
| 615 | { | 601 | { |
| 616 | struct kvm_apic_map *map; | 602 | struct kvm_apic_map *map; |
| 617 | unsigned long bitmap = 1; | 603 | unsigned long bitmap = 1; |
| @@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
| 622 | *r = -1; | 608 | *r = -1; |
| 623 | 609 | ||
| 624 | if (irq->shorthand == APIC_DEST_SELF) { | 610 | if (irq->shorthand == APIC_DEST_SELF) { |
| 625 | *r = kvm_apic_set_irq(src->vcpu, irq); | 611 | *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); |
| 626 | return true; | 612 | return true; |
| 627 | } | 613 | } |
| 628 | 614 | ||
| @@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
| 667 | continue; | 653 | continue; |
| 668 | if (*r < 0) | 654 | if (*r < 0) |
| 669 | *r = 0; | 655 | *r = 0; |
| 670 | *r += kvm_apic_set_irq(dst[i]->vcpu, irq); | 656 | *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); |
| 671 | } | 657 | } |
| 672 | 658 | ||
| 673 | ret = true; | 659 | ret = true; |
| @@ -681,7 +667,8 @@ out: | |||
| 681 | * Return 1 if successfully added and 0 if discarded. | 667 | * Return 1 if successfully added and 0 if discarded. |
| 682 | */ | 668 | */ |
| 683 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 669 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
| 684 | int vector, int level, int trig_mode) | 670 | int vector, int level, int trig_mode, |
| 671 | unsigned long *dest_map) | ||
| 685 | { | 672 | { |
| 686 | int result = 0; | 673 | int result = 0; |
| 687 | struct kvm_vcpu *vcpu = apic->vcpu; | 674 | struct kvm_vcpu *vcpu = apic->vcpu; |
| @@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 694 | if (unlikely(!apic_enabled(apic))) | 681 | if (unlikely(!apic_enabled(apic))) |
| 695 | break; | 682 | break; |
| 696 | 683 | ||
| 697 | if (trig_mode) { | 684 | if (dest_map) |
| 698 | apic_debug("level trig mode for vector %d", vector); | 685 | __set_bit(vcpu->vcpu_id, dest_map); |
| 699 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
| 700 | } else | ||
| 701 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
| 702 | 686 | ||
| 703 | result = !apic_test_and_set_irr(vector, apic); | 687 | if (kvm_x86_ops->deliver_posted_interrupt) { |
| 704 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | 688 | result = 1; |
| 705 | trig_mode, vector, !result); | 689 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); |
| 706 | if (!result) { | 690 | } else { |
| 707 | if (trig_mode) | 691 | result = !apic_test_and_set_irr(vector, apic); |
| 708 | apic_debug("level trig mode repeatedly for " | ||
| 709 | "vector %d", vector); | ||
| 710 | break; | ||
| 711 | } | ||
| 712 | 692 | ||
| 713 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 693 | if (!result) { |
| 714 | kvm_vcpu_kick(vcpu); | 694 | if (trig_mode) |
| 695 | apic_debug("level trig mode repeatedly " | ||
| 696 | "for vector %d", vector); | ||
| 697 | goto out; | ||
| 698 | } | ||
| 699 | |||
| 700 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
| 701 | kvm_vcpu_kick(vcpu); | ||
| 702 | } | ||
| 703 | out: | ||
| 704 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | ||
| 705 | trig_mode, vector, !result); | ||
| 715 | break; | 706 | break; |
| 716 | 707 | ||
| 717 | case APIC_DM_REMRD: | 708 | case APIC_DM_REMRD: |
| @@ -731,7 +722,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 731 | case APIC_DM_INIT: | 722 | case APIC_DM_INIT: |
| 732 | if (!trig_mode || level) { | 723 | if (!trig_mode || level) { |
| 733 | result = 1; | 724 | result = 1; |
| 734 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 725 | /* assumes that there are only KVM_APIC_INIT/SIPI */ |
| 726 | apic->pending_events = (1UL << KVM_APIC_INIT); | ||
| 727 | /* make sure pending_events is visible before sending | ||
| 728 | * the request */ | ||
| 729 | smp_wmb(); | ||
| 735 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 730 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 736 | kvm_vcpu_kick(vcpu); | 731 | kvm_vcpu_kick(vcpu); |
| 737 | } else { | 732 | } else { |
| @@ -743,13 +738,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 743 | case APIC_DM_STARTUP: | 738 | case APIC_DM_STARTUP: |
| 744 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", | 739 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", |
| 745 | vcpu->vcpu_id, vector); | 740 | vcpu->vcpu_id, vector); |
| 746 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | 741 | result = 1; |
| 747 | result = 1; | 742 | apic->sipi_vector = vector; |
| 748 | vcpu->arch.sipi_vector = vector; | 743 | /* make sure sipi_vector is visible for the receiver */ |
| 749 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; | 744 | smp_wmb(); |
| 750 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 745 | set_bit(KVM_APIC_SIPI, &apic->pending_events); |
| 751 | kvm_vcpu_kick(vcpu); | 746 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 752 | } | 747 | kvm_vcpu_kick(vcpu); |
| 753 | break; | 748 | break; |
| 754 | 749 | ||
| 755 | case APIC_DM_EXTINT: | 750 | case APIC_DM_EXTINT: |
| @@ -782,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) | |||
| 782 | trigger_mode = IOAPIC_LEVEL_TRIG; | 777 | trigger_mode = IOAPIC_LEVEL_TRIG; |
| 783 | else | 778 | else |
| 784 | trigger_mode = IOAPIC_EDGE_TRIG; | 779 | trigger_mode = IOAPIC_EDGE_TRIG; |
| 785 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 780 | kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); |
| 786 | } | 781 | } |
| 787 | } | 782 | } |
| 788 | 783 | ||
| @@ -848,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
| 848 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, | 843 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
| 849 | irq.vector); | 844 | irq.vector); |
| 850 | 845 | ||
| 851 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); | 846 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); |
| 852 | } | 847 | } |
| 853 | 848 | ||
| 854 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 849 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
| @@ -1484,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) | |||
| 1484 | vector = reg & APIC_VECTOR_MASK; | 1479 | vector = reg & APIC_VECTOR_MASK; |
| 1485 | mode = reg & APIC_MODE_MASK; | 1480 | mode = reg & APIC_MODE_MASK; |
| 1486 | trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; | 1481 | trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; |
| 1487 | return __apic_accept_irq(apic, mode, vector, 1, trig_mode); | 1482 | return __apic_accept_irq(apic, mode, vector, 1, trig_mode, |
| 1483 | NULL); | ||
| 1488 | } | 1484 | } |
| 1489 | return 0; | 1485 | return 0; |
| 1490 | } | 1486 | } |
| @@ -1654,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | |||
| 1654 | apic->highest_isr_cache = -1; | 1650 | apic->highest_isr_cache = -1; |
| 1655 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); | 1651 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); |
| 1656 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1652 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 1653 | kvm_rtc_eoi_tracking_restore_one(vcpu); | ||
| 1657 | } | 1654 | } |
| 1658 | 1655 | ||
| 1659 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | 1656 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) |
| @@ -1860,6 +1857,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | |||
| 1860 | addr, sizeof(u8)); | 1857 | addr, sizeof(u8)); |
| 1861 | } | 1858 | } |
| 1862 | 1859 | ||
| 1860 | void kvm_apic_accept_events(struct kvm_vcpu *vcpu) | ||
| 1861 | { | ||
| 1862 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 1863 | unsigned int sipi_vector; | ||
| 1864 | |||
| 1865 | if (!kvm_vcpu_has_lapic(vcpu)) | ||
| 1866 | return; | ||
| 1867 | |||
| 1868 | if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { | ||
| 1869 | kvm_lapic_reset(vcpu); | ||
| 1870 | kvm_vcpu_reset(vcpu); | ||
| 1871 | if (kvm_vcpu_is_bsp(apic->vcpu)) | ||
| 1872 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 1873 | else | ||
| 1874 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | ||
| 1875 | } | ||
| 1876 | if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && | ||
| 1877 | vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | ||
| 1878 | /* evaluate pending_events before reading the vector */ | ||
| 1879 | smp_rmb(); | ||
| 1880 | sipi_vector = apic->sipi_vector; | ||
| 1881 | pr_debug("vcpu %d received sipi with vector # %x\n", | ||
| 1882 | vcpu->vcpu_id, sipi_vector); | ||
| 1883 | kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); | ||
| 1884 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 1885 | } | ||
| 1886 | } | ||
| 1887 | |||
| 1863 | void kvm_lapic_init(void) | 1888 | void kvm_lapic_init(void) |
| 1864 | { | 1889 | { |
| 1865 | /* do not patch jump label more than once per second */ | 1890 | /* do not patch jump label more than once per second */ |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1676d34ddb4e..c730ac9fe801 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
| @@ -5,6 +5,9 @@ | |||
| 5 | 5 | ||
| 6 | #include <linux/kvm_host.h> | 6 | #include <linux/kvm_host.h> |
| 7 | 7 | ||
| 8 | #define KVM_APIC_INIT 0 | ||
| 9 | #define KVM_APIC_SIPI 1 | ||
| 10 | |||
| 8 | struct kvm_timer { | 11 | struct kvm_timer { |
| 9 | struct hrtimer timer; | 12 | struct hrtimer timer; |
| 10 | s64 period; /* unit: ns */ | 13 | s64 period; /* unit: ns */ |
| @@ -32,6 +35,8 @@ struct kvm_lapic { | |||
| 32 | void *regs; | 35 | void *regs; |
| 33 | gpa_t vapic_addr; | 36 | gpa_t vapic_addr; |
| 34 | struct page *vapic_page; | 37 | struct page *vapic_page; |
| 38 | unsigned long pending_events; | ||
| 39 | unsigned int sipi_vector; | ||
| 35 | }; | 40 | }; |
| 36 | int kvm_create_lapic(struct kvm_vcpu *vcpu); | 41 | int kvm_create_lapic(struct kvm_vcpu *vcpu); |
| 37 | void kvm_free_lapic(struct kvm_vcpu *vcpu); | 42 | void kvm_free_lapic(struct kvm_vcpu *vcpu); |
| @@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); | |||
| 39 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); | 44 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); |
| 40 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); | 45 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); |
| 41 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); | 46 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); |
| 47 | void kvm_apic_accept_events(struct kvm_vcpu *vcpu); | ||
| 42 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); | 48 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); |
| 43 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); | 49 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); |
| 44 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); | 50 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); |
| @@ -47,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | |||
| 47 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | 53 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); |
| 48 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); | 54 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); |
| 49 | 55 | ||
| 56 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); | ||
| 57 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | ||
| 50 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 58 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
| 51 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 59 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
| 52 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | 60 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
| 61 | unsigned long *dest_map); | ||
| 53 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | 62 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); |
| 54 | 63 | ||
| 55 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | 64 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, |
| 56 | struct kvm_lapic_irq *irq, int *r); | 65 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); |
| 57 | 66 | ||
| 58 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 67 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
| 59 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 68 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
| @@ -154,8 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | |||
| 154 | return ldr & map->lid_mask; | 163 | return ldr & map->lid_mask; |
| 155 | } | 164 | } |
| 156 | 165 | ||
| 157 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | 166 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) |
| 158 | struct kvm_lapic_irq *irq, | 167 | { |
| 159 | u64 *eoi_bitmap); | 168 | return vcpu->arch.apic->pending_events; |
| 169 | } | ||
| 170 | |||
| 171 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | ||
| 160 | 172 | ||
| 161 | #endif | 173 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 956ca358108a..004cc87b781c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); | |||
| 199 | 199 | ||
| 200 | static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) | 200 | static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) |
| 201 | { | 201 | { |
| 202 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
| 203 | |||
| 202 | access &= ACC_WRITE_MASK | ACC_USER_MASK; | 204 | access &= ACC_WRITE_MASK | ACC_USER_MASK; |
| 203 | 205 | ||
| 206 | sp->mmio_cached = true; | ||
| 204 | trace_mark_mmio_spte(sptep, gfn, access); | 207 | trace_mark_mmio_spte(sptep, gfn, access); |
| 205 | mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); | 208 | mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); |
| 206 | } | 209 | } |
| @@ -1502,6 +1505,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
| 1502 | u64 *parent_pte, int direct) | 1505 | u64 *parent_pte, int direct) |
| 1503 | { | 1506 | { |
| 1504 | struct kvm_mmu_page *sp; | 1507 | struct kvm_mmu_page *sp; |
| 1508 | |||
| 1505 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); | 1509 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); |
| 1506 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); | 1510 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
| 1507 | if (!direct) | 1511 | if (!direct) |
| @@ -1644,16 +1648,14 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
| 1644 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 1648 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
| 1645 | struct list_head *invalid_list); | 1649 | struct list_head *invalid_list); |
| 1646 | 1650 | ||
| 1647 | #define for_each_gfn_sp(kvm, sp, gfn) \ | 1651 | #define for_each_gfn_sp(_kvm, _sp, _gfn) \ |
| 1648 | hlist_for_each_entry(sp, \ | 1652 | hlist_for_each_entry(_sp, \ |
| 1649 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1653 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ |
| 1650 | if ((sp)->gfn != (gfn)) {} else | 1654 | if ((_sp)->gfn != (_gfn)) {} else |
| 1651 | 1655 | ||
| 1652 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ | 1656 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ |
| 1653 | hlist_for_each_entry(sp, \ | 1657 | for_each_gfn_sp(_kvm, _sp, _gfn) \ |
| 1654 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1658 | if ((_sp)->role.direct || (_sp)->role.invalid) {} else |
| 1655 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ | ||
| 1656 | (sp)->role.invalid) {} else | ||
| 1657 | 1659 | ||
| 1658 | /* @sp->gfn should be write-protected at the call site */ | 1660 | /* @sp->gfn should be write-protected at the call site */ |
| 1659 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1661 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
| @@ -2089,7 +2091,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
| 2089 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 2091 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
| 2090 | struct list_head *invalid_list) | 2092 | struct list_head *invalid_list) |
| 2091 | { | 2093 | { |
| 2092 | struct kvm_mmu_page *sp; | 2094 | struct kvm_mmu_page *sp, *nsp; |
| 2093 | 2095 | ||
| 2094 | if (list_empty(invalid_list)) | 2096 | if (list_empty(invalid_list)) |
| 2095 | return; | 2097 | return; |
| @@ -2106,11 +2108,25 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
| 2106 | */ | 2108 | */ |
| 2107 | kvm_flush_remote_tlbs(kvm); | 2109 | kvm_flush_remote_tlbs(kvm); |
| 2108 | 2110 | ||
| 2109 | do { | 2111 | list_for_each_entry_safe(sp, nsp, invalid_list, link) { |
| 2110 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | ||
| 2111 | WARN_ON(!sp->role.invalid || sp->root_count); | 2112 | WARN_ON(!sp->role.invalid || sp->root_count); |
| 2112 | kvm_mmu_free_page(sp); | 2113 | kvm_mmu_free_page(sp); |
| 2113 | } while (!list_empty(invalid_list)); | 2114 | } |
| 2115 | } | ||
| 2116 | |||
| 2117 | static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, | ||
| 2118 | struct list_head *invalid_list) | ||
| 2119 | { | ||
| 2120 | struct kvm_mmu_page *sp; | ||
| 2121 | |||
| 2122 | if (list_empty(&kvm->arch.active_mmu_pages)) | ||
| 2123 | return false; | ||
| 2124 | |||
| 2125 | sp = list_entry(kvm->arch.active_mmu_pages.prev, | ||
| 2126 | struct kvm_mmu_page, link); | ||
| 2127 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); | ||
| 2128 | |||
| 2129 | return true; | ||
| 2114 | } | 2130 | } |
| 2115 | 2131 | ||
| 2116 | /* | 2132 | /* |
| @@ -2120,23 +2136,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
| 2120 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | 2136 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) |
| 2121 | { | 2137 | { |
| 2122 | LIST_HEAD(invalid_list); | 2138 | LIST_HEAD(invalid_list); |
| 2123 | /* | ||
| 2124 | * If we set the number of mmu pages to be smaller be than the | ||
| 2125 | * number of actived pages , we must to free some mmu pages before we | ||
| 2126 | * change the value | ||
| 2127 | */ | ||
| 2128 | 2139 | ||
| 2129 | spin_lock(&kvm->mmu_lock); | 2140 | spin_lock(&kvm->mmu_lock); |
| 2130 | 2141 | ||
| 2131 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { | 2142 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { |
| 2132 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && | 2143 | /* Need to free some mmu pages to achieve the goal. */ |
| 2133 | !list_empty(&kvm->arch.active_mmu_pages)) { | 2144 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) |
| 2134 | struct kvm_mmu_page *page; | 2145 | if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list)) |
| 2146 | break; | ||
| 2135 | 2147 | ||
| 2136 | page = container_of(kvm->arch.active_mmu_pages.prev, | ||
| 2137 | struct kvm_mmu_page, link); | ||
| 2138 | kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); | ||
| 2139 | } | ||
| 2140 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 2148 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
| 2141 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; | 2149 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; |
| 2142 | } | 2150 | } |
| @@ -2794,6 +2802,7 @@ exit: | |||
| 2794 | 2802 | ||
| 2795 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 2803 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
| 2796 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | 2804 | gva_t gva, pfn_t *pfn, bool write, bool *writable); |
| 2805 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu); | ||
| 2797 | 2806 | ||
| 2798 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | 2807 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, |
| 2799 | gfn_t gfn, bool prefault) | 2808 | gfn_t gfn, bool prefault) |
| @@ -2835,7 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
| 2835 | spin_lock(&vcpu->kvm->mmu_lock); | 2844 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2836 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 2845 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
| 2837 | goto out_unlock; | 2846 | goto out_unlock; |
| 2838 | kvm_mmu_free_some_pages(vcpu); | 2847 | make_mmu_pages_available(vcpu); |
| 2839 | if (likely(!force_pt_level)) | 2848 | if (likely(!force_pt_level)) |
| 2840 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 2849 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
| 2841 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, | 2850 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, |
| @@ -2913,7 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
| 2913 | 2922 | ||
| 2914 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2923 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
| 2915 | spin_lock(&vcpu->kvm->mmu_lock); | 2924 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2916 | kvm_mmu_free_some_pages(vcpu); | 2925 | make_mmu_pages_available(vcpu); |
| 2917 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, | 2926 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, |
| 2918 | 1, ACC_ALL, NULL); | 2927 | 1, ACC_ALL, NULL); |
| 2919 | ++sp->root_count; | 2928 | ++sp->root_count; |
| @@ -2925,7 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
| 2925 | 2934 | ||
| 2926 | ASSERT(!VALID_PAGE(root)); | 2935 | ASSERT(!VALID_PAGE(root)); |
| 2927 | spin_lock(&vcpu->kvm->mmu_lock); | 2936 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2928 | kvm_mmu_free_some_pages(vcpu); | 2937 | make_mmu_pages_available(vcpu); |
| 2929 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), | 2938 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), |
| 2930 | i << 30, | 2939 | i << 30, |
| 2931 | PT32_ROOT_LEVEL, 1, ACC_ALL, | 2940 | PT32_ROOT_LEVEL, 1, ACC_ALL, |
| @@ -2964,7 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 2964 | ASSERT(!VALID_PAGE(root)); | 2973 | ASSERT(!VALID_PAGE(root)); |
| 2965 | 2974 | ||
| 2966 | spin_lock(&vcpu->kvm->mmu_lock); | 2975 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2967 | kvm_mmu_free_some_pages(vcpu); | 2976 | make_mmu_pages_available(vcpu); |
| 2968 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, | 2977 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, |
| 2969 | 0, ACC_ALL, NULL); | 2978 | 0, ACC_ALL, NULL); |
| 2970 | root = __pa(sp->spt); | 2979 | root = __pa(sp->spt); |
| @@ -2998,7 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 2998 | return 1; | 3007 | return 1; |
| 2999 | } | 3008 | } |
| 3000 | spin_lock(&vcpu->kvm->mmu_lock); | 3009 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3001 | kvm_mmu_free_some_pages(vcpu); | 3010 | make_mmu_pages_available(vcpu); |
| 3002 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 3011 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
| 3003 | PT32_ROOT_LEVEL, 0, | 3012 | PT32_ROOT_LEVEL, 0, |
| 3004 | ACC_ALL, NULL); | 3013 | ACC_ALL, NULL); |
| @@ -3304,7 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
| 3304 | spin_lock(&vcpu->kvm->mmu_lock); | 3313 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3305 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 3314 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
| 3306 | goto out_unlock; | 3315 | goto out_unlock; |
| 3307 | kvm_mmu_free_some_pages(vcpu); | 3316 | make_mmu_pages_available(vcpu); |
| 3308 | if (likely(!force_pt_level)) | 3317 | if (likely(!force_pt_level)) |
| 3309 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 3318 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
| 3310 | r = __direct_map(vcpu, gpa, write, map_writable, | 3319 | r = __direct_map(vcpu, gpa, write, map_writable, |
| @@ -4006,17 +4015,17 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 4006 | } | 4015 | } |
| 4007 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 4016 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
| 4008 | 4017 | ||
| 4009 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 4018 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu) |
| 4010 | { | 4019 | { |
| 4011 | LIST_HEAD(invalid_list); | 4020 | LIST_HEAD(invalid_list); |
| 4012 | 4021 | ||
| 4013 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES && | 4022 | if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) |
| 4014 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | 4023 | return; |
| 4015 | struct kvm_mmu_page *sp; | 4024 | |
| 4025 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { | ||
| 4026 | if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) | ||
| 4027 | break; | ||
| 4016 | 4028 | ||
| 4017 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | ||
| 4018 | struct kvm_mmu_page, link); | ||
| 4019 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); | ||
| 4020 | ++vcpu->kvm->stat.mmu_recycled; | 4029 | ++vcpu->kvm->stat.mmu_recycled; |
| 4021 | } | 4030 | } |
| 4022 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 4031 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
| @@ -4185,17 +4194,22 @@ restart: | |||
| 4185 | spin_unlock(&kvm->mmu_lock); | 4194 | spin_unlock(&kvm->mmu_lock); |
| 4186 | } | 4195 | } |
| 4187 | 4196 | ||
| 4188 | static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | 4197 | void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) |
| 4189 | struct list_head *invalid_list) | ||
| 4190 | { | 4198 | { |
| 4191 | struct kvm_mmu_page *page; | 4199 | struct kvm_mmu_page *sp, *node; |
| 4200 | LIST_HEAD(invalid_list); | ||
| 4192 | 4201 | ||
| 4193 | if (list_empty(&kvm->arch.active_mmu_pages)) | 4202 | spin_lock(&kvm->mmu_lock); |
| 4194 | return; | 4203 | restart: |
| 4204 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { | ||
| 4205 | if (!sp->mmio_cached) | ||
| 4206 | continue; | ||
| 4207 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) | ||
| 4208 | goto restart; | ||
| 4209 | } | ||
| 4195 | 4210 | ||
| 4196 | page = container_of(kvm->arch.active_mmu_pages.prev, | 4211 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
| 4197 | struct kvm_mmu_page, link); | 4212 | spin_unlock(&kvm->mmu_lock); |
| 4198 | kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | ||
| 4199 | } | 4213 | } |
| 4200 | 4214 | ||
| 4201 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 4215 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
| @@ -4232,7 +4246,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 4232 | idx = srcu_read_lock(&kvm->srcu); | 4246 | idx = srcu_read_lock(&kvm->srcu); |
| 4233 | spin_lock(&kvm->mmu_lock); | 4247 | spin_lock(&kvm->mmu_lock); |
| 4234 | 4248 | ||
| 4235 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list); | 4249 | prepare_zap_oldest_mmu_page(kvm, &invalid_list); |
| 4236 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4250 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
| 4237 | 4251 | ||
| 4238 | spin_unlock(&kvm->mmu_lock); | 4252 | spin_unlock(&kvm->mmu_lock); |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 69871080e866..2adcbc2cac6d 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
| @@ -57,14 +57,11 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | |||
| 57 | 57 | ||
| 58 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) | 58 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) |
| 59 | { | 59 | { |
| 60 | return kvm->arch.n_max_mmu_pages - | 60 | if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) |
| 61 | kvm->arch.n_used_mmu_pages; | 61 | return kvm->arch.n_max_mmu_pages - |
| 62 | } | 62 | kvm->arch.n_used_mmu_pages; |
| 63 | 63 | ||
| 64 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 64 | return 0; |
| 65 | { | ||
| 66 | if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES)) | ||
| 67 | __kvm_mmu_free_some_pages(vcpu); | ||
| 68 | } | 65 | } |
| 69 | 66 | ||
| 70 | static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) | 67 | static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 105dd5bd550e..da20860b457a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -627,7 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 627 | goto out_unlock; | 627 | goto out_unlock; |
| 628 | 628 | ||
| 629 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 629 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
| 630 | kvm_mmu_free_some_pages(vcpu); | 630 | make_mmu_pages_available(vcpu); |
| 631 | if (!force_pt_level) | 631 | if (!force_pt_level) |
| 632 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 632 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
| 633 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, | 633 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cfc258a6bf97..c53e797e7369 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
| @@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) | |||
| 360 | return 1; | 360 | return 1; |
| 361 | } | 361 | } |
| 362 | 362 | ||
| 363 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | 363 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
| 364 | { | 364 | { |
| 365 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 365 | struct kvm_pmu *pmu = &vcpu->arch.pmu; |
| 366 | struct kvm_pmc *pmc; | 366 | struct kvm_pmc *pmc; |
| 367 | u32 index = msr_info->index; | ||
| 368 | u64 data = msr_info->data; | ||
| 367 | 369 | ||
| 368 | switch (index) { | 370 | switch (index) { |
| 369 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | 371 | case MSR_CORE_PERF_FIXED_CTR_CTRL: |
| @@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
| 375 | } | 377 | } |
| 376 | break; | 378 | break; |
| 377 | case MSR_CORE_PERF_GLOBAL_STATUS: | 379 | case MSR_CORE_PERF_GLOBAL_STATUS: |
| 380 | if (msr_info->host_initiated) { | ||
| 381 | pmu->global_status = data; | ||
| 382 | return 0; | ||
| 383 | } | ||
| 378 | break; /* RO MSR */ | 384 | break; /* RO MSR */ |
| 379 | case MSR_CORE_PERF_GLOBAL_CTRL: | 385 | case MSR_CORE_PERF_GLOBAL_CTRL: |
| 380 | if (pmu->global_ctrl == data) | 386 | if (pmu->global_ctrl == data) |
| @@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
| 386 | break; | 392 | break; |
| 387 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | 393 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: |
| 388 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | 394 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { |
| 389 | pmu->global_status &= ~data; | 395 | if (!msr_info->host_initiated) |
| 396 | pmu->global_status &= ~data; | ||
| 390 | pmu->global_ovf_ctrl = data; | 397 | pmu->global_ovf_ctrl = data; |
| 391 | return 0; | 398 | return 0; |
| 392 | } | 399 | } |
| @@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
| 394 | default: | 401 | default: |
| 395 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | 402 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || |
| 396 | (pmc = get_fixed_pmc(pmu, index))) { | 403 | (pmc = get_fixed_pmc(pmu, index))) { |
| 397 | data = (s64)(s32)data; | 404 | if (!msr_info->host_initiated) |
| 405 | data = (s64)(s32)data; | ||
| 398 | pmc->counter += data - read_pmc(pmc); | 406 | pmc->counter += data - read_pmc(pmc); |
| 399 | return 0; | 407 | return 0; |
| 400 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | 408 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7d39d70647e3..a14a6eaf871d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -1131,17 +1131,11 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1131 | init_seg(&save->gs); | 1131 | init_seg(&save->gs); |
| 1132 | 1132 | ||
| 1133 | save->cs.selector = 0xf000; | 1133 | save->cs.selector = 0xf000; |
| 1134 | save->cs.base = 0xffff0000; | ||
| 1134 | /* Executable/Readable Code Segment */ | 1135 | /* Executable/Readable Code Segment */ |
| 1135 | save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | | 1136 | save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | |
| 1136 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; | 1137 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; |
| 1137 | save->cs.limit = 0xffff; | 1138 | save->cs.limit = 0xffff; |
| 1138 | /* | ||
| 1139 | * cs.base should really be 0xffff0000, but vmx can't handle that, so | ||
| 1140 | * be consistent with it. | ||
| 1141 | * | ||
| 1142 | * Replace when we have real mode working for vmx. | ||
| 1143 | */ | ||
| 1144 | save->cs.base = 0xf0000; | ||
| 1145 | 1139 | ||
| 1146 | save->gdtr.limit = 0xffff; | 1140 | save->gdtr.limit = 0xffff; |
| 1147 | save->idtr.limit = 0xffff; | 1141 | save->idtr.limit = 0xffff; |
| @@ -1191,7 +1185,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1191 | enable_gif(svm); | 1185 | enable_gif(svm); |
| 1192 | } | 1186 | } |
| 1193 | 1187 | ||
| 1194 | static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | 1188 | static void svm_vcpu_reset(struct kvm_vcpu *vcpu) |
| 1195 | { | 1189 | { |
| 1196 | struct vcpu_svm *svm = to_svm(vcpu); | 1190 | struct vcpu_svm *svm = to_svm(vcpu); |
| 1197 | u32 dummy; | 1191 | u32 dummy; |
| @@ -1199,16 +1193,8 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 1199 | 1193 | ||
| 1200 | init_vmcb(svm); | 1194 | init_vmcb(svm); |
| 1201 | 1195 | ||
| 1202 | if (!kvm_vcpu_is_bsp(vcpu)) { | ||
| 1203 | kvm_rip_write(vcpu, 0); | ||
| 1204 | svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; | ||
| 1205 | svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1196 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); |
| 1209 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1197 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
| 1210 | |||
| 1211 | return 0; | ||
| 1212 | } | 1198 | } |
| 1213 | 1199 | ||
| 1214 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | 1200 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) |
| @@ -3487,7 +3473,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 3487 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && | 3473 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && |
| 3488 | exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && | 3474 | exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && |
| 3489 | exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) | 3475 | exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) |
| 3490 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " | 3476 | printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " |
| 3491 | "exit_code 0x%x\n", | 3477 | "exit_code 0x%x\n", |
| 3492 | __func__, svm->vmcb->control.exit_int_info, | 3478 | __func__, svm->vmcb->control.exit_int_info, |
| 3493 | exit_code); | 3479 | exit_code); |
| @@ -3591,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr) | |||
| 3591 | return; | 3577 | return; |
| 3592 | } | 3578 | } |
| 3593 | 3579 | ||
| 3580 | static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
| 3581 | { | ||
| 3582 | return; | ||
| 3583 | } | ||
| 3584 | |||
| 3594 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 3585 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
| 3595 | { | 3586 | { |
| 3596 | struct vcpu_svm *svm = to_svm(vcpu); | 3587 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -3641,7 +3632,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
| 3641 | return ret; | 3632 | return ret; |
| 3642 | } | 3633 | } |
| 3643 | 3634 | ||
| 3644 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 3635 | static int enable_irq_window(struct kvm_vcpu *vcpu) |
| 3645 | { | 3636 | { |
| 3646 | struct vcpu_svm *svm = to_svm(vcpu); | 3637 | struct vcpu_svm *svm = to_svm(vcpu); |
| 3647 | 3638 | ||
| @@ -3655,15 +3646,16 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
| 3655 | svm_set_vintr(svm); | 3646 | svm_set_vintr(svm); |
| 3656 | svm_inject_irq(svm, 0x0); | 3647 | svm_inject_irq(svm, 0x0); |
| 3657 | } | 3648 | } |
| 3649 | return 0; | ||
| 3658 | } | 3650 | } |
| 3659 | 3651 | ||
| 3660 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 3652 | static int enable_nmi_window(struct kvm_vcpu *vcpu) |
| 3661 | { | 3653 | { |
| 3662 | struct vcpu_svm *svm = to_svm(vcpu); | 3654 | struct vcpu_svm *svm = to_svm(vcpu); |
| 3663 | 3655 | ||
| 3664 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) | 3656 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) |
| 3665 | == HF_NMI_MASK) | 3657 | == HF_NMI_MASK) |
| 3666 | return; /* IRET will cause a vm exit */ | 3658 | return 0; /* IRET will cause a vm exit */ |
| 3667 | 3659 | ||
| 3668 | /* | 3660 | /* |
| 3669 | * Something prevents NMI from been injected. Single step over possible | 3661 | * Something prevents NMI from been injected. Single step over possible |
| @@ -3672,6 +3664,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
| 3672 | svm->nmi_singlestep = true; | 3664 | svm->nmi_singlestep = true; |
| 3673 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 3665 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
| 3674 | update_db_bp_intercept(vcpu); | 3666 | update_db_bp_intercept(vcpu); |
| 3667 | return 0; | ||
| 3675 | } | 3668 | } |
| 3676 | 3669 | ||
| 3677 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 3670 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
| @@ -4247,6 +4240,11 @@ out: | |||
| 4247 | return ret; | 4240 | return ret; |
| 4248 | } | 4241 | } |
| 4249 | 4242 | ||
| 4243 | static void svm_handle_external_intr(struct kvm_vcpu *vcpu) | ||
| 4244 | { | ||
| 4245 | local_irq_enable(); | ||
| 4246 | } | ||
| 4247 | |||
| 4250 | static struct kvm_x86_ops svm_x86_ops = { | 4248 | static struct kvm_x86_ops svm_x86_ops = { |
| 4251 | .cpu_has_kvm_support = has_svm, | 4249 | .cpu_has_kvm_support = has_svm, |
| 4252 | .disabled_by_bios = is_disabled, | 4250 | .disabled_by_bios = is_disabled, |
| @@ -4314,6 +4312,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 4314 | .vm_has_apicv = svm_vm_has_apicv, | 4312 | .vm_has_apicv = svm_vm_has_apicv, |
| 4315 | .load_eoi_exitmap = svm_load_eoi_exitmap, | 4313 | .load_eoi_exitmap = svm_load_eoi_exitmap, |
| 4316 | .hwapic_isr_update = svm_hwapic_isr_update, | 4314 | .hwapic_isr_update = svm_hwapic_isr_update, |
| 4315 | .sync_pir_to_irr = svm_sync_pir_to_irr, | ||
| 4317 | 4316 | ||
| 4318 | .set_tss_addr = svm_set_tss_addr, | 4317 | .set_tss_addr = svm_set_tss_addr, |
| 4319 | .get_tdp_level = get_npt_level, | 4318 | .get_tdp_level = get_npt_level, |
| @@ -4342,6 +4341,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 4342 | .set_tdp_cr3 = set_tdp_cr3, | 4341 | .set_tdp_cr3 = set_tdp_cr3, |
| 4343 | 4342 | ||
| 4344 | .check_intercept = svm_check_intercept, | 4343 | .check_intercept = svm_check_intercept, |
| 4344 | .handle_external_intr = svm_handle_external_intr, | ||
| 4345 | }; | 4345 | }; |
| 4346 | 4346 | ||
| 4347 | static int __init svm_init(void) | 4347 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 867b81037f96..25a791ed21c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -84,8 +84,11 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
| 84 | static bool __read_mostly fasteoi = 1; | 84 | static bool __read_mostly fasteoi = 1; |
| 85 | module_param(fasteoi, bool, S_IRUGO); | 85 | module_param(fasteoi, bool, S_IRUGO); |
| 86 | 86 | ||
| 87 | static bool __read_mostly enable_apicv_reg_vid; | 87 | static bool __read_mostly enable_apicv = 1; |
| 88 | module_param(enable_apicv, bool, S_IRUGO); | ||
| 88 | 89 | ||
| 90 | static bool __read_mostly enable_shadow_vmcs = 1; | ||
| 91 | module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); | ||
| 89 | /* | 92 | /* |
| 90 | * If nested=1, nested virtualization is supported, i.e., guests may use | 93 | * If nested=1, nested virtualization is supported, i.e., guests may use |
| 91 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 94 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
| @@ -298,7 +301,8 @@ struct __packed vmcs12 { | |||
| 298 | u32 guest_activity_state; | 301 | u32 guest_activity_state; |
| 299 | u32 guest_sysenter_cs; | 302 | u32 guest_sysenter_cs; |
| 300 | u32 host_ia32_sysenter_cs; | 303 | u32 host_ia32_sysenter_cs; |
| 301 | u32 padding32[8]; /* room for future expansion */ | 304 | u32 vmx_preemption_timer_value; |
| 305 | u32 padding32[7]; /* room for future expansion */ | ||
| 302 | u16 virtual_processor_id; | 306 | u16 virtual_processor_id; |
| 303 | u16 guest_es_selector; | 307 | u16 guest_es_selector; |
| 304 | u16 guest_cs_selector; | 308 | u16 guest_cs_selector; |
| @@ -351,6 +355,12 @@ struct nested_vmx { | |||
| 351 | /* The host-usable pointer to the above */ | 355 | /* The host-usable pointer to the above */ |
| 352 | struct page *current_vmcs12_page; | 356 | struct page *current_vmcs12_page; |
| 353 | struct vmcs12 *current_vmcs12; | 357 | struct vmcs12 *current_vmcs12; |
| 358 | struct vmcs *current_shadow_vmcs; | ||
| 359 | /* | ||
| 360 | * Indicates if the shadow vmcs must be updated with the | ||
| 361 | * data hold by vmcs12 | ||
| 362 | */ | ||
| 363 | bool sync_shadow_vmcs; | ||
| 354 | 364 | ||
| 355 | /* vmcs02_list cache of VMCSs recently used to run L2 guests */ | 365 | /* vmcs02_list cache of VMCSs recently used to run L2 guests */ |
| 356 | struct list_head vmcs02_pool; | 366 | struct list_head vmcs02_pool; |
| @@ -365,6 +375,31 @@ struct nested_vmx { | |||
| 365 | struct page *apic_access_page; | 375 | struct page *apic_access_page; |
| 366 | }; | 376 | }; |
| 367 | 377 | ||
| 378 | #define POSTED_INTR_ON 0 | ||
| 379 | /* Posted-Interrupt Descriptor */ | ||
| 380 | struct pi_desc { | ||
| 381 | u32 pir[8]; /* Posted interrupt requested */ | ||
| 382 | u32 control; /* bit 0 of control is outstanding notification bit */ | ||
| 383 | u32 rsvd[7]; | ||
| 384 | } __aligned(64); | ||
| 385 | |||
| 386 | static bool pi_test_and_set_on(struct pi_desc *pi_desc) | ||
| 387 | { | ||
| 388 | return test_and_set_bit(POSTED_INTR_ON, | ||
| 389 | (unsigned long *)&pi_desc->control); | ||
| 390 | } | ||
| 391 | |||
| 392 | static bool pi_test_and_clear_on(struct pi_desc *pi_desc) | ||
| 393 | { | ||
| 394 | return test_and_clear_bit(POSTED_INTR_ON, | ||
| 395 | (unsigned long *)&pi_desc->control); | ||
| 396 | } | ||
| 397 | |||
| 398 | static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | ||
| 399 | { | ||
| 400 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); | ||
| 401 | } | ||
| 402 | |||
| 368 | struct vcpu_vmx { | 403 | struct vcpu_vmx { |
| 369 | struct kvm_vcpu vcpu; | 404 | struct kvm_vcpu vcpu; |
| 370 | unsigned long host_rsp; | 405 | unsigned long host_rsp; |
| @@ -377,6 +412,7 @@ struct vcpu_vmx { | |||
| 377 | struct shared_msr_entry *guest_msrs; | 412 | struct shared_msr_entry *guest_msrs; |
| 378 | int nmsrs; | 413 | int nmsrs; |
| 379 | int save_nmsrs; | 414 | int save_nmsrs; |
| 415 | unsigned long host_idt_base; | ||
| 380 | #ifdef CONFIG_X86_64 | 416 | #ifdef CONFIG_X86_64 |
| 381 | u64 msr_host_kernel_gs_base; | 417 | u64 msr_host_kernel_gs_base; |
| 382 | u64 msr_guest_kernel_gs_base; | 418 | u64 msr_guest_kernel_gs_base; |
| @@ -428,6 +464,9 @@ struct vcpu_vmx { | |||
| 428 | 464 | ||
| 429 | bool rdtscp_enabled; | 465 | bool rdtscp_enabled; |
| 430 | 466 | ||
| 467 | /* Posted interrupt descriptor */ | ||
| 468 | struct pi_desc pi_desc; | ||
| 469 | |||
| 431 | /* Support for a guest hypervisor (nested VMX) */ | 470 | /* Support for a guest hypervisor (nested VMX) */ |
| 432 | struct nested_vmx nested; | 471 | struct nested_vmx nested; |
| 433 | }; | 472 | }; |
| @@ -451,6 +490,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
| 451 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ | 490 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ |
| 452 | [number##_HIGH] = VMCS12_OFFSET(name)+4 | 491 | [number##_HIGH] = VMCS12_OFFSET(name)+4 |
| 453 | 492 | ||
| 493 | |||
| 494 | static const unsigned long shadow_read_only_fields[] = { | ||
| 495 | /* | ||
| 496 | * We do NOT shadow fields that are modified when L0 | ||
| 497 | * traps and emulates any vmx instruction (e.g. VMPTRLD, | ||
| 498 | * VMXON...) executed by L1. | ||
| 499 | * For example, VM_INSTRUCTION_ERROR is read | ||
| 500 | * by L1 if a vmx instruction fails (part of the error path). | ||
| 501 | * Note the code assumes this logic. If for some reason | ||
| 502 | * we start shadowing these fields then we need to | ||
| 503 | * force a shadow sync when L0 emulates vmx instructions | ||
| 504 | * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified | ||
| 505 | * by nested_vmx_failValid) | ||
| 506 | */ | ||
| 507 | VM_EXIT_REASON, | ||
| 508 | VM_EXIT_INTR_INFO, | ||
| 509 | VM_EXIT_INSTRUCTION_LEN, | ||
| 510 | IDT_VECTORING_INFO_FIELD, | ||
| 511 | IDT_VECTORING_ERROR_CODE, | ||
| 512 | VM_EXIT_INTR_ERROR_CODE, | ||
| 513 | EXIT_QUALIFICATION, | ||
| 514 | GUEST_LINEAR_ADDRESS, | ||
| 515 | GUEST_PHYSICAL_ADDRESS | ||
| 516 | }; | ||
| 517 | static const int max_shadow_read_only_fields = | ||
| 518 | ARRAY_SIZE(shadow_read_only_fields); | ||
| 519 | |||
| 520 | static const unsigned long shadow_read_write_fields[] = { | ||
| 521 | GUEST_RIP, | ||
| 522 | GUEST_RSP, | ||
| 523 | GUEST_CR0, | ||
| 524 | GUEST_CR3, | ||
| 525 | GUEST_CR4, | ||
| 526 | GUEST_INTERRUPTIBILITY_INFO, | ||
| 527 | GUEST_RFLAGS, | ||
| 528 | GUEST_CS_SELECTOR, | ||
| 529 | GUEST_CS_AR_BYTES, | ||
| 530 | GUEST_CS_LIMIT, | ||
| 531 | GUEST_CS_BASE, | ||
| 532 | GUEST_ES_BASE, | ||
| 533 | CR0_GUEST_HOST_MASK, | ||
| 534 | CR0_READ_SHADOW, | ||
| 535 | CR4_READ_SHADOW, | ||
| 536 | TSC_OFFSET, | ||
| 537 | EXCEPTION_BITMAP, | ||
| 538 | CPU_BASED_VM_EXEC_CONTROL, | ||
| 539 | VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
| 540 | VM_ENTRY_INTR_INFO_FIELD, | ||
| 541 | VM_ENTRY_INSTRUCTION_LEN, | ||
| 542 | VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
| 543 | HOST_FS_BASE, | ||
| 544 | HOST_GS_BASE, | ||
| 545 | HOST_FS_SELECTOR, | ||
| 546 | HOST_GS_SELECTOR | ||
| 547 | }; | ||
| 548 | static const int max_shadow_read_write_fields = | ||
| 549 | ARRAY_SIZE(shadow_read_write_fields); | ||
| 550 | |||
| 454 | static const unsigned short vmcs_field_to_offset_table[] = { | 551 | static const unsigned short vmcs_field_to_offset_table[] = { |
| 455 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), | 552 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), |
| 456 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), | 553 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), |
| @@ -537,6 +634,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { | |||
| 537 | FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), | 634 | FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), |
| 538 | FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), | 635 | FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), |
| 539 | FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), | 636 | FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), |
| 637 | FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), | ||
| 540 | FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), | 638 | FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), |
| 541 | FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), | 639 | FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), |
| 542 | FIELD(CR0_READ_SHADOW, cr0_read_shadow), | 640 | FIELD(CR0_READ_SHADOW, cr0_read_shadow), |
| @@ -624,6 +722,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
| 624 | struct kvm_segment *var, int seg); | 722 | struct kvm_segment *var, int seg); |
| 625 | static bool guest_state_valid(struct kvm_vcpu *vcpu); | 723 | static bool guest_state_valid(struct kvm_vcpu *vcpu); |
| 626 | static u32 vmx_segment_access_rights(struct kvm_segment *var); | 724 | static u32 vmx_segment_access_rights(struct kvm_segment *var); |
| 725 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); | ||
| 726 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); | ||
| 727 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); | ||
| 627 | 728 | ||
| 628 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 729 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
| 629 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 730 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
| @@ -640,6 +741,8 @@ static unsigned long *vmx_msr_bitmap_legacy; | |||
| 640 | static unsigned long *vmx_msr_bitmap_longmode; | 741 | static unsigned long *vmx_msr_bitmap_longmode; |
| 641 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; | 742 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; |
| 642 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; | 743 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; |
| 744 | static unsigned long *vmx_vmread_bitmap; | ||
| 745 | static unsigned long *vmx_vmwrite_bitmap; | ||
| 643 | 746 | ||
| 644 | static bool cpu_has_load_ia32_efer; | 747 | static bool cpu_has_load_ia32_efer; |
| 645 | static bool cpu_has_load_perf_global_ctrl; | 748 | static bool cpu_has_load_perf_global_ctrl; |
| @@ -782,6 +885,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) | |||
| 782 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | 885 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; |
| 783 | } | 886 | } |
| 784 | 887 | ||
| 888 | static inline bool cpu_has_vmx_posted_intr(void) | ||
| 889 | { | ||
| 890 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; | ||
| 891 | } | ||
| 892 | |||
| 893 | static inline bool cpu_has_vmx_apicv(void) | ||
| 894 | { | ||
| 895 | return cpu_has_vmx_apic_register_virt() && | ||
| 896 | cpu_has_vmx_virtual_intr_delivery() && | ||
| 897 | cpu_has_vmx_posted_intr(); | ||
| 898 | } | ||
| 899 | |||
| 785 | static inline bool cpu_has_vmx_flexpriority(void) | 900 | static inline bool cpu_has_vmx_flexpriority(void) |
| 786 | { | 901 | { |
| 787 | return cpu_has_vmx_tpr_shadow() && | 902 | return cpu_has_vmx_tpr_shadow() && |
| @@ -895,6 +1010,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void) | |||
| 895 | SECONDARY_EXEC_WBINVD_EXITING; | 1010 | SECONDARY_EXEC_WBINVD_EXITING; |
| 896 | } | 1011 | } |
| 897 | 1012 | ||
| 1013 | static inline bool cpu_has_vmx_shadow_vmcs(void) | ||
| 1014 | { | ||
| 1015 | u64 vmx_msr; | ||
| 1016 | rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); | ||
| 1017 | /* check if the cpu supports writing r/o exit information fields */ | ||
| 1018 | if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) | ||
| 1019 | return false; | ||
| 1020 | |||
| 1021 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 1022 | SECONDARY_EXEC_SHADOW_VMCS; | ||
| 1023 | } | ||
| 1024 | |||
| 898 | static inline bool report_flexpriority(void) | 1025 | static inline bool report_flexpriority(void) |
| 899 | { | 1026 | { |
| 900 | return flexpriority_enabled; | 1027 | return flexpriority_enabled; |
| @@ -1790,7 +1917,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
| 1790 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 1917 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
| 1791 | 1918 | ||
| 1792 | if (nr == PF_VECTOR && is_guest_mode(vcpu) && | 1919 | if (nr == PF_VECTOR && is_guest_mode(vcpu) && |
| 1793 | nested_pf_handled(vcpu)) | 1920 | !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) |
| 1794 | return; | 1921 | return; |
| 1795 | 1922 | ||
| 1796 | if (has_error_code) { | 1923 | if (has_error_code) { |
| @@ -2022,6 +2149,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; | |||
| 2022 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | 2149 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; |
| 2023 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | 2150 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; |
| 2024 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | 2151 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; |
| 2152 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; | ||
| 2025 | static __init void nested_vmx_setup_ctls_msrs(void) | 2153 | static __init void nested_vmx_setup_ctls_msrs(void) |
| 2026 | { | 2154 | { |
| 2027 | /* | 2155 | /* |
| @@ -2040,30 +2168,40 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
| 2040 | */ | 2168 | */ |
| 2041 | 2169 | ||
| 2042 | /* pin-based controls */ | 2170 | /* pin-based controls */ |
| 2171 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, | ||
| 2172 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); | ||
| 2043 | /* | 2173 | /* |
| 2044 | * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is | 2174 | * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is |
| 2045 | * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. | 2175 | * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. |
| 2046 | */ | 2176 | */ |
| 2047 | nested_vmx_pinbased_ctls_low = 0x16 ; | 2177 | nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
| 2048 | nested_vmx_pinbased_ctls_high = 0x16 | | 2178 | nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | |
| 2049 | PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | | 2179 | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | |
| 2050 | PIN_BASED_VIRTUAL_NMIS; | 2180 | PIN_BASED_VMX_PREEMPTION_TIMER; |
| 2181 | nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | ||
| 2051 | 2182 | ||
| 2052 | /* exit controls */ | 2183 | /* |
| 2053 | nested_vmx_exit_ctls_low = 0; | 2184 | * Exit controls |
| 2185 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and | ||
| 2186 | * 17 must be 1. | ||
| 2187 | */ | ||
| 2188 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | ||
| 2054 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ | 2189 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ |
| 2055 | #ifdef CONFIG_X86_64 | 2190 | #ifdef CONFIG_X86_64 |
| 2056 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2191 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; |
| 2057 | #else | 2192 | #else |
| 2058 | nested_vmx_exit_ctls_high = 0; | 2193 | nested_vmx_exit_ctls_high = 0; |
| 2059 | #endif | 2194 | #endif |
| 2195 | nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | ||
| 2060 | 2196 | ||
| 2061 | /* entry controls */ | 2197 | /* entry controls */ |
| 2062 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2198 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
| 2063 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); | 2199 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); |
| 2064 | nested_vmx_entry_ctls_low = 0; | 2200 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ |
| 2201 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | ||
| 2065 | nested_vmx_entry_ctls_high &= | 2202 | nested_vmx_entry_ctls_high &= |
| 2066 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; | 2203 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; |
| 2204 | nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | ||
| 2067 | 2205 | ||
| 2068 | /* cpu-based controls */ | 2206 | /* cpu-based controls */ |
| 2069 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2207 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
| @@ -2080,6 +2218,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
| 2080 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | 2218 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | |
| 2081 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | 2219 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | |
| 2082 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | | 2220 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | |
| 2221 | CPU_BASED_PAUSE_EXITING | | ||
| 2083 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 2222 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
| 2084 | /* | 2223 | /* |
| 2085 | * We can allow some features even when not supported by the | 2224 | * We can allow some features even when not supported by the |
| @@ -2094,7 +2233,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
| 2094 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); | 2233 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); |
| 2095 | nested_vmx_secondary_ctls_low = 0; | 2234 | nested_vmx_secondary_ctls_low = 0; |
| 2096 | nested_vmx_secondary_ctls_high &= | 2235 | nested_vmx_secondary_ctls_high &= |
| 2097 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2236 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
| 2237 | SECONDARY_EXEC_WBINVD_EXITING; | ||
| 2238 | |||
| 2239 | /* miscellaneous data */ | ||
| 2240 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | ||
| 2241 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | ||
| 2242 | VMX_MISC_SAVE_EFER_LMA; | ||
| 2243 | nested_vmx_misc_high = 0; | ||
| 2098 | } | 2244 | } |
| 2099 | 2245 | ||
| 2100 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) | 2246 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) |
| @@ -2165,7 +2311,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
| 2165 | nested_vmx_entry_ctls_high); | 2311 | nested_vmx_entry_ctls_high); |
| 2166 | break; | 2312 | break; |
| 2167 | case MSR_IA32_VMX_MISC: | 2313 | case MSR_IA32_VMX_MISC: |
| 2168 | *pdata = 0; | 2314 | *pdata = vmx_control_msr(nested_vmx_misc_low, |
| 2315 | nested_vmx_misc_high); | ||
| 2169 | break; | 2316 | break; |
| 2170 | /* | 2317 | /* |
| 2171 | * These MSRs specify bits which the guest must keep fixed (on or off) | 2318 | * These MSRs specify bits which the guest must keep fixed (on or off) |
| @@ -2529,12 +2676,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 2529 | u32 _vmexit_control = 0; | 2676 | u32 _vmexit_control = 0; |
| 2530 | u32 _vmentry_control = 0; | 2677 | u32 _vmentry_control = 0; |
| 2531 | 2678 | ||
| 2532 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | ||
| 2533 | opt = PIN_BASED_VIRTUAL_NMIS; | ||
| 2534 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | ||
| 2535 | &_pin_based_exec_control) < 0) | ||
| 2536 | return -EIO; | ||
| 2537 | |||
| 2538 | min = CPU_BASED_HLT_EXITING | | 2679 | min = CPU_BASED_HLT_EXITING | |
| 2539 | #ifdef CONFIG_X86_64 | 2680 | #ifdef CONFIG_X86_64 |
| 2540 | CPU_BASED_CR8_LOAD_EXITING | | 2681 | CPU_BASED_CR8_LOAD_EXITING | |
| @@ -2573,7 +2714,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 2573 | SECONDARY_EXEC_RDTSCP | | 2714 | SECONDARY_EXEC_RDTSCP | |
| 2574 | SECONDARY_EXEC_ENABLE_INVPCID | | 2715 | SECONDARY_EXEC_ENABLE_INVPCID | |
| 2575 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2716 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
| 2576 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | 2717 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
| 2718 | SECONDARY_EXEC_SHADOW_VMCS; | ||
| 2577 | if (adjust_vmx_controls(min2, opt2, | 2719 | if (adjust_vmx_controls(min2, opt2, |
| 2578 | MSR_IA32_VMX_PROCBASED_CTLS2, | 2720 | MSR_IA32_VMX_PROCBASED_CTLS2, |
| 2579 | &_cpu_based_2nd_exec_control) < 0) | 2721 | &_cpu_based_2nd_exec_control) < 0) |
| @@ -2605,11 +2747,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 2605 | #ifdef CONFIG_X86_64 | 2747 | #ifdef CONFIG_X86_64 |
| 2606 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2748 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; |
| 2607 | #endif | 2749 | #endif |
| 2608 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; | 2750 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | |
| 2751 | VM_EXIT_ACK_INTR_ON_EXIT; | ||
| 2609 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, | 2752 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, |
| 2610 | &_vmexit_control) < 0) | 2753 | &_vmexit_control) < 0) |
| 2611 | return -EIO; | 2754 | return -EIO; |
| 2612 | 2755 | ||
| 2756 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | ||
| 2757 | opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; | ||
| 2758 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | ||
| 2759 | &_pin_based_exec_control) < 0) | ||
| 2760 | return -EIO; | ||
| 2761 | |||
| 2762 | if (!(_cpu_based_2nd_exec_control & | ||
| 2763 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || | ||
| 2764 | !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) | ||
| 2765 | _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; | ||
| 2766 | |||
| 2613 | min = 0; | 2767 | min = 0; |
| 2614 | opt = VM_ENTRY_LOAD_IA32_PAT; | 2768 | opt = VM_ENTRY_LOAD_IA32_PAT; |
| 2615 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, | 2769 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, |
| @@ -2762,6 +2916,8 @@ static __init int hardware_setup(void) | |||
| 2762 | 2916 | ||
| 2763 | if (!cpu_has_vmx_vpid()) | 2917 | if (!cpu_has_vmx_vpid()) |
| 2764 | enable_vpid = 0; | 2918 | enable_vpid = 0; |
| 2919 | if (!cpu_has_vmx_shadow_vmcs()) | ||
| 2920 | enable_shadow_vmcs = 0; | ||
| 2765 | 2921 | ||
| 2766 | if (!cpu_has_vmx_ept() || | 2922 | if (!cpu_has_vmx_ept() || |
| 2767 | !cpu_has_vmx_ept_4levels()) { | 2923 | !cpu_has_vmx_ept_4levels()) { |
| @@ -2788,14 +2944,16 @@ static __init int hardware_setup(void) | |||
| 2788 | if (!cpu_has_vmx_ple()) | 2944 | if (!cpu_has_vmx_ple()) |
| 2789 | ple_gap = 0; | 2945 | ple_gap = 0; |
| 2790 | 2946 | ||
| 2791 | if (!cpu_has_vmx_apic_register_virt() || | 2947 | if (!cpu_has_vmx_apicv()) |
| 2792 | !cpu_has_vmx_virtual_intr_delivery()) | 2948 | enable_apicv = 0; |
| 2793 | enable_apicv_reg_vid = 0; | ||
| 2794 | 2949 | ||
| 2795 | if (enable_apicv_reg_vid) | 2950 | if (enable_apicv) |
| 2796 | kvm_x86_ops->update_cr8_intercept = NULL; | 2951 | kvm_x86_ops->update_cr8_intercept = NULL; |
| 2797 | else | 2952 | else { |
| 2798 | kvm_x86_ops->hwapic_irr_update = NULL; | 2953 | kvm_x86_ops->hwapic_irr_update = NULL; |
| 2954 | kvm_x86_ops->deliver_posted_interrupt = NULL; | ||
| 2955 | kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; | ||
| 2956 | } | ||
| 2799 | 2957 | ||
| 2800 | if (nested) | 2958 | if (nested) |
| 2801 | nested_vmx_setup_ctls_msrs(); | 2959 | nested_vmx_setup_ctls_msrs(); |
| @@ -2876,22 +3034,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
| 2876 | vmx->cpl = 0; | 3034 | vmx->cpl = 0; |
| 2877 | } | 3035 | } |
| 2878 | 3036 | ||
| 2879 | static gva_t rmode_tss_base(struct kvm *kvm) | ||
| 2880 | { | ||
| 2881 | if (!kvm->arch.tss_addr) { | ||
| 2882 | struct kvm_memslots *slots; | ||
| 2883 | struct kvm_memory_slot *slot; | ||
| 2884 | gfn_t base_gfn; | ||
| 2885 | |||
| 2886 | slots = kvm_memslots(kvm); | ||
| 2887 | slot = id_to_memslot(slots, 0); | ||
| 2888 | base_gfn = slot->base_gfn + slot->npages - 3; | ||
| 2889 | |||
| 2890 | return base_gfn << PAGE_SHIFT; | ||
| 2891 | } | ||
| 2892 | return kvm->arch.tss_addr; | ||
| 2893 | } | ||
| 2894 | |||
| 2895 | static void fix_rmode_seg(int seg, struct kvm_segment *save) | 3037 | static void fix_rmode_seg(int seg, struct kvm_segment *save) |
| 2896 | { | 3038 | { |
| 2897 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 3039 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
| @@ -2942,19 +3084,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 2942 | 3084 | ||
| 2943 | /* | 3085 | /* |
| 2944 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | 3086 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering |
| 2945 | * vcpu. Call it here with phys address pointing 16M below 4G. | 3087 | * vcpu. Warn the user that an update is overdue. |
| 2946 | */ | 3088 | */ |
| 2947 | if (!vcpu->kvm->arch.tss_addr) { | 3089 | if (!vcpu->kvm->arch.tss_addr) |
| 2948 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " | 3090 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " |
| 2949 | "called before entering vcpu\n"); | 3091 | "called before entering vcpu\n"); |
| 2950 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
| 2951 | vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); | ||
| 2952 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
| 2953 | } | ||
| 2954 | 3092 | ||
| 2955 | vmx_segment_cache_clear(vmx); | 3093 | vmx_segment_cache_clear(vmx); |
| 2956 | 3094 | ||
| 2957 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 3095 | vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr); |
| 2958 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); | 3096 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); |
| 2959 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 3097 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
| 2960 | 3098 | ||
| @@ -3214,7 +3352,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 3214 | */ | 3352 | */ |
| 3215 | if (!nested_vmx_allowed(vcpu)) | 3353 | if (!nested_vmx_allowed(vcpu)) |
| 3216 | return 1; | 3354 | return 1; |
| 3217 | } else if (to_vmx(vcpu)->nested.vmxon) | 3355 | } |
| 3356 | if (to_vmx(vcpu)->nested.vmxon && | ||
| 3357 | ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) | ||
| 3218 | return 1; | 3358 | return 1; |
| 3219 | 3359 | ||
| 3220 | vcpu->arch.cr4 = cr4; | 3360 | vcpu->arch.cr4 = cr4; |
| @@ -3550,7 +3690,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) | |||
| 3550 | return true; | 3690 | return true; |
| 3551 | 3691 | ||
| 3552 | /* real mode guest state checks */ | 3692 | /* real mode guest state checks */ |
| 3553 | if (!is_protmode(vcpu)) { | 3693 | if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { |
| 3554 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 3694 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
| 3555 | return false; | 3695 | return false; |
| 3556 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | 3696 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) |
| @@ -3599,7 +3739,7 @@ static int init_rmode_tss(struct kvm *kvm) | |||
| 3599 | int r, idx, ret = 0; | 3739 | int r, idx, ret = 0; |
| 3600 | 3740 | ||
| 3601 | idx = srcu_read_lock(&kvm->srcu); | 3741 | idx = srcu_read_lock(&kvm->srcu); |
| 3602 | fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 3742 | fn = kvm->arch.tss_addr >> PAGE_SHIFT; |
| 3603 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 3743 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
| 3604 | if (r < 0) | 3744 | if (r < 0) |
| 3605 | goto out; | 3745 | goto out; |
| @@ -3692,7 +3832,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
| 3692 | kvm_userspace_mem.flags = 0; | 3832 | kvm_userspace_mem.flags = 0; |
| 3693 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; | 3833 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; |
| 3694 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3834 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
| 3695 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); | 3835 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); |
| 3696 | if (r) | 3836 | if (r) |
| 3697 | goto out; | 3837 | goto out; |
| 3698 | 3838 | ||
| @@ -3722,7 +3862,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
| 3722 | kvm_userspace_mem.guest_phys_addr = | 3862 | kvm_userspace_mem.guest_phys_addr = |
| 3723 | kvm->arch.ept_identity_map_addr; | 3863 | kvm->arch.ept_identity_map_addr; |
| 3724 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3864 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
| 3725 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); | 3865 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); |
| 3726 | if (r) | 3866 | if (r) |
| 3727 | goto out; | 3867 | goto out; |
| 3728 | 3868 | ||
| @@ -3869,13 +4009,59 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) | |||
| 3869 | msr, MSR_TYPE_W); | 4009 | msr, MSR_TYPE_W); |
| 3870 | } | 4010 | } |
| 3871 | 4011 | ||
| 4012 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
| 4013 | { | ||
| 4014 | return enable_apicv && irqchip_in_kernel(kvm); | ||
| 4015 | } | ||
| 4016 | |||
| 4017 | /* | ||
| 4018 | * Send interrupt to vcpu via posted interrupt way. | ||
| 4019 | * 1. If target vcpu is running(non-root mode), send posted interrupt | ||
| 4020 | * notification to vcpu and hardware will sync PIR to vIRR atomically. | ||
| 4021 | * 2. If target vcpu isn't running(root mode), kick it to pick up the | ||
| 4022 | * interrupt from PIR in next vmentry. | ||
| 4023 | */ | ||
| 4024 | static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) | ||
| 4025 | { | ||
| 4026 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 4027 | int r; | ||
| 4028 | |||
| 4029 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) | ||
| 4030 | return; | ||
| 4031 | |||
| 4032 | r = pi_test_and_set_on(&vmx->pi_desc); | ||
| 4033 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
| 4034 | #ifdef CONFIG_SMP | ||
| 4035 | if (!r && (vcpu->mode == IN_GUEST_MODE)) | ||
| 4036 | apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), | ||
| 4037 | POSTED_INTR_VECTOR); | ||
| 4038 | else | ||
| 4039 | #endif | ||
| 4040 | kvm_vcpu_kick(vcpu); | ||
| 4041 | } | ||
| 4042 | |||
| 4043 | static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
| 4044 | { | ||
| 4045 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 4046 | |||
| 4047 | if (!pi_test_and_clear_on(&vmx->pi_desc)) | ||
| 4048 | return; | ||
| 4049 | |||
| 4050 | kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); | ||
| 4051 | } | ||
| 4052 | |||
| 4053 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) | ||
| 4054 | { | ||
| 4055 | return; | ||
| 4056 | } | ||
| 4057 | |||
| 3872 | /* | 4058 | /* |
| 3873 | * Set up the vmcs's constant host-state fields, i.e., host-state fields that | 4059 | * Set up the vmcs's constant host-state fields, i.e., host-state fields that |
| 3874 | * will not change in the lifetime of the guest. | 4060 | * will not change in the lifetime of the guest. |
| 3875 | * Note that host-state that does change is set elsewhere. E.g., host-state | 4061 | * Note that host-state that does change is set elsewhere. E.g., host-state |
| 3876 | * that is set differently for each CPU is set in vmx_vcpu_load(), not here. | 4062 | * that is set differently for each CPU is set in vmx_vcpu_load(), not here. |
| 3877 | */ | 4063 | */ |
| 3878 | static void vmx_set_constant_host_state(void) | 4064 | static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) |
| 3879 | { | 4065 | { |
| 3880 | u32 low32, high32; | 4066 | u32 low32, high32; |
| 3881 | unsigned long tmpl; | 4067 | unsigned long tmpl; |
| @@ -3903,6 +4089,7 @@ static void vmx_set_constant_host_state(void) | |||
| 3903 | 4089 | ||
| 3904 | native_store_idt(&dt); | 4090 | native_store_idt(&dt); |
| 3905 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ | 4091 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
| 4092 | vmx->host_idt_base = dt.address; | ||
| 3906 | 4093 | ||
| 3907 | vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ | 4094 | vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ |
| 3908 | 4095 | ||
| @@ -3928,6 +4115,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) | |||
| 3928 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | 4115 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); |
| 3929 | } | 4116 | } |
| 3930 | 4117 | ||
| 4118 | static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) | ||
| 4119 | { | ||
| 4120 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; | ||
| 4121 | |||
| 4122 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | ||
| 4123 | pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; | ||
| 4124 | return pin_based_exec_ctrl; | ||
| 4125 | } | ||
| 4126 | |||
| 3931 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) | 4127 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) |
| 3932 | { | 4128 | { |
| 3933 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; | 4129 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; |
| @@ -3945,11 +4141,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
| 3945 | return exec_control; | 4141 | return exec_control; |
| 3946 | } | 4142 | } |
| 3947 | 4143 | ||
| 3948 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
| 3949 | { | ||
| 3950 | return enable_apicv_reg_vid && irqchip_in_kernel(kvm); | ||
| 3951 | } | ||
| 3952 | |||
| 3953 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 4144 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) |
| 3954 | { | 4145 | { |
| 3955 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 4146 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
| @@ -3971,6 +4162,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 3971 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | 4162 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | |
| 3972 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 4163 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
| 3973 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 4164 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
| 4165 | /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD | ||
| 4166 | (handle_vmptrld). | ||
| 4167 | We can NOT enable shadow_vmcs here because we don't have yet | ||
| 4168 | a current VMCS12 | ||
| 4169 | */ | ||
| 4170 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | ||
| 3974 | return exec_control; | 4171 | return exec_control; |
| 3975 | } | 4172 | } |
| 3976 | 4173 | ||
| @@ -3999,14 +4196,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 3999 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); | 4196 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); |
| 4000 | vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); | 4197 | vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); |
| 4001 | 4198 | ||
| 4199 | if (enable_shadow_vmcs) { | ||
| 4200 | vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); | ||
| 4201 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); | ||
| 4202 | } | ||
| 4002 | if (cpu_has_vmx_msr_bitmap()) | 4203 | if (cpu_has_vmx_msr_bitmap()) |
| 4003 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); | 4204 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); |
| 4004 | 4205 | ||
| 4005 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | 4206 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
| 4006 | 4207 | ||
| 4007 | /* Control */ | 4208 | /* Control */ |
| 4008 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, | 4209 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); |
| 4009 | vmcs_config.pin_based_exec_ctrl); | ||
| 4010 | 4210 | ||
| 4011 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); | 4211 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); |
| 4012 | 4212 | ||
| @@ -4015,13 +4215,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 4015 | vmx_secondary_exec_control(vmx)); | 4215 | vmx_secondary_exec_control(vmx)); |
| 4016 | } | 4216 | } |
| 4017 | 4217 | ||
| 4018 | if (enable_apicv_reg_vid) { | 4218 | if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { |
| 4019 | vmcs_write64(EOI_EXIT_BITMAP0, 0); | 4219 | vmcs_write64(EOI_EXIT_BITMAP0, 0); |
| 4020 | vmcs_write64(EOI_EXIT_BITMAP1, 0); | 4220 | vmcs_write64(EOI_EXIT_BITMAP1, 0); |
| 4021 | vmcs_write64(EOI_EXIT_BITMAP2, 0); | 4221 | vmcs_write64(EOI_EXIT_BITMAP2, 0); |
| 4022 | vmcs_write64(EOI_EXIT_BITMAP3, 0); | 4222 | vmcs_write64(EOI_EXIT_BITMAP3, 0); |
| 4023 | 4223 | ||
| 4024 | vmcs_write16(GUEST_INTR_STATUS, 0); | 4224 | vmcs_write16(GUEST_INTR_STATUS, 0); |
| 4225 | |||
| 4226 | vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); | ||
| 4227 | vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); | ||
| 4025 | } | 4228 | } |
| 4026 | 4229 | ||
| 4027 | if (ple_gap) { | 4230 | if (ple_gap) { |
| @@ -4035,7 +4238,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 4035 | 4238 | ||
| 4036 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ | 4239 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ |
| 4037 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ | 4240 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ |
| 4038 | vmx_set_constant_host_state(); | 4241 | vmx_set_constant_host_state(vmx); |
| 4039 | #ifdef CONFIG_X86_64 | 4242 | #ifdef CONFIG_X86_64 |
| 4040 | rdmsrl(MSR_FS_BASE, a); | 4243 | rdmsrl(MSR_FS_BASE, a); |
| 4041 | vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ | 4244 | vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ |
| @@ -4089,11 +4292,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 4089 | return 0; | 4292 | return 0; |
| 4090 | } | 4293 | } |
| 4091 | 4294 | ||
| 4092 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 4295 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
| 4093 | { | 4296 | { |
| 4094 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4297 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 4095 | u64 msr; | 4298 | u64 msr; |
| 4096 | int ret; | ||
| 4097 | 4299 | ||
| 4098 | vmx->rmode.vm86_active = 0; | 4300 | vmx->rmode.vm86_active = 0; |
| 4099 | 4301 | ||
| @@ -4109,12 +4311,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 4109 | vmx_segment_cache_clear(vmx); | 4311 | vmx_segment_cache_clear(vmx); |
| 4110 | 4312 | ||
| 4111 | seg_setup(VCPU_SREG_CS); | 4313 | seg_setup(VCPU_SREG_CS); |
| 4112 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4314 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); |
| 4113 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); | 4315 | vmcs_write32(GUEST_CS_BASE, 0xffff0000); |
| 4114 | else { | ||
| 4115 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); | ||
| 4116 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); | ||
| 4117 | } | ||
| 4118 | 4316 | ||
| 4119 | seg_setup(VCPU_SREG_DS); | 4317 | seg_setup(VCPU_SREG_DS); |
| 4120 | seg_setup(VCPU_SREG_ES); | 4318 | seg_setup(VCPU_SREG_ES); |
| @@ -4137,10 +4335,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 4137 | vmcs_writel(GUEST_SYSENTER_EIP, 0); | 4335 | vmcs_writel(GUEST_SYSENTER_EIP, 0); |
| 4138 | 4336 | ||
| 4139 | vmcs_writel(GUEST_RFLAGS, 0x02); | 4337 | vmcs_writel(GUEST_RFLAGS, 0x02); |
| 4140 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4338 | kvm_rip_write(vcpu, 0xfff0); |
| 4141 | kvm_rip_write(vcpu, 0xfff0); | ||
| 4142 | else | ||
| 4143 | kvm_rip_write(vcpu, 0); | ||
| 4144 | 4339 | ||
| 4145 | vmcs_writel(GUEST_GDTR_BASE, 0); | 4340 | vmcs_writel(GUEST_GDTR_BASE, 0); |
| 4146 | vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); | 4341 | vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); |
| @@ -4171,23 +4366,20 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 4171 | vmcs_write64(APIC_ACCESS_ADDR, | 4366 | vmcs_write64(APIC_ACCESS_ADDR, |
| 4172 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); | 4367 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); |
| 4173 | 4368 | ||
| 4369 | if (vmx_vm_has_apicv(vcpu->kvm)) | ||
| 4370 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); | ||
| 4371 | |||
| 4174 | if (vmx->vpid != 0) | 4372 | if (vmx->vpid != 0) |
| 4175 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 4373 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
| 4176 | 4374 | ||
| 4177 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 4375 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
| 4178 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
| 4179 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ | 4376 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ |
| 4180 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
| 4181 | vmx_set_cr4(&vmx->vcpu, 0); | 4377 | vmx_set_cr4(&vmx->vcpu, 0); |
| 4182 | vmx_set_efer(&vmx->vcpu, 0); | 4378 | vmx_set_efer(&vmx->vcpu, 0); |
| 4183 | vmx_fpu_activate(&vmx->vcpu); | 4379 | vmx_fpu_activate(&vmx->vcpu); |
| 4184 | update_exception_bitmap(&vmx->vcpu); | 4380 | update_exception_bitmap(&vmx->vcpu); |
| 4185 | 4381 | ||
| 4186 | vpid_sync_context(vmx); | 4382 | vpid_sync_context(vmx); |
| 4187 | |||
| 4188 | ret = 0; | ||
| 4189 | |||
| 4190 | return ret; | ||
| 4191 | } | 4383 | } |
| 4192 | 4384 | ||
| 4193 | /* | 4385 | /* |
| @@ -4200,40 +4392,45 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) | |||
| 4200 | PIN_BASED_EXT_INTR_MASK; | 4392 | PIN_BASED_EXT_INTR_MASK; |
| 4201 | } | 4393 | } |
| 4202 | 4394 | ||
| 4203 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 4395 | static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) |
| 4396 | { | ||
| 4397 | return get_vmcs12(vcpu)->pin_based_vm_exec_control & | ||
| 4398 | PIN_BASED_NMI_EXITING; | ||
| 4399 | } | ||
| 4400 | |||
| 4401 | static int enable_irq_window(struct kvm_vcpu *vcpu) | ||
| 4204 | { | 4402 | { |
| 4205 | u32 cpu_based_vm_exec_control; | 4403 | u32 cpu_based_vm_exec_control; |
| 4206 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4404 | |
| 4405 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | ||
| 4207 | /* | 4406 | /* |
| 4208 | * We get here if vmx_interrupt_allowed() said we can't | 4407 | * We get here if vmx_interrupt_allowed() said we can't |
| 4209 | * inject to L1 now because L2 must run. Ask L2 to exit | 4408 | * inject to L1 now because L2 must run. The caller will have |
| 4210 | * right after entry, so we can inject to L1 more promptly. | 4409 | * to make L2 exit right after entry, so we can inject to L1 |
| 4410 | * more promptly. | ||
| 4211 | */ | 4411 | */ |
| 4212 | kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | 4412 | return -EBUSY; |
| 4213 | return; | ||
| 4214 | } | ||
| 4215 | 4413 | ||
| 4216 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 4414 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
| 4217 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | 4415 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
| 4218 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 4416 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
| 4417 | return 0; | ||
| 4219 | } | 4418 | } |
| 4220 | 4419 | ||
| 4221 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 4420 | static int enable_nmi_window(struct kvm_vcpu *vcpu) |
| 4222 | { | 4421 | { |
| 4223 | u32 cpu_based_vm_exec_control; | 4422 | u32 cpu_based_vm_exec_control; |
| 4224 | 4423 | ||
| 4225 | if (!cpu_has_virtual_nmis()) { | 4424 | if (!cpu_has_virtual_nmis()) |
| 4226 | enable_irq_window(vcpu); | 4425 | return enable_irq_window(vcpu); |
| 4227 | return; | 4426 | |
| 4228 | } | 4427 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) |
| 4428 | return enable_irq_window(vcpu); | ||
| 4229 | 4429 | ||
| 4230 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { | ||
| 4231 | enable_irq_window(vcpu); | ||
| 4232 | return; | ||
| 4233 | } | ||
| 4234 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 4430 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
| 4235 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | 4431 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; |
| 4236 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 4432 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
| 4433 | return 0; | ||
| 4237 | } | 4434 | } |
| 4238 | 4435 | ||
| 4239 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) | 4436 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
| @@ -4294,16 +4491,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
| 4294 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 4491 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
| 4295 | } | 4492 | } |
| 4296 | 4493 | ||
| 4297 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | ||
| 4298 | { | ||
| 4299 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
| 4300 | return 0; | ||
| 4301 | |||
| 4302 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
| 4303 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | ||
| 4304 | | GUEST_INTR_STATE_NMI)); | ||
| 4305 | } | ||
| 4306 | |||
| 4307 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 4494 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
| 4308 | { | 4495 | { |
| 4309 | if (!cpu_has_virtual_nmis()) | 4496 | if (!cpu_has_virtual_nmis()) |
| @@ -4333,18 +4520,52 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
| 4333 | } | 4520 | } |
| 4334 | } | 4521 | } |
| 4335 | 4522 | ||
| 4523 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | ||
| 4524 | { | ||
| 4525 | if (is_guest_mode(vcpu)) { | ||
| 4526 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
| 4527 | |||
| 4528 | if (to_vmx(vcpu)->nested.nested_run_pending) | ||
| 4529 | return 0; | ||
| 4530 | if (nested_exit_on_nmi(vcpu)) { | ||
| 4531 | nested_vmx_vmexit(vcpu); | ||
| 4532 | vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; | ||
| 4533 | vmcs12->vm_exit_intr_info = NMI_VECTOR | | ||
| 4534 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; | ||
| 4535 | /* | ||
| 4536 | * The NMI-triggered VM exit counts as injection: | ||
| 4537 | * clear this one and block further NMIs. | ||
| 4538 | */ | ||
| 4539 | vcpu->arch.nmi_pending = 0; | ||
| 4540 | vmx_set_nmi_mask(vcpu, true); | ||
| 4541 | return 0; | ||
| 4542 | } | ||
| 4543 | } | ||
| 4544 | |||
| 4545 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
| 4546 | return 0; | ||
| 4547 | |||
| 4548 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
| 4549 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | ||
| 4550 | | GUEST_INTR_STATE_NMI)); | ||
| 4551 | } | ||
| 4552 | |||
| 4336 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4553 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
| 4337 | { | 4554 | { |
| 4338 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4555 | if (is_guest_mode(vcpu)) { |
| 4339 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 4556 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
| 4340 | if (to_vmx(vcpu)->nested.nested_run_pending || | 4557 | |
| 4341 | (vmcs12->idt_vectoring_info_field & | 4558 | if (to_vmx(vcpu)->nested.nested_run_pending) |
| 4342 | VECTORING_INFO_VALID_MASK)) | ||
| 4343 | return 0; | 4559 | return 0; |
| 4344 | nested_vmx_vmexit(vcpu); | 4560 | if (nested_exit_on_intr(vcpu)) { |
| 4345 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; | 4561 | nested_vmx_vmexit(vcpu); |
| 4346 | vmcs12->vm_exit_intr_info = 0; | 4562 | vmcs12->vm_exit_reason = |
| 4347 | /* fall through to normal code, but now in L1, not L2 */ | 4563 | EXIT_REASON_EXTERNAL_INTERRUPT; |
| 4564 | vmcs12->vm_exit_intr_info = 0; | ||
| 4565 | /* | ||
| 4566 | * fall through to normal code, but now in L1, not L2 | ||
| 4567 | */ | ||
| 4568 | } | ||
| 4348 | } | 4569 | } |
| 4349 | 4570 | ||
| 4350 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | 4571 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
| @@ -4362,7 +4583,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
| 4362 | .flags = 0, | 4583 | .flags = 0, |
| 4363 | }; | 4584 | }; |
| 4364 | 4585 | ||
| 4365 | ret = kvm_set_memory_region(kvm, &tss_mem, false); | 4586 | ret = kvm_set_memory_region(kvm, &tss_mem); |
| 4366 | if (ret) | 4587 | if (ret) |
| 4367 | return ret; | 4588 | return ret; |
| 4368 | kvm->arch.tss_addr = addr; | 4589 | kvm->arch.tss_addr = addr; |
| @@ -4603,34 +4824,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
| 4603 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ | 4824 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ |
| 4604 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | 4825 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) |
| 4605 | { | 4826 | { |
| 4606 | if (to_vmx(vcpu)->nested.vmxon && | ||
| 4607 | ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) | ||
| 4608 | return 1; | ||
| 4609 | |||
| 4610 | if (is_guest_mode(vcpu)) { | 4827 | if (is_guest_mode(vcpu)) { |
| 4828 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
| 4829 | unsigned long orig_val = val; | ||
| 4830 | |||
| 4611 | /* | 4831 | /* |
| 4612 | * We get here when L2 changed cr0 in a way that did not change | 4832 | * We get here when L2 changed cr0 in a way that did not change |
| 4613 | * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), | 4833 | * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), |
| 4614 | * but did change L0 shadowed bits. This can currently happen | 4834 | * but did change L0 shadowed bits. So we first calculate the |
| 4615 | * with the TS bit: L0 may want to leave TS on (for lazy fpu | 4835 | * effective cr0 value that L1 would like to write into the |
| 4616 | * loading) while pretending to allow the guest to change it. | 4836 | * hardware. It consists of the L2-owned bits from the new |
| 4837 | * value combined with the L1-owned bits from L1's guest_cr0. | ||
| 4617 | */ | 4838 | */ |
| 4618 | if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) | | 4839 | val = (val & ~vmcs12->cr0_guest_host_mask) | |
| 4619 | (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits))) | 4840 | (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); |
| 4841 | |||
| 4842 | /* TODO: will have to take unrestricted guest mode into | ||
| 4843 | * account */ | ||
| 4844 | if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) | ||
| 4620 | return 1; | 4845 | return 1; |
| 4621 | vmcs_writel(CR0_READ_SHADOW, val); | 4846 | |
| 4847 | if (kvm_set_cr0(vcpu, val)) | ||
| 4848 | return 1; | ||
| 4849 | vmcs_writel(CR0_READ_SHADOW, orig_val); | ||
| 4622 | return 0; | 4850 | return 0; |
| 4623 | } else | 4851 | } else { |
| 4852 | if (to_vmx(vcpu)->nested.vmxon && | ||
| 4853 | ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) | ||
| 4854 | return 1; | ||
| 4624 | return kvm_set_cr0(vcpu, val); | 4855 | return kvm_set_cr0(vcpu, val); |
| 4856 | } | ||
| 4625 | } | 4857 | } |
| 4626 | 4858 | ||
| 4627 | static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) | 4859 | static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) |
| 4628 | { | 4860 | { |
| 4629 | if (is_guest_mode(vcpu)) { | 4861 | if (is_guest_mode(vcpu)) { |
| 4630 | if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) | | 4862 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
| 4631 | (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits))) | 4863 | unsigned long orig_val = val; |
| 4864 | |||
| 4865 | /* analogously to handle_set_cr0 */ | ||
| 4866 | val = (val & ~vmcs12->cr4_guest_host_mask) | | ||
| 4867 | (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); | ||
| 4868 | if (kvm_set_cr4(vcpu, val)) | ||
| 4632 | return 1; | 4869 | return 1; |
| 4633 | vmcs_writel(CR4_READ_SHADOW, val); | 4870 | vmcs_writel(CR4_READ_SHADOW, orig_val); |
| 4634 | return 0; | 4871 | return 0; |
| 4635 | } else | 4872 | } else |
| 4636 | return kvm_set_cr4(vcpu, val); | 4873 | return kvm_set_cr4(vcpu, val); |
| @@ -5183,7 +5420,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
| 5183 | if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) | 5420 | if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) |
| 5184 | return 1; | 5421 | return 1; |
| 5185 | 5422 | ||
| 5186 | err = emulate_instruction(vcpu, 0); | 5423 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); |
| 5187 | 5424 | ||
| 5188 | if (err == EMULATE_DO_MMIO) { | 5425 | if (err == EMULATE_DO_MMIO) { |
| 5189 | ret = 0; | 5426 | ret = 0; |
| @@ -5259,8 +5496,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) | |||
| 5259 | } | 5496 | } |
| 5260 | 5497 | ||
| 5261 | /* Create a new VMCS */ | 5498 | /* Create a new VMCS */ |
| 5262 | item = (struct vmcs02_list *) | 5499 | item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); |
| 5263 | kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); | ||
| 5264 | if (!item) | 5500 | if (!item) |
| 5265 | return NULL; | 5501 | return NULL; |
| 5266 | item->vmcs02.vmcs = alloc_vmcs(); | 5502 | item->vmcs02.vmcs = alloc_vmcs(); |
| @@ -5309,6 +5545,9 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | |||
| 5309 | free_loaded_vmcs(&vmx->vmcs01); | 5545 | free_loaded_vmcs(&vmx->vmcs01); |
| 5310 | } | 5546 | } |
| 5311 | 5547 | ||
| 5548 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | ||
| 5549 | u32 vm_instruction_error); | ||
| 5550 | |||
| 5312 | /* | 5551 | /* |
| 5313 | * Emulate the VMXON instruction. | 5552 | * Emulate the VMXON instruction. |
| 5314 | * Currently, we just remember that VMX is active, and do not save or even | 5553 | * Currently, we just remember that VMX is active, and do not save or even |
| @@ -5321,6 +5560,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
| 5321 | { | 5560 | { |
| 5322 | struct kvm_segment cs; | 5561 | struct kvm_segment cs; |
| 5323 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5562 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 5563 | struct vmcs *shadow_vmcs; | ||
| 5324 | 5564 | ||
| 5325 | /* The Intel VMX Instruction Reference lists a bunch of bits that | 5565 | /* The Intel VMX Instruction Reference lists a bunch of bits that |
| 5326 | * are prerequisite to running VMXON, most notably cr4.VMXE must be | 5566 | * are prerequisite to running VMXON, most notably cr4.VMXE must be |
| @@ -5344,6 +5584,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
| 5344 | kvm_inject_gp(vcpu, 0); | 5584 | kvm_inject_gp(vcpu, 0); |
| 5345 | return 1; | 5585 | return 1; |
| 5346 | } | 5586 | } |
| 5587 | if (vmx->nested.vmxon) { | ||
| 5588 | nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); | ||
| 5589 | skip_emulated_instruction(vcpu); | ||
| 5590 | return 1; | ||
| 5591 | } | ||
| 5592 | if (enable_shadow_vmcs) { | ||
| 5593 | shadow_vmcs = alloc_vmcs(); | ||
| 5594 | if (!shadow_vmcs) | ||
| 5595 | return -ENOMEM; | ||
| 5596 | /* mark vmcs as shadow */ | ||
| 5597 | shadow_vmcs->revision_id |= (1u << 31); | ||
| 5598 | /* init shadow vmcs */ | ||
| 5599 | vmcs_clear(shadow_vmcs); | ||
| 5600 | vmx->nested.current_shadow_vmcs = shadow_vmcs; | ||
| 5601 | } | ||
| 5347 | 5602 | ||
| 5348 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | 5603 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); |
| 5349 | vmx->nested.vmcs02_num = 0; | 5604 | vmx->nested.vmcs02_num = 0; |
| @@ -5384,6 +5639,25 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) | |||
| 5384 | return 1; | 5639 | return 1; |
| 5385 | } | 5640 | } |
| 5386 | 5641 | ||
| 5642 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | ||
| 5643 | { | ||
| 5644 | u32 exec_control; | ||
| 5645 | if (enable_shadow_vmcs) { | ||
| 5646 | if (vmx->nested.current_vmcs12 != NULL) { | ||
| 5647 | /* copy to memory all shadowed fields in case | ||
| 5648 | they were modified */ | ||
| 5649 | copy_shadow_to_vmcs12(vmx); | ||
| 5650 | vmx->nested.sync_shadow_vmcs = false; | ||
| 5651 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
| 5652 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | ||
| 5653 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
| 5654 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | ||
| 5655 | } | ||
| 5656 | } | ||
| 5657 | kunmap(vmx->nested.current_vmcs12_page); | ||
| 5658 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
| 5659 | } | ||
| 5660 | |||
| 5387 | /* | 5661 | /* |
| 5388 | * Free whatever needs to be freed from vmx->nested when L1 goes down, or | 5662 | * Free whatever needs to be freed from vmx->nested when L1 goes down, or |
| 5389 | * just stops using VMX. | 5663 | * just stops using VMX. |
| @@ -5394,11 +5668,12 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
| 5394 | return; | 5668 | return; |
| 5395 | vmx->nested.vmxon = false; | 5669 | vmx->nested.vmxon = false; |
| 5396 | if (vmx->nested.current_vmptr != -1ull) { | 5670 | if (vmx->nested.current_vmptr != -1ull) { |
| 5397 | kunmap(vmx->nested.current_vmcs12_page); | 5671 | nested_release_vmcs12(vmx); |
| 5398 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
| 5399 | vmx->nested.current_vmptr = -1ull; | 5672 | vmx->nested.current_vmptr = -1ull; |
| 5400 | vmx->nested.current_vmcs12 = NULL; | 5673 | vmx->nested.current_vmcs12 = NULL; |
| 5401 | } | 5674 | } |
| 5675 | if (enable_shadow_vmcs) | ||
| 5676 | free_vmcs(vmx->nested.current_shadow_vmcs); | ||
| 5402 | /* Unpin physical memory we referred to in current vmcs02 */ | 5677 | /* Unpin physical memory we referred to in current vmcs02 */ |
| 5403 | if (vmx->nested.apic_access_page) { | 5678 | if (vmx->nested.apic_access_page) { |
| 5404 | nested_release_page(vmx->nested.apic_access_page); | 5679 | nested_release_page(vmx->nested.apic_access_page); |
| @@ -5507,6 +5782,10 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | |||
| 5507 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | 5782 | X86_EFLAGS_SF | X86_EFLAGS_OF)) |
| 5508 | | X86_EFLAGS_ZF); | 5783 | | X86_EFLAGS_ZF); |
| 5509 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | 5784 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; |
| 5785 | /* | ||
| 5786 | * We don't need to force a shadow sync because | ||
| 5787 | * VM_INSTRUCTION_ERROR is not shadowed | ||
| 5788 | */ | ||
| 5510 | } | 5789 | } |
| 5511 | 5790 | ||
| 5512 | /* Emulate the VMCLEAR instruction */ | 5791 | /* Emulate the VMCLEAR instruction */ |
| @@ -5539,8 +5818,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
| 5539 | } | 5818 | } |
| 5540 | 5819 | ||
| 5541 | if (vmptr == vmx->nested.current_vmptr) { | 5820 | if (vmptr == vmx->nested.current_vmptr) { |
| 5542 | kunmap(vmx->nested.current_vmcs12_page); | 5821 | nested_release_vmcs12(vmx); |
| 5543 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
| 5544 | vmx->nested.current_vmptr = -1ull; | 5822 | vmx->nested.current_vmptr = -1ull; |
| 5545 | vmx->nested.current_vmcs12 = NULL; | 5823 | vmx->nested.current_vmcs12 = NULL; |
| 5546 | } | 5824 | } |
| @@ -5639,6 +5917,111 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu, | |||
| 5639 | } | 5917 | } |
| 5640 | } | 5918 | } |
| 5641 | 5919 | ||
| 5920 | |||
| 5921 | static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu, | ||
| 5922 | unsigned long field, u64 field_value){ | ||
| 5923 | short offset = vmcs_field_to_offset(field); | ||
| 5924 | char *p = ((char *) get_vmcs12(vcpu)) + offset; | ||
| 5925 | if (offset < 0) | ||
| 5926 | return false; | ||
| 5927 | |||
| 5928 | switch (vmcs_field_type(field)) { | ||
| 5929 | case VMCS_FIELD_TYPE_U16: | ||
| 5930 | *(u16 *)p = field_value; | ||
| 5931 | return true; | ||
| 5932 | case VMCS_FIELD_TYPE_U32: | ||
| 5933 | *(u32 *)p = field_value; | ||
| 5934 | return true; | ||
| 5935 | case VMCS_FIELD_TYPE_U64: | ||
| 5936 | *(u64 *)p = field_value; | ||
| 5937 | return true; | ||
| 5938 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
| 5939 | *(natural_width *)p = field_value; | ||
| 5940 | return true; | ||
| 5941 | default: | ||
| 5942 | return false; /* can never happen. */ | ||
| 5943 | } | ||
| 5944 | |||
| 5945 | } | ||
| 5946 | |||
| 5947 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | ||
| 5948 | { | ||
| 5949 | int i; | ||
| 5950 | unsigned long field; | ||
| 5951 | u64 field_value; | ||
| 5952 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | ||
| 5953 | unsigned long *fields = (unsigned long *)shadow_read_write_fields; | ||
| 5954 | int num_fields = max_shadow_read_write_fields; | ||
| 5955 | |||
| 5956 | vmcs_load(shadow_vmcs); | ||
| 5957 | |||
| 5958 | for (i = 0; i < num_fields; i++) { | ||
| 5959 | field = fields[i]; | ||
| 5960 | switch (vmcs_field_type(field)) { | ||
| 5961 | case VMCS_FIELD_TYPE_U16: | ||
| 5962 | field_value = vmcs_read16(field); | ||
| 5963 | break; | ||
| 5964 | case VMCS_FIELD_TYPE_U32: | ||
| 5965 | field_value = vmcs_read32(field); | ||
| 5966 | break; | ||
| 5967 | case VMCS_FIELD_TYPE_U64: | ||
| 5968 | field_value = vmcs_read64(field); | ||
| 5969 | break; | ||
| 5970 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
| 5971 | field_value = vmcs_readl(field); | ||
| 5972 | break; | ||
| 5973 | } | ||
| 5974 | vmcs12_write_any(&vmx->vcpu, field, field_value); | ||
| 5975 | } | ||
| 5976 | |||
| 5977 | vmcs_clear(shadow_vmcs); | ||
| 5978 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
| 5979 | } | ||
| 5980 | |||
| 5981 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | ||
| 5982 | { | ||
| 5983 | unsigned long *fields[] = { | ||
| 5984 | (unsigned long *)shadow_read_write_fields, | ||
| 5985 | (unsigned long *)shadow_read_only_fields | ||
| 5986 | }; | ||
| 5987 | int num_lists = ARRAY_SIZE(fields); | ||
| 5988 | int max_fields[] = { | ||
| 5989 | max_shadow_read_write_fields, | ||
| 5990 | max_shadow_read_only_fields | ||
| 5991 | }; | ||
| 5992 | int i, q; | ||
| 5993 | unsigned long field; | ||
| 5994 | u64 field_value = 0; | ||
| 5995 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | ||
| 5996 | |||
| 5997 | vmcs_load(shadow_vmcs); | ||
| 5998 | |||
| 5999 | for (q = 0; q < num_lists; q++) { | ||
| 6000 | for (i = 0; i < max_fields[q]; i++) { | ||
| 6001 | field = fields[q][i]; | ||
| 6002 | vmcs12_read_any(&vmx->vcpu, field, &field_value); | ||
| 6003 | |||
| 6004 | switch (vmcs_field_type(field)) { | ||
| 6005 | case VMCS_FIELD_TYPE_U16: | ||
| 6006 | vmcs_write16(field, (u16)field_value); | ||
| 6007 | break; | ||
| 6008 | case VMCS_FIELD_TYPE_U32: | ||
| 6009 | vmcs_write32(field, (u32)field_value); | ||
| 6010 | break; | ||
| 6011 | case VMCS_FIELD_TYPE_U64: | ||
| 6012 | vmcs_write64(field, (u64)field_value); | ||
| 6013 | break; | ||
| 6014 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
| 6015 | vmcs_writel(field, (long)field_value); | ||
| 6016 | break; | ||
| 6017 | } | ||
| 6018 | } | ||
| 6019 | } | ||
| 6020 | |||
| 6021 | vmcs_clear(shadow_vmcs); | ||
| 6022 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
| 6023 | } | ||
| 6024 | |||
| 5642 | /* | 6025 | /* |
| 5643 | * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was | 6026 | * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was |
| 5644 | * used before) all generate the same failure when it is missing. | 6027 | * used before) all generate the same failure when it is missing. |
| @@ -5703,8 +6086,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
| 5703 | gva_t gva; | 6086 | gva_t gva; |
| 5704 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6087 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 5705 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 6088 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
| 5706 | char *p; | ||
| 5707 | short offset; | ||
| 5708 | /* The value to write might be 32 or 64 bits, depending on L1's long | 6089 | /* The value to write might be 32 or 64 bits, depending on L1's long |
| 5709 | * mode, and eventually we need to write that into a field of several | 6090 | * mode, and eventually we need to write that into a field of several |
| 5710 | * possible lengths. The code below first zero-extends the value to 64 | 6091 | * possible lengths. The code below first zero-extends the value to 64 |
| @@ -5741,28 +6122,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
| 5741 | return 1; | 6122 | return 1; |
| 5742 | } | 6123 | } |
| 5743 | 6124 | ||
| 5744 | offset = vmcs_field_to_offset(field); | 6125 | if (!vmcs12_write_any(vcpu, field, field_value)) { |
| 5745 | if (offset < 0) { | ||
| 5746 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | ||
| 5747 | skip_emulated_instruction(vcpu); | ||
| 5748 | return 1; | ||
| 5749 | } | ||
| 5750 | p = ((char *) get_vmcs12(vcpu)) + offset; | ||
| 5751 | |||
| 5752 | switch (vmcs_field_type(field)) { | ||
| 5753 | case VMCS_FIELD_TYPE_U16: | ||
| 5754 | *(u16 *)p = field_value; | ||
| 5755 | break; | ||
| 5756 | case VMCS_FIELD_TYPE_U32: | ||
| 5757 | *(u32 *)p = field_value; | ||
| 5758 | break; | ||
| 5759 | case VMCS_FIELD_TYPE_U64: | ||
| 5760 | *(u64 *)p = field_value; | ||
| 5761 | break; | ||
| 5762 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
| 5763 | *(natural_width *)p = field_value; | ||
| 5764 | break; | ||
| 5765 | default: | ||
| 5766 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | 6126 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); |
| 5767 | skip_emulated_instruction(vcpu); | 6127 | skip_emulated_instruction(vcpu); |
| 5768 | return 1; | 6128 | return 1; |
| @@ -5780,6 +6140,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
| 5780 | gva_t gva; | 6140 | gva_t gva; |
| 5781 | gpa_t vmptr; | 6141 | gpa_t vmptr; |
| 5782 | struct x86_exception e; | 6142 | struct x86_exception e; |
| 6143 | u32 exec_control; | ||
| 5783 | 6144 | ||
| 5784 | if (!nested_vmx_check_permission(vcpu)) | 6145 | if (!nested_vmx_check_permission(vcpu)) |
| 5785 | return 1; | 6146 | return 1; |
| @@ -5818,14 +6179,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
| 5818 | skip_emulated_instruction(vcpu); | 6179 | skip_emulated_instruction(vcpu); |
| 5819 | return 1; | 6180 | return 1; |
| 5820 | } | 6181 | } |
| 5821 | if (vmx->nested.current_vmptr != -1ull) { | 6182 | if (vmx->nested.current_vmptr != -1ull) |
| 5822 | kunmap(vmx->nested.current_vmcs12_page); | 6183 | nested_release_vmcs12(vmx); |
| 5823 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
| 5824 | } | ||
| 5825 | 6184 | ||
| 5826 | vmx->nested.current_vmptr = vmptr; | 6185 | vmx->nested.current_vmptr = vmptr; |
| 5827 | vmx->nested.current_vmcs12 = new_vmcs12; | 6186 | vmx->nested.current_vmcs12 = new_vmcs12; |
| 5828 | vmx->nested.current_vmcs12_page = page; | 6187 | vmx->nested.current_vmcs12_page = page; |
| 6188 | if (enable_shadow_vmcs) { | ||
| 6189 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
| 6190 | exec_control |= SECONDARY_EXEC_SHADOW_VMCS; | ||
| 6191 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
| 6192 | vmcs_write64(VMCS_LINK_POINTER, | ||
| 6193 | __pa(vmx->nested.current_shadow_vmcs)); | ||
| 6194 | vmx->nested.sync_shadow_vmcs = true; | ||
| 6195 | } | ||
| 5829 | } | 6196 | } |
| 5830 | 6197 | ||
| 5831 | nested_vmx_succeed(vcpu); | 6198 | nested_vmx_succeed(vcpu); |
| @@ -5908,6 +6275,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
| 5908 | static const int kvm_vmx_max_exit_handlers = | 6275 | static const int kvm_vmx_max_exit_handlers = |
| 5909 | ARRAY_SIZE(kvm_vmx_exit_handlers); | 6276 | ARRAY_SIZE(kvm_vmx_exit_handlers); |
| 5910 | 6277 | ||
| 6278 | static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | ||
| 6279 | struct vmcs12 *vmcs12) | ||
| 6280 | { | ||
| 6281 | unsigned long exit_qualification; | ||
| 6282 | gpa_t bitmap, last_bitmap; | ||
| 6283 | unsigned int port; | ||
| 6284 | int size; | ||
| 6285 | u8 b; | ||
| 6286 | |||
| 6287 | if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) | ||
| 6288 | return 1; | ||
| 6289 | |||
| 6290 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) | ||
| 6291 | return 0; | ||
| 6292 | |||
| 6293 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
| 6294 | |||
| 6295 | port = exit_qualification >> 16; | ||
| 6296 | size = (exit_qualification & 7) + 1; | ||
| 6297 | |||
| 6298 | last_bitmap = (gpa_t)-1; | ||
| 6299 | b = -1; | ||
| 6300 | |||
| 6301 | while (size > 0) { | ||
| 6302 | if (port < 0x8000) | ||
| 6303 | bitmap = vmcs12->io_bitmap_a; | ||
| 6304 | else if (port < 0x10000) | ||
| 6305 | bitmap = vmcs12->io_bitmap_b; | ||
| 6306 | else | ||
| 6307 | return 1; | ||
| 6308 | bitmap += (port & 0x7fff) / 8; | ||
| 6309 | |||
| 6310 | if (last_bitmap != bitmap) | ||
| 6311 | if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) | ||
| 6312 | return 1; | ||
| 6313 | if (b & (1 << (port & 7))) | ||
| 6314 | return 1; | ||
| 6315 | |||
| 6316 | port++; | ||
| 6317 | size--; | ||
| 6318 | last_bitmap = bitmap; | ||
| 6319 | } | ||
| 6320 | |||
| 6321 | return 0; | ||
| 6322 | } | ||
| 6323 | |||
| 5911 | /* | 6324 | /* |
| 5912 | * Return 1 if we should exit from L2 to L1 to handle an MSR access access, | 6325 | * Return 1 if we should exit from L2 to L1 to handle an MSR access access, |
| 5913 | * rather than handle it ourselves in L0. I.e., check whether L1 expressed | 6326 | * rather than handle it ourselves in L0. I.e., check whether L1 expressed |
| @@ -5939,7 +6352,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | |||
| 5939 | /* Then read the msr_index'th bit from this bitmap: */ | 6352 | /* Then read the msr_index'th bit from this bitmap: */ |
| 5940 | if (msr_index < 1024*8) { | 6353 | if (msr_index < 1024*8) { |
| 5941 | unsigned char b; | 6354 | unsigned char b; |
| 5942 | kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1); | 6355 | if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) |
| 6356 | return 1; | ||
| 5943 | return 1 & (b >> (msr_index & 7)); | 6357 | return 1 & (b >> (msr_index & 7)); |
| 5944 | } else | 6358 | } else |
| 5945 | return 1; /* let L1 handle the wrong parameter */ | 6359 | return 1; /* let L1 handle the wrong parameter */ |
| @@ -6033,10 +6447,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, | |||
| 6033 | */ | 6447 | */ |
| 6034 | static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | 6448 | static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) |
| 6035 | { | 6449 | { |
| 6036 | u32 exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
| 6037 | u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 6450 | u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
| 6038 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6451 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 6039 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 6452 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
| 6453 | u32 exit_reason = vmx->exit_reason; | ||
| 6040 | 6454 | ||
| 6041 | if (vmx->nested.nested_run_pending) | 6455 | if (vmx->nested.nested_run_pending) |
| 6042 | return 0; | 6456 | return 0; |
| @@ -6060,14 +6474,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
| 6060 | case EXIT_REASON_TRIPLE_FAULT: | 6474 | case EXIT_REASON_TRIPLE_FAULT: |
| 6061 | return 1; | 6475 | return 1; |
| 6062 | case EXIT_REASON_PENDING_INTERRUPT: | 6476 | case EXIT_REASON_PENDING_INTERRUPT: |
| 6477 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); | ||
| 6063 | case EXIT_REASON_NMI_WINDOW: | 6478 | case EXIT_REASON_NMI_WINDOW: |
| 6064 | /* | 6479 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); |
| 6065 | * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit | ||
| 6066 | * (aka Interrupt Window Exiting) only when L1 turned it on, | ||
| 6067 | * so if we got a PENDING_INTERRUPT exit, this must be for L1. | ||
| 6068 | * Same for NMI Window Exiting. | ||
| 6069 | */ | ||
| 6070 | return 1; | ||
| 6071 | case EXIT_REASON_TASK_SWITCH: | 6480 | case EXIT_REASON_TASK_SWITCH: |
| 6072 | return 1; | 6481 | return 1; |
| 6073 | case EXIT_REASON_CPUID: | 6482 | case EXIT_REASON_CPUID: |
| @@ -6097,8 +6506,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
| 6097 | case EXIT_REASON_DR_ACCESS: | 6506 | case EXIT_REASON_DR_ACCESS: |
| 6098 | return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); | 6507 | return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); |
| 6099 | case EXIT_REASON_IO_INSTRUCTION: | 6508 | case EXIT_REASON_IO_INSTRUCTION: |
| 6100 | /* TODO: support IO bitmaps */ | 6509 | return nested_vmx_exit_handled_io(vcpu, vmcs12); |
| 6101 | return 1; | ||
| 6102 | case EXIT_REASON_MSR_READ: | 6510 | case EXIT_REASON_MSR_READ: |
| 6103 | case EXIT_REASON_MSR_WRITE: | 6511 | case EXIT_REASON_MSR_WRITE: |
| 6104 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); | 6512 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); |
| @@ -6122,6 +6530,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
| 6122 | case EXIT_REASON_EPT_VIOLATION: | 6530 | case EXIT_REASON_EPT_VIOLATION: |
| 6123 | case EXIT_REASON_EPT_MISCONFIG: | 6531 | case EXIT_REASON_EPT_MISCONFIG: |
| 6124 | return 0; | 6532 | return 0; |
| 6533 | case EXIT_REASON_PREEMPTION_TIMER: | ||
| 6534 | return vmcs12->pin_based_vm_exec_control & | ||
| 6535 | PIN_BASED_VMX_PREEMPTION_TIMER; | ||
| 6125 | case EXIT_REASON_WBINVD: | 6536 | case EXIT_REASON_WBINVD: |
| 6126 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); | 6537 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); |
| 6127 | case EXIT_REASON_XSETBV: | 6538 | case EXIT_REASON_XSETBV: |
| @@ -6316,6 +6727,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | |||
| 6316 | 6727 | ||
| 6317 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 6728 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
| 6318 | { | 6729 | { |
| 6730 | if (!vmx_vm_has_apicv(vcpu->kvm)) | ||
| 6731 | return; | ||
| 6732 | |||
| 6319 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); | 6733 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); |
| 6320 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); | 6734 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); |
| 6321 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); | 6735 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); |
| @@ -6346,6 +6760,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | |||
| 6346 | } | 6760 | } |
| 6347 | } | 6761 | } |
| 6348 | 6762 | ||
| 6763 | static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) | ||
| 6764 | { | ||
| 6765 | u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
| 6766 | |||
| 6767 | /* | ||
| 6768 | * If external interrupt exists, IF bit is set in rflags/eflags on the | ||
| 6769 | * interrupt stack frame, and interrupt will be enabled on a return | ||
| 6770 | * from interrupt handler. | ||
| 6771 | */ | ||
| 6772 | if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) | ||
| 6773 | == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { | ||
| 6774 | unsigned int vector; | ||
| 6775 | unsigned long entry; | ||
| 6776 | gate_desc *desc; | ||
| 6777 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 6778 | #ifdef CONFIG_X86_64 | ||
| 6779 | unsigned long tmp; | ||
| 6780 | #endif | ||
| 6781 | |||
| 6782 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | ||
| 6783 | desc = (gate_desc *)vmx->host_idt_base + vector; | ||
| 6784 | entry = gate_offset(*desc); | ||
| 6785 | asm volatile( | ||
| 6786 | #ifdef CONFIG_X86_64 | ||
| 6787 | "mov %%" _ASM_SP ", %[sp]\n\t" | ||
| 6788 | "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" | ||
| 6789 | "push $%c[ss]\n\t" | ||
| 6790 | "push %[sp]\n\t" | ||
| 6791 | #endif | ||
| 6792 | "pushf\n\t" | ||
| 6793 | "orl $0x200, (%%" _ASM_SP ")\n\t" | ||
| 6794 | __ASM_SIZE(push) " $%c[cs]\n\t" | ||
| 6795 | "call *%[entry]\n\t" | ||
| 6796 | : | ||
| 6797 | #ifdef CONFIG_X86_64 | ||
| 6798 | [sp]"=&r"(tmp) | ||
| 6799 | #endif | ||
| 6800 | : | ||
| 6801 | [entry]"r"(entry), | ||
| 6802 | [ss]"i"(__KERNEL_DS), | ||
| 6803 | [cs]"i"(__KERNEL_CS) | ||
| 6804 | ); | ||
| 6805 | } else | ||
| 6806 | local_irq_enable(); | ||
| 6807 | } | ||
| 6808 | |||
| 6349 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | 6809 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) |
| 6350 | { | 6810 | { |
| 6351 | u32 exit_intr_info; | 6811 | u32 exit_intr_info; |
| @@ -6388,7 +6848,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | |||
| 6388 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | 6848 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); |
| 6389 | } | 6849 | } |
| 6390 | 6850 | ||
| 6391 | static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | 6851 | static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, |
| 6392 | u32 idt_vectoring_info, | 6852 | u32 idt_vectoring_info, |
| 6393 | int instr_len_field, | 6853 | int instr_len_field, |
| 6394 | int error_code_field) | 6854 | int error_code_field) |
| @@ -6399,46 +6859,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
| 6399 | 6859 | ||
| 6400 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 6860 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
| 6401 | 6861 | ||
| 6402 | vmx->vcpu.arch.nmi_injected = false; | 6862 | vcpu->arch.nmi_injected = false; |
| 6403 | kvm_clear_exception_queue(&vmx->vcpu); | 6863 | kvm_clear_exception_queue(vcpu); |
| 6404 | kvm_clear_interrupt_queue(&vmx->vcpu); | 6864 | kvm_clear_interrupt_queue(vcpu); |
| 6405 | 6865 | ||
| 6406 | if (!idtv_info_valid) | 6866 | if (!idtv_info_valid) |
| 6407 | return; | 6867 | return; |
| 6408 | 6868 | ||
| 6409 | kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); | 6869 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 6410 | 6870 | ||
| 6411 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; | 6871 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; |
| 6412 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; | 6872 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; |
| 6413 | 6873 | ||
| 6414 | switch (type) { | 6874 | switch (type) { |
| 6415 | case INTR_TYPE_NMI_INTR: | 6875 | case INTR_TYPE_NMI_INTR: |
| 6416 | vmx->vcpu.arch.nmi_injected = true; | 6876 | vcpu->arch.nmi_injected = true; |
| 6417 | /* | 6877 | /* |
| 6418 | * SDM 3: 27.7.1.2 (September 2008) | 6878 | * SDM 3: 27.7.1.2 (September 2008) |
| 6419 | * Clear bit "block by NMI" before VM entry if a NMI | 6879 | * Clear bit "block by NMI" before VM entry if a NMI |
| 6420 | * delivery faulted. | 6880 | * delivery faulted. |
| 6421 | */ | 6881 | */ |
| 6422 | vmx_set_nmi_mask(&vmx->vcpu, false); | 6882 | vmx_set_nmi_mask(vcpu, false); |
| 6423 | break; | 6883 | break; |
| 6424 | case INTR_TYPE_SOFT_EXCEPTION: | 6884 | case INTR_TYPE_SOFT_EXCEPTION: |
| 6425 | vmx->vcpu.arch.event_exit_inst_len = | 6885 | vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); |
| 6426 | vmcs_read32(instr_len_field); | ||
| 6427 | /* fall through */ | 6886 | /* fall through */ |
| 6428 | case INTR_TYPE_HARD_EXCEPTION: | 6887 | case INTR_TYPE_HARD_EXCEPTION: |
| 6429 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { | 6888 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
| 6430 | u32 err = vmcs_read32(error_code_field); | 6889 | u32 err = vmcs_read32(error_code_field); |
| 6431 | kvm_queue_exception_e(&vmx->vcpu, vector, err); | 6890 | kvm_queue_exception_e(vcpu, vector, err); |
| 6432 | } else | 6891 | } else |
| 6433 | kvm_queue_exception(&vmx->vcpu, vector); | 6892 | kvm_queue_exception(vcpu, vector); |
| 6434 | break; | 6893 | break; |
| 6435 | case INTR_TYPE_SOFT_INTR: | 6894 | case INTR_TYPE_SOFT_INTR: |
| 6436 | vmx->vcpu.arch.event_exit_inst_len = | 6895 | vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); |
| 6437 | vmcs_read32(instr_len_field); | ||
| 6438 | /* fall through */ | 6896 | /* fall through */ |
| 6439 | case INTR_TYPE_EXT_INTR: | 6897 | case INTR_TYPE_EXT_INTR: |
| 6440 | kvm_queue_interrupt(&vmx->vcpu, vector, | 6898 | kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); |
| 6441 | type == INTR_TYPE_SOFT_INTR); | ||
| 6442 | break; | 6899 | break; |
| 6443 | default: | 6900 | default: |
| 6444 | break; | 6901 | break; |
| @@ -6447,18 +6904,14 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
| 6447 | 6904 | ||
| 6448 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 6905 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
| 6449 | { | 6906 | { |
| 6450 | if (is_guest_mode(&vmx->vcpu)) | 6907 | __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, |
| 6451 | return; | ||
| 6452 | __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, | ||
| 6453 | VM_EXIT_INSTRUCTION_LEN, | 6908 | VM_EXIT_INSTRUCTION_LEN, |
| 6454 | IDT_VECTORING_ERROR_CODE); | 6909 | IDT_VECTORING_ERROR_CODE); |
| 6455 | } | 6910 | } |
| 6456 | 6911 | ||
| 6457 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) | 6912 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) |
| 6458 | { | 6913 | { |
| 6459 | if (is_guest_mode(vcpu)) | 6914 | __vmx_complete_interrupts(vcpu, |
| 6460 | return; | ||
| 6461 | __vmx_complete_interrupts(to_vmx(vcpu), | ||
| 6462 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), | 6915 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), |
| 6463 | VM_ENTRY_INSTRUCTION_LEN, | 6916 | VM_ENTRY_INSTRUCTION_LEN, |
| 6464 | VM_ENTRY_EXCEPTION_ERROR_CODE); | 6917 | VM_ENTRY_EXCEPTION_ERROR_CODE); |
| @@ -6489,21 +6942,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 6489 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6942 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 6490 | unsigned long debugctlmsr; | 6943 | unsigned long debugctlmsr; |
| 6491 | 6944 | ||
| 6492 | if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { | ||
| 6493 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
| 6494 | if (vmcs12->idt_vectoring_info_field & | ||
| 6495 | VECTORING_INFO_VALID_MASK) { | ||
| 6496 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
| 6497 | vmcs12->idt_vectoring_info_field); | ||
| 6498 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | ||
| 6499 | vmcs12->vm_exit_instruction_len); | ||
| 6500 | if (vmcs12->idt_vectoring_info_field & | ||
| 6501 | VECTORING_INFO_DELIVER_CODE_MASK) | ||
| 6502 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
| 6503 | vmcs12->idt_vectoring_error_code); | ||
| 6504 | } | ||
| 6505 | } | ||
| 6506 | |||
| 6507 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 6945 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
| 6508 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 6946 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
| 6509 | vmx->entry_time = ktime_get(); | 6947 | vmx->entry_time = ktime_get(); |
| @@ -6513,6 +6951,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 6513 | if (vmx->emulation_required) | 6951 | if (vmx->emulation_required) |
| 6514 | return; | 6952 | return; |
| 6515 | 6953 | ||
| 6954 | if (vmx->nested.sync_shadow_vmcs) { | ||
| 6955 | copy_vmcs12_to_shadow(vmx); | ||
| 6956 | vmx->nested.sync_shadow_vmcs = false; | ||
| 6957 | } | ||
| 6958 | |||
| 6516 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6959 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
| 6517 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 6960 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
| 6518 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6961 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) |
| @@ -6662,17 +7105,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 6662 | 7105 | ||
| 6663 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 7106 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
| 6664 | 7107 | ||
| 6665 | if (is_guest_mode(vcpu)) { | ||
| 6666 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
| 6667 | vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info; | ||
| 6668 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { | ||
| 6669 | vmcs12->idt_vectoring_error_code = | ||
| 6670 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
| 6671 | vmcs12->vm_exit_instruction_len = | ||
| 6672 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
| 6673 | } | ||
| 6674 | } | ||
| 6675 | |||
| 6676 | vmx->loaded_vmcs->launched = 1; | 7108 | vmx->loaded_vmcs->launched = 1; |
| 6677 | 7109 | ||
| 6678 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 7110 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
| @@ -6734,10 +7166,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 6734 | put_cpu(); | 7166 | put_cpu(); |
| 6735 | if (err) | 7167 | if (err) |
| 6736 | goto free_vmcs; | 7168 | goto free_vmcs; |
| 6737 | if (vm_need_virtualize_apic_accesses(kvm)) | 7169 | if (vm_need_virtualize_apic_accesses(kvm)) { |
| 6738 | err = alloc_apic_access_page(kvm); | 7170 | err = alloc_apic_access_page(kvm); |
| 6739 | if (err) | 7171 | if (err) |
| 6740 | goto free_vmcs; | 7172 | goto free_vmcs; |
| 7173 | } | ||
| 6741 | 7174 | ||
| 6742 | if (enable_ept) { | 7175 | if (enable_ept) { |
| 6743 | if (!kvm->arch.ept_identity_map_addr) | 7176 | if (!kvm->arch.ept_identity_map_addr) |
| @@ -6931,9 +7364,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 6931 | vmcs12->vm_entry_instruction_len); | 7364 | vmcs12->vm_entry_instruction_len); |
| 6932 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 7365 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, |
| 6933 | vmcs12->guest_interruptibility_info); | 7366 | vmcs12->guest_interruptibility_info); |
| 6934 | vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state); | ||
| 6935 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 7367 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
| 6936 | vmcs_writel(GUEST_DR7, vmcs12->guest_dr7); | 7368 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
| 6937 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); | 7369 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); |
| 6938 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 7370 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
| 6939 | vmcs12->guest_pending_dbg_exceptions); | 7371 | vmcs12->guest_pending_dbg_exceptions); |
| @@ -6946,6 +7378,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 6946 | (vmcs_config.pin_based_exec_ctrl | | 7378 | (vmcs_config.pin_based_exec_ctrl | |
| 6947 | vmcs12->pin_based_vm_exec_control)); | 7379 | vmcs12->pin_based_vm_exec_control)); |
| 6948 | 7380 | ||
| 7381 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) | ||
| 7382 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, | ||
| 7383 | vmcs12->vmx_preemption_timer_value); | ||
| 7384 | |||
| 6949 | /* | 7385 | /* |
| 6950 | * Whether page-faults are trapped is determined by a combination of | 7386 | * Whether page-faults are trapped is determined by a combination of |
| 6951 | * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. | 7387 | * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. |
| @@ -7016,7 +7452,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7016 | * Other fields are different per CPU, and will be set later when | 7452 | * Other fields are different per CPU, and will be set later when |
| 7017 | * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. | 7453 | * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. |
| 7018 | */ | 7454 | */ |
| 7019 | vmx_set_constant_host_state(); | 7455 | vmx_set_constant_host_state(vmx); |
| 7020 | 7456 | ||
| 7021 | /* | 7457 | /* |
| 7022 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before | 7458 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before |
| @@ -7082,7 +7518,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7082 | 7518 | ||
| 7083 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | 7519 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) |
| 7084 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | 7520 | vcpu->arch.efer = vmcs12->guest_ia32_efer; |
| 7085 | if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | 7521 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) |
| 7086 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | 7522 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); |
| 7087 | else | 7523 | else |
| 7088 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | 7524 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); |
| @@ -7121,6 +7557,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 7121 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7557 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 7122 | int cpu; | 7558 | int cpu; |
| 7123 | struct loaded_vmcs *vmcs02; | 7559 | struct loaded_vmcs *vmcs02; |
| 7560 | bool ia32e; | ||
| 7124 | 7561 | ||
| 7125 | if (!nested_vmx_check_permission(vcpu) || | 7562 | if (!nested_vmx_check_permission(vcpu) || |
| 7126 | !nested_vmx_check_vmcs12(vcpu)) | 7563 | !nested_vmx_check_vmcs12(vcpu)) |
| @@ -7129,6 +7566,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 7129 | skip_emulated_instruction(vcpu); | 7566 | skip_emulated_instruction(vcpu); |
| 7130 | vmcs12 = get_vmcs12(vcpu); | 7567 | vmcs12 = get_vmcs12(vcpu); |
| 7131 | 7568 | ||
| 7569 | if (enable_shadow_vmcs) | ||
| 7570 | copy_shadow_to_vmcs12(vmx); | ||
| 7571 | |||
| 7132 | /* | 7572 | /* |
| 7133 | * The nested entry process starts with enforcing various prerequisites | 7573 | * The nested entry process starts with enforcing various prerequisites |
| 7134 | * on vmcs12 as required by the Intel SDM, and act appropriately when | 7574 | * on vmcs12 as required by the Intel SDM, and act appropriately when |
| @@ -7146,6 +7586,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 7146 | return 1; | 7586 | return 1; |
| 7147 | } | 7587 | } |
| 7148 | 7588 | ||
| 7589 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { | ||
| 7590 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
| 7591 | return 1; | ||
| 7592 | } | ||
| 7593 | |||
| 7149 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && | 7594 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && |
| 7150 | !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { | 7595 | !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { |
| 7151 | /*TODO: Also verify bits beyond physical address width are 0*/ | 7596 | /*TODO: Also verify bits beyond physical address width are 0*/ |
| @@ -7204,6 +7649,45 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 7204 | } | 7649 | } |
| 7205 | 7650 | ||
| 7206 | /* | 7651 | /* |
| 7652 | * If the load IA32_EFER VM-entry control is 1, the following checks | ||
| 7653 | * are performed on the field for the IA32_EFER MSR: | ||
| 7654 | * - Bits reserved in the IA32_EFER MSR must be 0. | ||
| 7655 | * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of | ||
| 7656 | * the IA-32e mode guest VM-exit control. It must also be identical | ||
| 7657 | * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to | ||
| 7658 | * CR0.PG) is 1. | ||
| 7659 | */ | ||
| 7660 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { | ||
| 7661 | ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; | ||
| 7662 | if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || | ||
| 7663 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || | ||
| 7664 | ((vmcs12->guest_cr0 & X86_CR0_PG) && | ||
| 7665 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { | ||
| 7666 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
| 7667 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
| 7668 | return 1; | ||
| 7669 | } | ||
| 7670 | } | ||
| 7671 | |||
| 7672 | /* | ||
| 7673 | * If the load IA32_EFER VM-exit control is 1, bits reserved in the | ||
| 7674 | * IA32_EFER MSR must be 0 in the field for that register. In addition, | ||
| 7675 | * the values of the LMA and LME bits in the field must each be that of | ||
| 7676 | * the host address-space size VM-exit control. | ||
| 7677 | */ | ||
| 7678 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { | ||
| 7679 | ia32e = (vmcs12->vm_exit_controls & | ||
| 7680 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; | ||
| 7681 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || | ||
| 7682 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || | ||
| 7683 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { | ||
| 7684 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
| 7685 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
| 7686 | return 1; | ||
| 7687 | } | ||
| 7688 | } | ||
| 7689 | |||
| 7690 | /* | ||
| 7207 | * We're finally done with prerequisite checking, and can start with | 7691 | * We're finally done with prerequisite checking, and can start with |
| 7208 | * the nested entry. | 7692 | * the nested entry. |
| 7209 | */ | 7693 | */ |
| @@ -7223,6 +7707,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 7223 | vcpu->cpu = cpu; | 7707 | vcpu->cpu = cpu; |
| 7224 | put_cpu(); | 7708 | put_cpu(); |
| 7225 | 7709 | ||
| 7710 | vmx_segment_cache_clear(vmx); | ||
| 7711 | |||
| 7226 | vmcs12->launch_state = 1; | 7712 | vmcs12->launch_state = 1; |
| 7227 | 7713 | ||
| 7228 | prepare_vmcs02(vcpu, vmcs12); | 7714 | prepare_vmcs02(vcpu, vmcs12); |
| @@ -7273,6 +7759,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7273 | vcpu->arch.cr4_guest_owned_bits)); | 7759 | vcpu->arch.cr4_guest_owned_bits)); |
| 7274 | } | 7760 | } |
| 7275 | 7761 | ||
| 7762 | static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | ||
| 7763 | struct vmcs12 *vmcs12) | ||
| 7764 | { | ||
| 7765 | u32 idt_vectoring; | ||
| 7766 | unsigned int nr; | ||
| 7767 | |||
| 7768 | if (vcpu->arch.exception.pending) { | ||
| 7769 | nr = vcpu->arch.exception.nr; | ||
| 7770 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | ||
| 7771 | |||
| 7772 | if (kvm_exception_is_soft(nr)) { | ||
| 7773 | vmcs12->vm_exit_instruction_len = | ||
| 7774 | vcpu->arch.event_exit_inst_len; | ||
| 7775 | idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; | ||
| 7776 | } else | ||
| 7777 | idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; | ||
| 7778 | |||
| 7779 | if (vcpu->arch.exception.has_error_code) { | ||
| 7780 | idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; | ||
| 7781 | vmcs12->idt_vectoring_error_code = | ||
| 7782 | vcpu->arch.exception.error_code; | ||
| 7783 | } | ||
| 7784 | |||
| 7785 | vmcs12->idt_vectoring_info_field = idt_vectoring; | ||
| 7786 | } else if (vcpu->arch.nmi_pending) { | ||
| 7787 | vmcs12->idt_vectoring_info_field = | ||
| 7788 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; | ||
| 7789 | } else if (vcpu->arch.interrupt.pending) { | ||
| 7790 | nr = vcpu->arch.interrupt.nr; | ||
| 7791 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | ||
| 7792 | |||
| 7793 | if (vcpu->arch.interrupt.soft) { | ||
| 7794 | idt_vectoring |= INTR_TYPE_SOFT_INTR; | ||
| 7795 | vmcs12->vm_entry_instruction_len = | ||
| 7796 | vcpu->arch.event_exit_inst_len; | ||
| 7797 | } else | ||
| 7798 | idt_vectoring |= INTR_TYPE_EXT_INTR; | ||
| 7799 | |||
| 7800 | vmcs12->idt_vectoring_info_field = idt_vectoring; | ||
| 7801 | } | ||
| 7802 | } | ||
| 7803 | |||
| 7276 | /* | 7804 | /* |
| 7277 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | 7805 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits |
| 7278 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | 7806 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), |
| @@ -7284,7 +7812,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7284 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | 7812 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, |
| 7285 | * which already writes to vmcs12 directly. | 7813 | * which already writes to vmcs12 directly. |
| 7286 | */ | 7814 | */ |
| 7287 | void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 7815 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
| 7288 | { | 7816 | { |
| 7289 | /* update guest state fields: */ | 7817 | /* update guest state fields: */ |
| 7290 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 7818 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
| @@ -7332,16 +7860,19 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7332 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); | 7860 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); |
| 7333 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); | 7861 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); |
| 7334 | 7862 | ||
| 7335 | vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE); | ||
| 7336 | vmcs12->guest_interruptibility_info = | 7863 | vmcs12->guest_interruptibility_info = |
| 7337 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | 7864 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); |
| 7338 | vmcs12->guest_pending_dbg_exceptions = | 7865 | vmcs12->guest_pending_dbg_exceptions = |
| 7339 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 7866 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
| 7340 | 7867 | ||
| 7868 | vmcs12->vm_entry_controls = | ||
| 7869 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | ||
| 7870 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | ||
| 7871 | |||
| 7341 | /* TODO: These cannot have changed unless we have MSR bitmaps and | 7872 | /* TODO: These cannot have changed unless we have MSR bitmaps and |
| 7342 | * the relevant bit asks not to trap the change */ | 7873 | * the relevant bit asks not to trap the change */ |
| 7343 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | 7874 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
| 7344 | if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT) | 7875 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) |
| 7345 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); | 7876 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); |
| 7346 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | 7877 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); |
| 7347 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | 7878 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); |
| @@ -7349,21 +7880,38 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7349 | 7880 | ||
| 7350 | /* update exit information fields: */ | 7881 | /* update exit information fields: */ |
| 7351 | 7882 | ||
| 7352 | vmcs12->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); | 7883 | vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; |
| 7353 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 7884 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 7354 | 7885 | ||
| 7355 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 7886 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
| 7356 | vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 7887 | if ((vmcs12->vm_exit_intr_info & |
| 7357 | vmcs12->idt_vectoring_info_field = | 7888 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == |
| 7358 | vmcs_read32(IDT_VECTORING_INFO_FIELD); | 7889 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) |
| 7359 | vmcs12->idt_vectoring_error_code = | 7890 | vmcs12->vm_exit_intr_error_code = |
| 7360 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | 7891 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
| 7892 | vmcs12->idt_vectoring_info_field = 0; | ||
| 7361 | vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 7893 | vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
| 7362 | vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 7894 | vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
| 7363 | 7895 | ||
| 7364 | /* clear vm-entry fields which are to be cleared on exit */ | 7896 | if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { |
| 7365 | if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) | 7897 | /* vm_entry_intr_info_field is cleared on exit. Emulate this |
| 7898 | * instead of reading the real value. */ | ||
| 7366 | vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; | 7899 | vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; |
| 7900 | |||
| 7901 | /* | ||
| 7902 | * Transfer the event that L0 or L1 may wanted to inject into | ||
| 7903 | * L2 to IDT_VECTORING_INFO_FIELD. | ||
| 7904 | */ | ||
| 7905 | vmcs12_save_pending_event(vcpu, vmcs12); | ||
| 7906 | } | ||
| 7907 | |||
| 7908 | /* | ||
| 7909 | * Drop what we picked up for L2 via vmx_complete_interrupts. It is | ||
| 7910 | * preserved above and would only end up incorrectly in L1. | ||
| 7911 | */ | ||
| 7912 | vcpu->arch.nmi_injected = false; | ||
| 7913 | kvm_clear_exception_queue(vcpu); | ||
| 7914 | kvm_clear_interrupt_queue(vcpu); | ||
| 7367 | } | 7915 | } |
| 7368 | 7916 | ||
| 7369 | /* | 7917 | /* |
| @@ -7375,11 +7923,12 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7375 | * Failures During or After Loading Guest State"). | 7923 | * Failures During or After Loading Guest State"). |
| 7376 | * This function should be called when the active VMCS is L1's (vmcs01). | 7924 | * This function should be called when the active VMCS is L1's (vmcs01). |
| 7377 | */ | 7925 | */ |
| 7378 | void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 7926 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
| 7927 | struct vmcs12 *vmcs12) | ||
| 7379 | { | 7928 | { |
| 7380 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 7929 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
| 7381 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 7930 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
| 7382 | if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | 7931 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) |
| 7383 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | 7932 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); |
| 7384 | else | 7933 | else |
| 7385 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | 7934 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); |
| @@ -7387,6 +7936,7 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7387 | 7936 | ||
| 7388 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); | 7937 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); |
| 7389 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); | 7938 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); |
| 7939 | vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); | ||
| 7390 | /* | 7940 | /* |
| 7391 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't | 7941 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't |
| 7392 | * actually changed, because it depends on the current state of | 7942 | * actually changed, because it depends on the current state of |
| @@ -7445,6 +7995,9 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 7445 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | 7995 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) |
| 7446 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, | 7996 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, |
| 7447 | vmcs12->host_ia32_perf_global_ctrl); | 7997 | vmcs12->host_ia32_perf_global_ctrl); |
| 7998 | |||
| 7999 | kvm_set_dr(vcpu, 7, 0x400); | ||
| 8000 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | ||
| 7448 | } | 8001 | } |
| 7449 | 8002 | ||
| 7450 | /* | 8003 | /* |
| @@ -7458,6 +8011,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
| 7458 | int cpu; | 8011 | int cpu; |
| 7459 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 8012 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
| 7460 | 8013 | ||
| 8014 | /* trying to cancel vmlaunch/vmresume is a bug */ | ||
| 8015 | WARN_ON_ONCE(vmx->nested.nested_run_pending); | ||
| 8016 | |||
| 7461 | leave_guest_mode(vcpu); | 8017 | leave_guest_mode(vcpu); |
| 7462 | prepare_vmcs12(vcpu, vmcs12); | 8018 | prepare_vmcs12(vcpu, vmcs12); |
| 7463 | 8019 | ||
| @@ -7468,6 +8024,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
| 7468 | vcpu->cpu = cpu; | 8024 | vcpu->cpu = cpu; |
| 7469 | put_cpu(); | 8025 | put_cpu(); |
| 7470 | 8026 | ||
| 8027 | vmx_segment_cache_clear(vmx); | ||
| 8028 | |||
| 7471 | /* if no vmcs02 cache requested, remove the one we used */ | 8029 | /* if no vmcs02 cache requested, remove the one we used */ |
| 7472 | if (VMCS02_POOL_SIZE == 0) | 8030 | if (VMCS02_POOL_SIZE == 0) |
| 7473 | nested_free_vmcs02(vmx, vmx->nested.current_vmptr); | 8031 | nested_free_vmcs02(vmx, vmx->nested.current_vmptr); |
| @@ -7496,6 +8054,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
| 7496 | nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); | 8054 | nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); |
| 7497 | } else | 8055 | } else |
| 7498 | nested_vmx_succeed(vcpu); | 8056 | nested_vmx_succeed(vcpu); |
| 8057 | if (enable_shadow_vmcs) | ||
| 8058 | vmx->nested.sync_shadow_vmcs = true; | ||
| 7499 | } | 8059 | } |
| 7500 | 8060 | ||
| 7501 | /* | 8061 | /* |
| @@ -7513,6 +8073,8 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, | |||
| 7513 | vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; | 8073 | vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; |
| 7514 | vmcs12->exit_qualification = qualification; | 8074 | vmcs12->exit_qualification = qualification; |
| 7515 | nested_vmx_succeed(vcpu); | 8075 | nested_vmx_succeed(vcpu); |
| 8076 | if (enable_shadow_vmcs) | ||
| 8077 | to_vmx(vcpu)->nested.sync_shadow_vmcs = true; | ||
| 7516 | } | 8078 | } |
| 7517 | 8079 | ||
| 7518 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, | 8080 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, |
| @@ -7590,6 +8152,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 7590 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | 8152 | .load_eoi_exitmap = vmx_load_eoi_exitmap, |
| 7591 | .hwapic_irr_update = vmx_hwapic_irr_update, | 8153 | .hwapic_irr_update = vmx_hwapic_irr_update, |
| 7592 | .hwapic_isr_update = vmx_hwapic_isr_update, | 8154 | .hwapic_isr_update = vmx_hwapic_isr_update, |
| 8155 | .sync_pir_to_irr = vmx_sync_pir_to_irr, | ||
| 8156 | .deliver_posted_interrupt = vmx_deliver_posted_interrupt, | ||
| 7593 | 8157 | ||
| 7594 | .set_tss_addr = vmx_set_tss_addr, | 8158 | .set_tss_addr = vmx_set_tss_addr, |
| 7595 | .get_tdp_level = get_ept_level, | 8159 | .get_tdp_level = get_ept_level, |
| @@ -7618,6 +8182,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 7618 | .set_tdp_cr3 = vmx_set_cr3, | 8182 | .set_tdp_cr3 = vmx_set_cr3, |
| 7619 | 8183 | ||
| 7620 | .check_intercept = vmx_check_intercept, | 8184 | .check_intercept = vmx_check_intercept, |
| 8185 | .handle_external_intr = vmx_handle_external_intr, | ||
| 7621 | }; | 8186 | }; |
| 7622 | 8187 | ||
| 7623 | static int __init vmx_init(void) | 8188 | static int __init vmx_init(void) |
| @@ -7656,6 +8221,24 @@ static int __init vmx_init(void) | |||
| 7656 | (unsigned long *)__get_free_page(GFP_KERNEL); | 8221 | (unsigned long *)__get_free_page(GFP_KERNEL); |
| 7657 | if (!vmx_msr_bitmap_longmode_x2apic) | 8222 | if (!vmx_msr_bitmap_longmode_x2apic) |
| 7658 | goto out4; | 8223 | goto out4; |
| 8224 | vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
| 8225 | if (!vmx_vmread_bitmap) | ||
| 8226 | goto out5; | ||
| 8227 | |||
| 8228 | vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
| 8229 | if (!vmx_vmwrite_bitmap) | ||
| 8230 | goto out6; | ||
| 8231 | |||
| 8232 | memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); | ||
| 8233 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); | ||
| 8234 | /* shadowed read/write fields */ | ||
| 8235 | for (i = 0; i < max_shadow_read_write_fields; i++) { | ||
| 8236 | clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap); | ||
| 8237 | clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap); | ||
| 8238 | } | ||
| 8239 | /* shadowed read only fields */ | ||
| 8240 | for (i = 0; i < max_shadow_read_only_fields; i++) | ||
| 8241 | clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap); | ||
| 7659 | 8242 | ||
| 7660 | /* | 8243 | /* |
| 7661 | * Allow direct access to the PC debug port (it is often used for I/O | 8244 | * Allow direct access to the PC debug port (it is often used for I/O |
| @@ -7674,7 +8257,7 @@ static int __init vmx_init(void) | |||
| 7674 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), | 8257 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), |
| 7675 | __alignof__(struct vcpu_vmx), THIS_MODULE); | 8258 | __alignof__(struct vcpu_vmx), THIS_MODULE); |
| 7676 | if (r) | 8259 | if (r) |
| 7677 | goto out3; | 8260 | goto out7; |
| 7678 | 8261 | ||
| 7679 | #ifdef CONFIG_KEXEC | 8262 | #ifdef CONFIG_KEXEC |
| 7680 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, | 8263 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, |
| @@ -7692,7 +8275,7 @@ static int __init vmx_init(void) | |||
| 7692 | memcpy(vmx_msr_bitmap_longmode_x2apic, | 8275 | memcpy(vmx_msr_bitmap_longmode_x2apic, |
| 7693 | vmx_msr_bitmap_longmode, PAGE_SIZE); | 8276 | vmx_msr_bitmap_longmode, PAGE_SIZE); |
| 7694 | 8277 | ||
| 7695 | if (enable_apicv_reg_vid) { | 8278 | if (enable_apicv) { |
| 7696 | for (msr = 0x800; msr <= 0x8ff; msr++) | 8279 | for (msr = 0x800; msr <= 0x8ff; msr++) |
| 7697 | vmx_disable_intercept_msr_read_x2apic(msr); | 8280 | vmx_disable_intercept_msr_read_x2apic(msr); |
| 7698 | 8281 | ||
| @@ -7722,6 +8305,12 @@ static int __init vmx_init(void) | |||
| 7722 | 8305 | ||
| 7723 | return 0; | 8306 | return 0; |
| 7724 | 8307 | ||
| 8308 | out7: | ||
| 8309 | free_page((unsigned long)vmx_vmwrite_bitmap); | ||
| 8310 | out6: | ||
| 8311 | free_page((unsigned long)vmx_vmread_bitmap); | ||
| 8312 | out5: | ||
| 8313 | free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | ||
| 7725 | out4: | 8314 | out4: |
| 7726 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 8315 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
| 7727 | out3: | 8316 | out3: |
| @@ -7743,6 +8332,8 @@ static void __exit vmx_exit(void) | |||
| 7743 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 8332 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
| 7744 | free_page((unsigned long)vmx_io_bitmap_b); | 8333 | free_page((unsigned long)vmx_io_bitmap_b); |
| 7745 | free_page((unsigned long)vmx_io_bitmap_a); | 8334 | free_page((unsigned long)vmx_io_bitmap_a); |
| 8335 | free_page((unsigned long)vmx_vmwrite_bitmap); | ||
| 8336 | free_page((unsigned long)vmx_vmread_bitmap); | ||
| 7746 | 8337 | ||
| 7747 | #ifdef CONFIG_KEXEC | 8338 | #ifdef CONFIG_KEXEC |
| 7748 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); | 8339 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1721324c271..05a8b1a2300d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0; | |||
| 162 | 162 | ||
| 163 | static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); | 163 | static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); |
| 164 | 164 | ||
| 165 | static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); | ||
| 166 | |||
| 167 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) | 165 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) |
| 168 | { | 166 | { |
| 169 | int i; | 167 | int i; |
| @@ -263,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | |||
| 263 | } | 261 | } |
| 264 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 262 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
| 265 | 263 | ||
| 264 | asmlinkage void kvm_spurious_fault(void) | ||
| 265 | { | ||
| 266 | /* Fault while not rebooting. We want the trace. */ | ||
| 267 | BUG(); | ||
| 268 | } | ||
| 269 | EXPORT_SYMBOL_GPL(kvm_spurious_fault); | ||
| 270 | |||
| 266 | #define EXCPT_BENIGN 0 | 271 | #define EXCPT_BENIGN 0 |
| 267 | #define EXCPT_CONTRIBUTORY 1 | 272 | #define EXCPT_CONTRIBUTORY 1 |
| 268 | #define EXCPT_PF 2 | 273 | #define EXCPT_PF 2 |
| @@ -840,23 +845,17 @@ static const u32 emulated_msrs[] = { | |||
| 840 | MSR_IA32_MCG_CTL, | 845 | MSR_IA32_MCG_CTL, |
| 841 | }; | 846 | }; |
| 842 | 847 | ||
| 843 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | 848 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) |
| 844 | { | 849 | { |
| 845 | u64 old_efer = vcpu->arch.efer; | ||
| 846 | |||
| 847 | if (efer & efer_reserved_bits) | 850 | if (efer & efer_reserved_bits) |
| 848 | return 1; | 851 | return false; |
| 849 | |||
| 850 | if (is_paging(vcpu) | ||
| 851 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) | ||
| 852 | return 1; | ||
| 853 | 852 | ||
| 854 | if (efer & EFER_FFXSR) { | 853 | if (efer & EFER_FFXSR) { |
| 855 | struct kvm_cpuid_entry2 *feat; | 854 | struct kvm_cpuid_entry2 *feat; |
| 856 | 855 | ||
| 857 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 856 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
| 858 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) | 857 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) |
| 859 | return 1; | 858 | return false; |
| 860 | } | 859 | } |
| 861 | 860 | ||
| 862 | if (efer & EFER_SVME) { | 861 | if (efer & EFER_SVME) { |
| @@ -864,9 +863,24 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
| 864 | 863 | ||
| 865 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 864 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
| 866 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) | 865 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) |
| 867 | return 1; | 866 | return false; |
| 868 | } | 867 | } |
| 869 | 868 | ||
| 869 | return true; | ||
| 870 | } | ||
| 871 | EXPORT_SYMBOL_GPL(kvm_valid_efer); | ||
| 872 | |||
| 873 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | ||
| 874 | { | ||
| 875 | u64 old_efer = vcpu->arch.efer; | ||
| 876 | |||
| 877 | if (!kvm_valid_efer(vcpu, efer)) | ||
| 878 | return 1; | ||
| 879 | |||
| 880 | if (is_paging(vcpu) | ||
| 881 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) | ||
| 882 | return 1; | ||
| 883 | |||
| 870 | efer &= ~EFER_LMA; | 884 | efer &= ~EFER_LMA; |
| 871 | efer |= vcpu->arch.efer & EFER_LMA; | 885 | efer |= vcpu->arch.efer & EFER_LMA; |
| 872 | 886 | ||
| @@ -1079,6 +1093,10 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | |||
| 1079 | u32 thresh_lo, thresh_hi; | 1093 | u32 thresh_lo, thresh_hi; |
| 1080 | int use_scaling = 0; | 1094 | int use_scaling = 0; |
| 1081 | 1095 | ||
| 1096 | /* tsc_khz can be zero if TSC calibration fails */ | ||
| 1097 | if (this_tsc_khz == 0) | ||
| 1098 | return; | ||
| 1099 | |||
| 1082 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 1100 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
| 1083 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 1101 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, |
| 1084 | &vcpu->arch.virtual_tsc_shift, | 1102 | &vcpu->arch.virtual_tsc_shift, |
| @@ -1156,20 +1174,23 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
| 1156 | ns = get_kernel_ns(); | 1174 | ns = get_kernel_ns(); |
| 1157 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1175 | elapsed = ns - kvm->arch.last_tsc_nsec; |
| 1158 | 1176 | ||
| 1159 | /* n.b - signed multiplication and division required */ | 1177 | if (vcpu->arch.virtual_tsc_khz) { |
| 1160 | usdiff = data - kvm->arch.last_tsc_write; | 1178 | /* n.b - signed multiplication and division required */ |
| 1179 | usdiff = data - kvm->arch.last_tsc_write; | ||
| 1161 | #ifdef CONFIG_X86_64 | 1180 | #ifdef CONFIG_X86_64 |
| 1162 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; | 1181 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; |
| 1163 | #else | 1182 | #else |
| 1164 | /* do_div() only does unsigned */ | 1183 | /* do_div() only does unsigned */ |
| 1165 | asm("idivl %2; xor %%edx, %%edx" | 1184 | asm("idivl %2; xor %%edx, %%edx" |
| 1166 | : "=A"(usdiff) | 1185 | : "=A"(usdiff) |
| 1167 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); | 1186 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); |
| 1168 | #endif | 1187 | #endif |
| 1169 | do_div(elapsed, 1000); | 1188 | do_div(elapsed, 1000); |
| 1170 | usdiff -= elapsed; | 1189 | usdiff -= elapsed; |
| 1171 | if (usdiff < 0) | 1190 | if (usdiff < 0) |
| 1172 | usdiff = -usdiff; | 1191 | usdiff = -usdiff; |
| 1192 | } else | ||
| 1193 | usdiff = USEC_PER_SEC; /* disable TSC match window below */ | ||
| 1173 | 1194 | ||
| 1174 | /* | 1195 | /* |
| 1175 | * Special case: TSC write with a small delta (1 second) of virtual | 1196 | * Special case: TSC write with a small delta (1 second) of virtual |
| @@ -2034,7 +2055,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2034 | case MSR_P6_EVNTSEL0: | 2055 | case MSR_P6_EVNTSEL0: |
| 2035 | case MSR_P6_EVNTSEL1: | 2056 | case MSR_P6_EVNTSEL1: |
| 2036 | if (kvm_pmu_msr(vcpu, msr)) | 2057 | if (kvm_pmu_msr(vcpu, msr)) |
| 2037 | return kvm_pmu_set_msr(vcpu, msr, data); | 2058 | return kvm_pmu_set_msr(vcpu, msr_info); |
| 2038 | 2059 | ||
| 2039 | if (pr || data != 0) | 2060 | if (pr || data != 0) |
| 2040 | vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " | 2061 | vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " |
| @@ -2080,7 +2101,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2080 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 2101 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
| 2081 | return xen_hvm_config(vcpu, data); | 2102 | return xen_hvm_config(vcpu, data); |
| 2082 | if (kvm_pmu_msr(vcpu, msr)) | 2103 | if (kvm_pmu_msr(vcpu, msr)) |
| 2083 | return kvm_pmu_set_msr(vcpu, msr, data); | 2104 | return kvm_pmu_set_msr(vcpu, msr_info); |
| 2084 | if (!ignore_msrs) { | 2105 | if (!ignore_msrs) { |
| 2085 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 2106 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
| 2086 | msr, data); | 2107 | msr, data); |
| @@ -2479,7 +2500,6 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 2479 | case KVM_CAP_USER_NMI: | 2500 | case KVM_CAP_USER_NMI: |
| 2480 | case KVM_CAP_REINJECT_CONTROL: | 2501 | case KVM_CAP_REINJECT_CONTROL: |
| 2481 | case KVM_CAP_IRQ_INJECT_STATUS: | 2502 | case KVM_CAP_IRQ_INJECT_STATUS: |
| 2482 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
| 2483 | case KVM_CAP_IRQFD: | 2503 | case KVM_CAP_IRQFD: |
| 2484 | case KVM_CAP_IOEVENTFD: | 2504 | case KVM_CAP_IOEVENTFD: |
| 2485 | case KVM_CAP_PIT2: | 2505 | case KVM_CAP_PIT2: |
| @@ -2497,10 +2517,12 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 2497 | case KVM_CAP_XSAVE: | 2517 | case KVM_CAP_XSAVE: |
| 2498 | case KVM_CAP_ASYNC_PF: | 2518 | case KVM_CAP_ASYNC_PF: |
| 2499 | case KVM_CAP_GET_TSC_KHZ: | 2519 | case KVM_CAP_GET_TSC_KHZ: |
| 2500 | case KVM_CAP_PCI_2_3: | ||
| 2501 | case KVM_CAP_KVMCLOCK_CTRL: | 2520 | case KVM_CAP_KVMCLOCK_CTRL: |
| 2502 | case KVM_CAP_READONLY_MEM: | 2521 | case KVM_CAP_READONLY_MEM: |
| 2503 | case KVM_CAP_IRQFD_RESAMPLE: | 2522 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
| 2523 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
| 2524 | case KVM_CAP_PCI_2_3: | ||
| 2525 | #endif | ||
| 2504 | r = 1; | 2526 | r = 1; |
| 2505 | break; | 2527 | break; |
| 2506 | case KVM_CAP_COALESCED_MMIO: | 2528 | case KVM_CAP_COALESCED_MMIO: |
| @@ -2521,9 +2543,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 2521 | case KVM_CAP_PV_MMU: /* obsolete */ | 2543 | case KVM_CAP_PV_MMU: /* obsolete */ |
| 2522 | r = 0; | 2544 | r = 0; |
| 2523 | break; | 2545 | break; |
| 2546 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
| 2524 | case KVM_CAP_IOMMU: | 2547 | case KVM_CAP_IOMMU: |
| 2525 | r = iommu_present(&pci_bus_type); | 2548 | r = iommu_present(&pci_bus_type); |
| 2526 | break; | 2549 | break; |
| 2550 | #endif | ||
| 2527 | case KVM_CAP_MCE: | 2551 | case KVM_CAP_MCE: |
| 2528 | r = KVM_MAX_MCE_BANKS; | 2552 | r = KVM_MAX_MCE_BANKS; |
| 2529 | break; | 2553 | break; |
| @@ -2679,6 +2703,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 2679 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2703 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
| 2680 | struct kvm_lapic_state *s) | 2704 | struct kvm_lapic_state *s) |
| 2681 | { | 2705 | { |
| 2706 | kvm_x86_ops->sync_pir_to_irr(vcpu); | ||
| 2682 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); | 2707 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); |
| 2683 | 2708 | ||
| 2684 | return 0; | 2709 | return 0; |
| @@ -2696,7 +2721,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | |||
| 2696 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | 2721 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, |
| 2697 | struct kvm_interrupt *irq) | 2722 | struct kvm_interrupt *irq) |
| 2698 | { | 2723 | { |
| 2699 | if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) | 2724 | if (irq->irq >= KVM_NR_INTERRUPTS) |
| 2700 | return -EINVAL; | 2725 | return -EINVAL; |
| 2701 | if (irqchip_in_kernel(vcpu->kvm)) | 2726 | if (irqchip_in_kernel(vcpu->kvm)) |
| 2702 | return -ENXIO; | 2727 | return -ENXIO; |
| @@ -2819,10 +2844,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2819 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | 2844 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); |
| 2820 | events->nmi.pad = 0; | 2845 | events->nmi.pad = 0; |
| 2821 | 2846 | ||
| 2822 | events->sipi_vector = vcpu->arch.sipi_vector; | 2847 | events->sipi_vector = 0; /* never valid when reporting to user space */ |
| 2823 | 2848 | ||
| 2824 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2849 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
| 2825 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR | ||
| 2826 | | KVM_VCPUEVENT_VALID_SHADOW); | 2850 | | KVM_VCPUEVENT_VALID_SHADOW); |
| 2827 | memset(&events->reserved, 0, sizeof(events->reserved)); | 2851 | memset(&events->reserved, 0, sizeof(events->reserved)); |
| 2828 | } | 2852 | } |
| @@ -2853,8 +2877,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2853 | vcpu->arch.nmi_pending = events->nmi.pending; | 2877 | vcpu->arch.nmi_pending = events->nmi.pending; |
| 2854 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | 2878 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); |
| 2855 | 2879 | ||
| 2856 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | 2880 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && |
| 2857 | vcpu->arch.sipi_vector = events->sipi_vector; | 2881 | kvm_vcpu_has_lapic(vcpu)) |
| 2882 | vcpu->arch.apic->sipi_vector = events->sipi_vector; | ||
| 2858 | 2883 | ||
| 2859 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 2884 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 2860 | 2885 | ||
| @@ -3478,13 +3503,15 @@ out: | |||
| 3478 | return r; | 3503 | return r; |
| 3479 | } | 3504 | } |
| 3480 | 3505 | ||
| 3481 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) | 3506 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, |
| 3507 | bool line_status) | ||
| 3482 | { | 3508 | { |
| 3483 | if (!irqchip_in_kernel(kvm)) | 3509 | if (!irqchip_in_kernel(kvm)) |
| 3484 | return -ENXIO; | 3510 | return -ENXIO; |
| 3485 | 3511 | ||
| 3486 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 3512 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
| 3487 | irq_event->irq, irq_event->level); | 3513 | irq_event->irq, irq_event->level, |
| 3514 | line_status); | ||
| 3488 | return 0; | 3515 | return 0; |
| 3489 | } | 3516 | } |
| 3490 | 3517 | ||
| @@ -4752,11 +4779,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) | |||
| 4752 | } | 4779 | } |
| 4753 | 4780 | ||
| 4754 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, | 4781 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, |
| 4755 | bool write_fault_to_shadow_pgtable) | 4782 | bool write_fault_to_shadow_pgtable, |
| 4783 | int emulation_type) | ||
| 4756 | { | 4784 | { |
| 4757 | gpa_t gpa = cr2; | 4785 | gpa_t gpa = cr2; |
| 4758 | pfn_t pfn; | 4786 | pfn_t pfn; |
| 4759 | 4787 | ||
| 4788 | if (emulation_type & EMULTYPE_NO_REEXECUTE) | ||
| 4789 | return false; | ||
| 4790 | |||
| 4760 | if (!vcpu->arch.mmu.direct_map) { | 4791 | if (!vcpu->arch.mmu.direct_map) { |
| 4761 | /* | 4792 | /* |
| 4762 | * Write permission should be allowed since only | 4793 | * Write permission should be allowed since only |
| @@ -4899,8 +4930,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 4899 | if (r != EMULATION_OK) { | 4930 | if (r != EMULATION_OK) { |
| 4900 | if (emulation_type & EMULTYPE_TRAP_UD) | 4931 | if (emulation_type & EMULTYPE_TRAP_UD) |
| 4901 | return EMULATE_FAIL; | 4932 | return EMULATE_FAIL; |
| 4902 | if (reexecute_instruction(vcpu, cr2, | 4933 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, |
| 4903 | write_fault_to_spt)) | 4934 | emulation_type)) |
| 4904 | return EMULATE_DONE; | 4935 | return EMULATE_DONE; |
| 4905 | if (emulation_type & EMULTYPE_SKIP) | 4936 | if (emulation_type & EMULTYPE_SKIP) |
| 4906 | return EMULATE_FAIL; | 4937 | return EMULATE_FAIL; |
| @@ -4930,7 +4961,8 @@ restart: | |||
| 4930 | return EMULATE_DONE; | 4961 | return EMULATE_DONE; |
| 4931 | 4962 | ||
| 4932 | if (r == EMULATION_FAILED) { | 4963 | if (r == EMULATION_FAILED) { |
| 4933 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) | 4964 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, |
| 4965 | emulation_type)) | ||
| 4934 | return EMULATE_DONE; | 4966 | return EMULATE_DONE; |
| 4935 | 4967 | ||
| 4936 | return handle_emulation_failure(vcpu); | 4968 | return handle_emulation_failure(vcpu); |
| @@ -5641,14 +5673,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
| 5641 | #endif | 5673 | #endif |
| 5642 | } | 5674 | } |
| 5643 | 5675 | ||
| 5644 | static void update_eoi_exitmap(struct kvm_vcpu *vcpu) | 5676 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
| 5645 | { | 5677 | { |
| 5646 | u64 eoi_exit_bitmap[4]; | 5678 | u64 eoi_exit_bitmap[4]; |
| 5679 | u32 tmr[8]; | ||
| 5680 | |||
| 5681 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) | ||
| 5682 | return; | ||
| 5647 | 5683 | ||
| 5648 | memset(eoi_exit_bitmap, 0, 32); | 5684 | memset(eoi_exit_bitmap, 0, 32); |
| 5685 | memset(tmr, 0, 32); | ||
| 5649 | 5686 | ||
| 5650 | kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); | 5687 | kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr); |
| 5651 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | 5688 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); |
| 5689 | kvm_apic_update_tmr(vcpu, tmr); | ||
| 5652 | } | 5690 | } |
| 5653 | 5691 | ||
| 5654 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5692 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
| @@ -5656,7 +5694,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5656 | int r; | 5694 | int r; |
| 5657 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5695 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
| 5658 | vcpu->run->request_interrupt_window; | 5696 | vcpu->run->request_interrupt_window; |
| 5659 | bool req_immediate_exit = 0; | 5697 | bool req_immediate_exit = false; |
| 5660 | 5698 | ||
| 5661 | if (vcpu->requests) { | 5699 | if (vcpu->requests) { |
| 5662 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) | 5700 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
| @@ -5698,24 +5736,30 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5698 | record_steal_time(vcpu); | 5736 | record_steal_time(vcpu); |
| 5699 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | 5737 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) |
| 5700 | process_nmi(vcpu); | 5738 | process_nmi(vcpu); |
| 5701 | req_immediate_exit = | ||
| 5702 | kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
| 5703 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) | 5739 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) |
| 5704 | kvm_handle_pmu_event(vcpu); | 5740 | kvm_handle_pmu_event(vcpu); |
| 5705 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 5741 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
| 5706 | kvm_deliver_pmi(vcpu); | 5742 | kvm_deliver_pmi(vcpu); |
| 5707 | if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) | 5743 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) |
| 5708 | update_eoi_exitmap(vcpu); | 5744 | vcpu_scan_ioapic(vcpu); |
| 5709 | } | 5745 | } |
| 5710 | 5746 | ||
| 5711 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5747 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
| 5748 | kvm_apic_accept_events(vcpu); | ||
| 5749 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | ||
| 5750 | r = 1; | ||
| 5751 | goto out; | ||
| 5752 | } | ||
| 5753 | |||
| 5712 | inject_pending_event(vcpu); | 5754 | inject_pending_event(vcpu); |
| 5713 | 5755 | ||
| 5714 | /* enable NMI/IRQ window open exits if needed */ | 5756 | /* enable NMI/IRQ window open exits if needed */ |
| 5715 | if (vcpu->arch.nmi_pending) | 5757 | if (vcpu->arch.nmi_pending) |
| 5716 | kvm_x86_ops->enable_nmi_window(vcpu); | 5758 | req_immediate_exit = |
| 5759 | kvm_x86_ops->enable_nmi_window(vcpu) != 0; | ||
| 5717 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) | 5760 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
| 5718 | kvm_x86_ops->enable_irq_window(vcpu); | 5761 | req_immediate_exit = |
| 5762 | kvm_x86_ops->enable_irq_window(vcpu) != 0; | ||
| 5719 | 5763 | ||
| 5720 | if (kvm_lapic_enabled(vcpu)) { | 5764 | if (kvm_lapic_enabled(vcpu)) { |
| 5721 | /* | 5765 | /* |
| @@ -5794,7 +5838,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5794 | 5838 | ||
| 5795 | vcpu->mode = OUTSIDE_GUEST_MODE; | 5839 | vcpu->mode = OUTSIDE_GUEST_MODE; |
| 5796 | smp_wmb(); | 5840 | smp_wmb(); |
| 5797 | local_irq_enable(); | 5841 | |
| 5842 | /* Interrupt is enabled by handle_external_intr() */ | ||
| 5843 | kvm_x86_ops->handle_external_intr(vcpu); | ||
| 5798 | 5844 | ||
| 5799 | ++vcpu->stat.exits; | 5845 | ++vcpu->stat.exits; |
| 5800 | 5846 | ||
| @@ -5843,16 +5889,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
| 5843 | int r; | 5889 | int r; |
| 5844 | struct kvm *kvm = vcpu->kvm; | 5890 | struct kvm *kvm = vcpu->kvm; |
| 5845 | 5891 | ||
| 5846 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | ||
| 5847 | pr_debug("vcpu %d received sipi with vector # %x\n", | ||
| 5848 | vcpu->vcpu_id, vcpu->arch.sipi_vector); | ||
| 5849 | kvm_lapic_reset(vcpu); | ||
| 5850 | r = kvm_vcpu_reset(vcpu); | ||
| 5851 | if (r) | ||
| 5852 | return r; | ||
| 5853 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 5854 | } | ||
| 5855 | |||
| 5856 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 5892 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
| 5857 | r = vapic_enter(vcpu); | 5893 | r = vapic_enter(vcpu); |
| 5858 | if (r) { | 5894 | if (r) { |
| @@ -5869,8 +5905,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
| 5869 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 5905 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
| 5870 | kvm_vcpu_block(vcpu); | 5906 | kvm_vcpu_block(vcpu); |
| 5871 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 5907 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
| 5872 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) | 5908 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { |
| 5873 | { | 5909 | kvm_apic_accept_events(vcpu); |
| 5874 | switch(vcpu->arch.mp_state) { | 5910 | switch(vcpu->arch.mp_state) { |
| 5875 | case KVM_MP_STATE_HALTED: | 5911 | case KVM_MP_STATE_HALTED: |
| 5876 | vcpu->arch.mp_state = | 5912 | vcpu->arch.mp_state = |
| @@ -5878,7 +5914,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
| 5878 | case KVM_MP_STATE_RUNNABLE: | 5914 | case KVM_MP_STATE_RUNNABLE: |
| 5879 | vcpu->arch.apf.halted = false; | 5915 | vcpu->arch.apf.halted = false; |
| 5880 | break; | 5916 | break; |
| 5881 | case KVM_MP_STATE_SIPI_RECEIVED: | 5917 | case KVM_MP_STATE_INIT_RECEIVED: |
| 5918 | break; | ||
| 5882 | default: | 5919 | default: |
| 5883 | r = -EINTR; | 5920 | r = -EINTR; |
| 5884 | break; | 5921 | break; |
| @@ -6013,6 +6050,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 6013 | 6050 | ||
| 6014 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { | 6051 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { |
| 6015 | kvm_vcpu_block(vcpu); | 6052 | kvm_vcpu_block(vcpu); |
| 6053 | kvm_apic_accept_events(vcpu); | ||
| 6016 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); | 6054 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); |
| 6017 | r = -EAGAIN; | 6055 | r = -EAGAIN; |
| 6018 | goto out; | 6056 | goto out; |
| @@ -6169,6 +6207,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 6169 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 6207 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
| 6170 | struct kvm_mp_state *mp_state) | 6208 | struct kvm_mp_state *mp_state) |
| 6171 | { | 6209 | { |
| 6210 | kvm_apic_accept_events(vcpu); | ||
| 6172 | mp_state->mp_state = vcpu->arch.mp_state; | 6211 | mp_state->mp_state = vcpu->arch.mp_state; |
| 6173 | return 0; | 6212 | return 0; |
| 6174 | } | 6213 | } |
| @@ -6176,7 +6215,15 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | |||
| 6176 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 6215 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
| 6177 | struct kvm_mp_state *mp_state) | 6216 | struct kvm_mp_state *mp_state) |
| 6178 | { | 6217 | { |
| 6179 | vcpu->arch.mp_state = mp_state->mp_state; | 6218 | if (!kvm_vcpu_has_lapic(vcpu) && |
| 6219 | mp_state->mp_state != KVM_MP_STATE_RUNNABLE) | ||
| 6220 | return -EINVAL; | ||
| 6221 | |||
| 6222 | if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { | ||
| 6223 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | ||
| 6224 | set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); | ||
| 6225 | } else | ||
| 6226 | vcpu->arch.mp_state = mp_state->mp_state; | ||
| 6180 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6227 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 6181 | return 0; | 6228 | return 0; |
| 6182 | } | 6229 | } |
| @@ -6475,9 +6522,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 6475 | r = vcpu_load(vcpu); | 6522 | r = vcpu_load(vcpu); |
| 6476 | if (r) | 6523 | if (r) |
| 6477 | return r; | 6524 | return r; |
| 6478 | r = kvm_vcpu_reset(vcpu); | 6525 | kvm_vcpu_reset(vcpu); |
| 6479 | if (r == 0) | 6526 | r = kvm_mmu_setup(vcpu); |
| 6480 | r = kvm_mmu_setup(vcpu); | ||
| 6481 | vcpu_put(vcpu); | 6527 | vcpu_put(vcpu); |
| 6482 | 6528 | ||
| 6483 | return r; | 6529 | return r; |
| @@ -6514,7 +6560,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
| 6514 | kvm_x86_ops->vcpu_free(vcpu); | 6560 | kvm_x86_ops->vcpu_free(vcpu); |
| 6515 | } | 6561 | } |
| 6516 | 6562 | ||
| 6517 | static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) | 6563 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu) |
| 6518 | { | 6564 | { |
| 6519 | atomic_set(&vcpu->arch.nmi_queued, 0); | 6565 | atomic_set(&vcpu->arch.nmi_queued, 0); |
| 6520 | vcpu->arch.nmi_pending = 0; | 6566 | vcpu->arch.nmi_pending = 0; |
| @@ -6541,7 +6587,18 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 6541 | vcpu->arch.regs_avail = ~0; | 6587 | vcpu->arch.regs_avail = ~0; |
| 6542 | vcpu->arch.regs_dirty = ~0; | 6588 | vcpu->arch.regs_dirty = ~0; |
| 6543 | 6589 | ||
| 6544 | return kvm_x86_ops->vcpu_reset(vcpu); | 6590 | kvm_x86_ops->vcpu_reset(vcpu); |
| 6591 | } | ||
| 6592 | |||
| 6593 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) | ||
| 6594 | { | ||
| 6595 | struct kvm_segment cs; | ||
| 6596 | |||
| 6597 | kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
| 6598 | cs.selector = vector << 8; | ||
| 6599 | cs.base = vector << 12; | ||
| 6600 | kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); | ||
| 6601 | kvm_rip_write(vcpu, 0); | ||
| 6545 | } | 6602 | } |
| 6546 | 6603 | ||
| 6547 | int kvm_arch_hardware_enable(void *garbage) | 6604 | int kvm_arch_hardware_enable(void *garbage) |
| @@ -6706,8 +6763,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 6706 | } | 6763 | } |
| 6707 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 6764 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
| 6708 | 6765 | ||
| 6709 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | 6766 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { |
| 6767 | r = -ENOMEM; | ||
| 6710 | goto fail_free_mce_banks; | 6768 | goto fail_free_mce_banks; |
| 6769 | } | ||
| 6711 | 6770 | ||
| 6712 | r = fx_init(vcpu); | 6771 | r = fx_init(vcpu); |
| 6713 | if (r) | 6772 | if (r) |
| @@ -6811,6 +6870,23 @@ void kvm_arch_sync_events(struct kvm *kvm) | |||
| 6811 | 6870 | ||
| 6812 | void kvm_arch_destroy_vm(struct kvm *kvm) | 6871 | void kvm_arch_destroy_vm(struct kvm *kvm) |
| 6813 | { | 6872 | { |
| 6873 | if (current->mm == kvm->mm) { | ||
| 6874 | /* | ||
| 6875 | * Free memory regions allocated on behalf of userspace, | ||
| 6876 | * unless the the memory map has changed due to process exit | ||
| 6877 | * or fd copying. | ||
| 6878 | */ | ||
| 6879 | struct kvm_userspace_memory_region mem; | ||
| 6880 | memset(&mem, 0, sizeof(mem)); | ||
| 6881 | mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | ||
| 6882 | kvm_set_memory_region(kvm, &mem); | ||
| 6883 | |||
| 6884 | mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | ||
| 6885 | kvm_set_memory_region(kvm, &mem); | ||
| 6886 | |||
| 6887 | mem.slot = TSS_PRIVATE_MEMSLOT; | ||
| 6888 | kvm_set_memory_region(kvm, &mem); | ||
| 6889 | } | ||
| 6814 | kvm_iommu_unmap_guest(kvm); | 6890 | kvm_iommu_unmap_guest(kvm); |
| 6815 | kfree(kvm->arch.vpic); | 6891 | kfree(kvm->arch.vpic); |
| 6816 | kfree(kvm->arch.vioapic); | 6892 | kfree(kvm->arch.vioapic); |
| @@ -6903,24 +6979,21 @@ out_free: | |||
| 6903 | 6979 | ||
| 6904 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 6980 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
| 6905 | struct kvm_memory_slot *memslot, | 6981 | struct kvm_memory_slot *memslot, |
| 6906 | struct kvm_memory_slot old, | ||
| 6907 | struct kvm_userspace_memory_region *mem, | 6982 | struct kvm_userspace_memory_region *mem, |
| 6908 | bool user_alloc) | 6983 | enum kvm_mr_change change) |
| 6909 | { | 6984 | { |
| 6910 | int npages = memslot->npages; | ||
| 6911 | |||
| 6912 | /* | 6985 | /* |
| 6913 | * Only private memory slots need to be mapped here since | 6986 | * Only private memory slots need to be mapped here since |
| 6914 | * KVM_SET_MEMORY_REGION ioctl is no longer supported. | 6987 | * KVM_SET_MEMORY_REGION ioctl is no longer supported. |
| 6915 | */ | 6988 | */ |
| 6916 | if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { | 6989 | if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) { |
| 6917 | unsigned long userspace_addr; | 6990 | unsigned long userspace_addr; |
| 6918 | 6991 | ||
| 6919 | /* | 6992 | /* |
| 6920 | * MAP_SHARED to prevent internal slot pages from being moved | 6993 | * MAP_SHARED to prevent internal slot pages from being moved |
| 6921 | * by fork()/COW. | 6994 | * by fork()/COW. |
| 6922 | */ | 6995 | */ |
| 6923 | userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, | 6996 | userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE, |
| 6924 | PROT_READ | PROT_WRITE, | 6997 | PROT_READ | PROT_WRITE, |
| 6925 | MAP_SHARED | MAP_ANONYMOUS, 0); | 6998 | MAP_SHARED | MAP_ANONYMOUS, 0); |
| 6926 | 6999 | ||
| @@ -6935,17 +7008,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
| 6935 | 7008 | ||
| 6936 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 7009 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
| 6937 | struct kvm_userspace_memory_region *mem, | 7010 | struct kvm_userspace_memory_region *mem, |
| 6938 | struct kvm_memory_slot old, | 7011 | const struct kvm_memory_slot *old, |
| 6939 | bool user_alloc) | 7012 | enum kvm_mr_change change) |
| 6940 | { | 7013 | { |
| 6941 | 7014 | ||
| 6942 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; | 7015 | int nr_mmu_pages = 0; |
| 6943 | 7016 | ||
| 6944 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { | 7017 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { |
| 6945 | int ret; | 7018 | int ret; |
| 6946 | 7019 | ||
| 6947 | ret = vm_munmap(old.userspace_addr, | 7020 | ret = vm_munmap(old->userspace_addr, |
| 6948 | old.npages * PAGE_SIZE); | 7021 | old->npages * PAGE_SIZE); |
| 6949 | if (ret < 0) | 7022 | if (ret < 0) |
| 6950 | printk(KERN_WARNING | 7023 | printk(KERN_WARNING |
| 6951 | "kvm_vm_ioctl_set_memory_region: " | 7024 | "kvm_vm_ioctl_set_memory_region: " |
| @@ -6962,14 +7035,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
| 6962 | * Existing largepage mappings are destroyed here and new ones will | 7035 | * Existing largepage mappings are destroyed here and new ones will |
| 6963 | * not be created until the end of the logging. | 7036 | * not be created until the end of the logging. |
| 6964 | */ | 7037 | */ |
| 6965 | if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | 7038 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) |
| 6966 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 7039 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
| 6967 | /* | 7040 | /* |
| 6968 | * If memory slot is created, or moved, we need to clear all | 7041 | * If memory slot is created, or moved, we need to clear all |
| 6969 | * mmio sptes. | 7042 | * mmio sptes. |
| 6970 | */ | 7043 | */ |
| 6971 | if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { | 7044 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { |
| 6972 | kvm_mmu_zap_all(kvm); | 7045 | kvm_mmu_zap_mmio_sptes(kvm); |
| 6973 | kvm_reload_remote_mmus(kvm); | 7046 | kvm_reload_remote_mmus(kvm); |
| 6974 | } | 7047 | } |
| 6975 | } | 7048 | } |
| @@ -6991,7 +7064,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 6991 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && | 7064 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
| 6992 | !vcpu->arch.apf.halted) | 7065 | !vcpu->arch.apf.halted) |
| 6993 | || !list_empty_careful(&vcpu->async_pf.done) | 7066 | || !list_empty_careful(&vcpu->async_pf.done) |
| 6994 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 7067 | || kvm_apic_has_events(vcpu) |
| 6995 | || atomic_read(&vcpu->arch.nmi_queued) || | 7068 | || atomic_read(&vcpu->arch.nmi_queued) || |
| 6996 | (kvm_arch_interrupt_allowed(vcpu) && | 7069 | (kvm_arch_interrupt_allowed(vcpu) && |
| 6997 | kvm_cpu_has_interrupt(vcpu)); | 7070 | kvm_cpu_has_interrupt(vcpu)); |
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 6711e65764b5..2ea6165366b6 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c | |||
| @@ -443,29 +443,30 @@ static int __init test_devices_support(unsigned long addr) | |||
| 443 | } | 443 | } |
| 444 | /* | 444 | /* |
| 445 | * Init function for virtio | 445 | * Init function for virtio |
| 446 | * devices are in a single page above top of "normal" mem | 446 | * devices are in a single page above top of "normal" + standby mem |
| 447 | */ | 447 | */ |
| 448 | static int __init kvm_devices_init(void) | 448 | static int __init kvm_devices_init(void) |
| 449 | { | 449 | { |
| 450 | int rc; | 450 | int rc; |
| 451 | unsigned long total_memory_size = sclp_get_rzm() * sclp_get_rnmax(); | ||
| 451 | 452 | ||
| 452 | if (!MACHINE_IS_KVM) | 453 | if (!MACHINE_IS_KVM) |
| 453 | return -ENODEV; | 454 | return -ENODEV; |
| 454 | 455 | ||
| 455 | if (test_devices_support(real_memory_size) < 0) | 456 | if (test_devices_support(total_memory_size) < 0) |
| 456 | return -ENODEV; | 457 | return -ENODEV; |
| 457 | 458 | ||
| 458 | rc = vmem_add_mapping(real_memory_size, PAGE_SIZE); | 459 | rc = vmem_add_mapping(total_memory_size, PAGE_SIZE); |
| 459 | if (rc) | 460 | if (rc) |
| 460 | return rc; | 461 | return rc; |
| 461 | 462 | ||
| 462 | kvm_devices = (void *) real_memory_size; | 463 | kvm_devices = (void *) total_memory_size; |
| 463 | 464 | ||
| 464 | kvm_root = root_device_register("kvm_s390"); | 465 | kvm_root = root_device_register("kvm_s390"); |
| 465 | if (IS_ERR(kvm_root)) { | 466 | if (IS_ERR(kvm_root)) { |
| 466 | rc = PTR_ERR(kvm_root); | 467 | rc = PTR_ERR(kvm_root); |
| 467 | printk(KERN_ERR "Could not register kvm_s390 root device"); | 468 | printk(KERN_ERR "Could not register kvm_s390 root device"); |
| 468 | vmem_remove_mapping(real_memory_size, PAGE_SIZE); | 469 | vmem_remove_mapping(total_memory_size, PAGE_SIZE); |
| 469 | return rc; | 470 | return rc; |
| 470 | } | 471 | } |
| 471 | 472 | ||
diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index fb877b59ec57..779dc5136291 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <asm/irq.h> | 31 | #include <asm/irq.h> |
| 32 | #include <asm/cio.h> | 32 | #include <asm/cio.h> |
| 33 | #include <asm/ccwdev.h> | 33 | #include <asm/ccwdev.h> |
| 34 | #include <asm/virtio-ccw.h> | ||
| 34 | 35 | ||
| 35 | /* | 36 | /* |
| 36 | * virtio related functions | 37 | * virtio related functions |
| @@ -77,12 +78,9 @@ struct virtio_ccw_vq_info { | |||
| 77 | void *queue; | 78 | void *queue; |
| 78 | struct vq_info_block *info_block; | 79 | struct vq_info_block *info_block; |
| 79 | struct list_head node; | 80 | struct list_head node; |
| 81 | long cookie; | ||
| 80 | }; | 82 | }; |
| 81 | 83 | ||
| 82 | #define KVM_VIRTIO_CCW_RING_ALIGN 4096 | ||
| 83 | |||
| 84 | #define KVM_S390_VIRTIO_CCW_NOTIFY 3 | ||
| 85 | |||
| 86 | #define CCW_CMD_SET_VQ 0x13 | 84 | #define CCW_CMD_SET_VQ 0x13 |
| 87 | #define CCW_CMD_VDEV_RESET 0x33 | 85 | #define CCW_CMD_VDEV_RESET 0x33 |
| 88 | #define CCW_CMD_SET_IND 0x43 | 86 | #define CCW_CMD_SET_IND 0x43 |
| @@ -135,8 +133,11 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, | |||
| 135 | do { | 133 | do { |
| 136 | spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags); | 134 | spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags); |
| 137 | ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0); | 135 | ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0); |
| 138 | if (!ret) | 136 | if (!ret) { |
| 137 | if (!vcdev->curr_io) | ||
| 138 | vcdev->err = 0; | ||
| 139 | vcdev->curr_io |= flag; | 139 | vcdev->curr_io |= flag; |
| 140 | } | ||
| 140 | spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags); | 141 | spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags); |
| 141 | cpu_relax(); | 142 | cpu_relax(); |
| 142 | } while (ret == -EBUSY); | 143 | } while (ret == -EBUSY); |
| @@ -145,15 +146,18 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, | |||
| 145 | } | 146 | } |
| 146 | 147 | ||
| 147 | static inline long do_kvm_notify(struct subchannel_id schid, | 148 | static inline long do_kvm_notify(struct subchannel_id schid, |
| 148 | unsigned long queue_index) | 149 | unsigned long queue_index, |
| 150 | long cookie) | ||
| 149 | { | 151 | { |
| 150 | register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY; | 152 | register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY; |
| 151 | register struct subchannel_id __schid asm("2") = schid; | 153 | register struct subchannel_id __schid asm("2") = schid; |
| 152 | register unsigned long __index asm("3") = queue_index; | 154 | register unsigned long __index asm("3") = queue_index; |
| 153 | register long __rc asm("2"); | 155 | register long __rc asm("2"); |
| 156 | register long __cookie asm("4") = cookie; | ||
| 154 | 157 | ||
| 155 | asm volatile ("diag 2,4,0x500\n" | 158 | asm volatile ("diag 2,4,0x500\n" |
| 156 | : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index) | 159 | : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index), |
| 160 | "d"(__cookie) | ||
| 157 | : "memory", "cc"); | 161 | : "memory", "cc"); |
| 158 | return __rc; | 162 | return __rc; |
| 159 | } | 163 | } |
| @@ -166,7 +170,7 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq) | |||
| 166 | 170 | ||
| 167 | vcdev = to_vc_device(info->vq->vdev); | 171 | vcdev = to_vc_device(info->vq->vdev); |
| 168 | ccw_device_get_schid(vcdev->cdev, &schid); | 172 | ccw_device_get_schid(vcdev->cdev, &schid); |
| 169 | do_kvm_notify(schid, vq->index); | 173 | info->cookie = do_kvm_notify(schid, vq->index, info->cookie); |
| 170 | } | 174 | } |
| 171 | 175 | ||
| 172 | static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, | 176 | static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13958251927..f0eea07d2c2b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -117,14 +117,13 @@ static inline bool is_error_page(struct page *page) | |||
| 117 | #define KVM_REQ_APF_HALT 12 | 117 | #define KVM_REQ_APF_HALT 12 |
| 118 | #define KVM_REQ_STEAL_UPDATE 13 | 118 | #define KVM_REQ_STEAL_UPDATE 13 |
| 119 | #define KVM_REQ_NMI 14 | 119 | #define KVM_REQ_NMI 14 |
| 120 | #define KVM_REQ_IMMEDIATE_EXIT 15 | 120 | #define KVM_REQ_PMU 15 |
| 121 | #define KVM_REQ_PMU 16 | 121 | #define KVM_REQ_PMI 16 |
| 122 | #define KVM_REQ_PMI 17 | 122 | #define KVM_REQ_WATCHDOG 17 |
| 123 | #define KVM_REQ_WATCHDOG 18 | 123 | #define KVM_REQ_MASTERCLOCK_UPDATE 18 |
| 124 | #define KVM_REQ_MASTERCLOCK_UPDATE 19 | 124 | #define KVM_REQ_MCLOCK_INPROGRESS 19 |
| 125 | #define KVM_REQ_MCLOCK_INPROGRESS 20 | 125 | #define KVM_REQ_EPR_EXIT 20 |
| 126 | #define KVM_REQ_EPR_EXIT 21 | 126 | #define KVM_REQ_SCAN_IOAPIC 21 |
| 127 | #define KVM_REQ_EOIBITMAP 22 | ||
| 128 | 127 | ||
| 129 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 128 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
| 130 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 | 129 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 |
| @@ -133,6 +132,9 @@ struct kvm; | |||
| 133 | struct kvm_vcpu; | 132 | struct kvm_vcpu; |
| 134 | extern struct kmem_cache *kvm_vcpu_cache; | 133 | extern struct kmem_cache *kvm_vcpu_cache; |
| 135 | 134 | ||
| 135 | extern raw_spinlock_t kvm_lock; | ||
| 136 | extern struct list_head vm_list; | ||
| 137 | |||
| 136 | struct kvm_io_range { | 138 | struct kvm_io_range { |
| 137 | gpa_t addr; | 139 | gpa_t addr; |
| 138 | int len; | 140 | int len; |
| @@ -149,6 +151,7 @@ struct kvm_io_bus { | |||
| 149 | enum kvm_bus { | 151 | enum kvm_bus { |
| 150 | KVM_MMIO_BUS, | 152 | KVM_MMIO_BUS, |
| 151 | KVM_PIO_BUS, | 153 | KVM_PIO_BUS, |
| 154 | KVM_VIRTIO_CCW_NOTIFY_BUS, | ||
| 152 | KVM_NR_BUSES | 155 | KVM_NR_BUSES |
| 153 | }; | 156 | }; |
| 154 | 157 | ||
| @@ -252,6 +255,7 @@ struct kvm_vcpu { | |||
| 252 | bool dy_eligible; | 255 | bool dy_eligible; |
| 253 | } spin_loop; | 256 | } spin_loop; |
| 254 | #endif | 257 | #endif |
| 258 | bool preempted; | ||
| 255 | struct kvm_vcpu_arch arch; | 259 | struct kvm_vcpu_arch arch; |
| 256 | }; | 260 | }; |
| 257 | 261 | ||
| @@ -285,7 +289,8 @@ struct kvm_kernel_irq_routing_entry { | |||
| 285 | u32 gsi; | 289 | u32 gsi; |
| 286 | u32 type; | 290 | u32 type; |
| 287 | int (*set)(struct kvm_kernel_irq_routing_entry *e, | 291 | int (*set)(struct kvm_kernel_irq_routing_entry *e, |
| 288 | struct kvm *kvm, int irq_source_id, int level); | 292 | struct kvm *kvm, int irq_source_id, int level, |
| 293 | bool line_status); | ||
| 289 | union { | 294 | union { |
| 290 | struct { | 295 | struct { |
| 291 | unsigned irqchip; | 296 | unsigned irqchip; |
| @@ -296,10 +301,10 @@ struct kvm_kernel_irq_routing_entry { | |||
| 296 | struct hlist_node link; | 301 | struct hlist_node link; |
| 297 | }; | 302 | }; |
| 298 | 303 | ||
| 299 | #ifdef __KVM_HAVE_IOAPIC | 304 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
| 300 | 305 | ||
| 301 | struct kvm_irq_routing_table { | 306 | struct kvm_irq_routing_table { |
| 302 | int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; | 307 | int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; |
| 303 | struct kvm_kernel_irq_routing_entry *rt_entries; | 308 | struct kvm_kernel_irq_routing_entry *rt_entries; |
| 304 | u32 nr_rt_entries; | 309 | u32 nr_rt_entries; |
| 305 | /* | 310 | /* |
| @@ -385,6 +390,7 @@ struct kvm { | |||
| 385 | long mmu_notifier_count; | 390 | long mmu_notifier_count; |
| 386 | #endif | 391 | #endif |
| 387 | long tlbs_dirty; | 392 | long tlbs_dirty; |
| 393 | struct list_head devices; | ||
| 388 | }; | 394 | }; |
| 389 | 395 | ||
| 390 | #define kvm_err(fmt, ...) \ | 396 | #define kvm_err(fmt, ...) \ |
| @@ -424,6 +430,19 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); | |||
| 424 | int __must_check vcpu_load(struct kvm_vcpu *vcpu); | 430 | int __must_check vcpu_load(struct kvm_vcpu *vcpu); |
| 425 | void vcpu_put(struct kvm_vcpu *vcpu); | 431 | void vcpu_put(struct kvm_vcpu *vcpu); |
| 426 | 432 | ||
| 433 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING | ||
| 434 | int kvm_irqfd_init(void); | ||
| 435 | void kvm_irqfd_exit(void); | ||
| 436 | #else | ||
| 437 | static inline int kvm_irqfd_init(void) | ||
| 438 | { | ||
| 439 | return 0; | ||
| 440 | } | ||
| 441 | |||
| 442 | static inline void kvm_irqfd_exit(void) | ||
| 443 | { | ||
| 444 | } | ||
| 445 | #endif | ||
| 427 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | 446 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
| 428 | struct module *module); | 447 | struct module *module); |
| 429 | void kvm_exit(void); | 448 | void kvm_exit(void); |
| @@ -452,24 +471,39 @@ id_to_memslot(struct kvm_memslots *slots, int id) | |||
| 452 | return slot; | 471 | return slot; |
| 453 | } | 472 | } |
| 454 | 473 | ||
| 474 | /* | ||
| 475 | * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: | ||
| 476 | * - create a new memory slot | ||
| 477 | * - delete an existing memory slot | ||
| 478 | * - modify an existing memory slot | ||
| 479 | * -- move it in the guest physical memory space | ||
| 480 | * -- just change its flags | ||
| 481 | * | ||
| 482 | * Since flags can be changed by some of these operations, the following | ||
| 483 | * differentiation is the best we can do for __kvm_set_memory_region(): | ||
| 484 | */ | ||
| 485 | enum kvm_mr_change { | ||
| 486 | KVM_MR_CREATE, | ||
| 487 | KVM_MR_DELETE, | ||
| 488 | KVM_MR_MOVE, | ||
| 489 | KVM_MR_FLAGS_ONLY, | ||
| 490 | }; | ||
| 491 | |||
| 455 | int kvm_set_memory_region(struct kvm *kvm, | 492 | int kvm_set_memory_region(struct kvm *kvm, |
| 456 | struct kvm_userspace_memory_region *mem, | 493 | struct kvm_userspace_memory_region *mem); |
| 457 | bool user_alloc); | ||
| 458 | int __kvm_set_memory_region(struct kvm *kvm, | 494 | int __kvm_set_memory_region(struct kvm *kvm, |
| 459 | struct kvm_userspace_memory_region *mem, | 495 | struct kvm_userspace_memory_region *mem); |
| 460 | bool user_alloc); | ||
| 461 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | 496 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, |
| 462 | struct kvm_memory_slot *dont); | 497 | struct kvm_memory_slot *dont); |
| 463 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); | 498 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); |
| 464 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 499 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
| 465 | struct kvm_memory_slot *memslot, | 500 | struct kvm_memory_slot *memslot, |
| 466 | struct kvm_memory_slot old, | ||
| 467 | struct kvm_userspace_memory_region *mem, | 501 | struct kvm_userspace_memory_region *mem, |
| 468 | bool user_alloc); | 502 | enum kvm_mr_change change); |
| 469 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 503 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
| 470 | struct kvm_userspace_memory_region *mem, | 504 | struct kvm_userspace_memory_region *mem, |
| 471 | struct kvm_memory_slot old, | 505 | const struct kvm_memory_slot *old, |
| 472 | bool user_alloc); | 506 | enum kvm_mr_change change); |
| 473 | bool kvm_largepages_enabled(void); | 507 | bool kvm_largepages_enabled(void); |
| 474 | void kvm_disable_largepages(void); | 508 | void kvm_disable_largepages(void); |
| 475 | /* flush all memory translations */ | 509 | /* flush all memory translations */ |
| @@ -539,7 +573,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | |||
| 539 | void kvm_flush_remote_tlbs(struct kvm *kvm); | 573 | void kvm_flush_remote_tlbs(struct kvm *kvm); |
| 540 | void kvm_reload_remote_mmus(struct kvm *kvm); | 574 | void kvm_reload_remote_mmus(struct kvm *kvm); |
| 541 | void kvm_make_mclock_inprogress_request(struct kvm *kvm); | 575 | void kvm_make_mclock_inprogress_request(struct kvm *kvm); |
| 542 | void kvm_make_update_eoibitmap_request(struct kvm *kvm); | 576 | void kvm_make_scan_ioapic_request(struct kvm *kvm); |
| 543 | 577 | ||
| 544 | long kvm_arch_dev_ioctl(struct file *filp, | 578 | long kvm_arch_dev_ioctl(struct file *filp, |
| 545 | unsigned int ioctl, unsigned long arg); | 579 | unsigned int ioctl, unsigned long arg); |
| @@ -555,10 +589,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 555 | struct kvm_dirty_log *log); | 589 | struct kvm_dirty_log *log); |
| 556 | 590 | ||
| 557 | int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | 591 | int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, |
| 558 | struct | 592 | struct kvm_userspace_memory_region *mem); |
| 559 | kvm_userspace_memory_region *mem, | 593 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, |
| 560 | bool user_alloc); | 594 | bool line_status); |
| 561 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level); | ||
| 562 | long kvm_arch_vm_ioctl(struct file *filp, | 595 | long kvm_arch_vm_ioctl(struct file *filp, |
| 563 | unsigned int ioctl, unsigned long arg); | 596 | unsigned int ioctl, unsigned long arg); |
| 564 | 597 | ||
| @@ -632,7 +665,6 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) | |||
| 632 | 665 | ||
| 633 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); | 666 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); |
| 634 | void kvm_arch_destroy_vm(struct kvm *kvm); | 667 | void kvm_arch_destroy_vm(struct kvm *kvm); |
| 635 | void kvm_free_all_assigned_devices(struct kvm *kvm); | ||
| 636 | void kvm_arch_sync_events(struct kvm *kvm); | 668 | void kvm_arch_sync_events(struct kvm *kvm); |
| 637 | 669 | ||
| 638 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); | 670 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); |
| @@ -684,15 +716,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |||
| 684 | void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, | 716 | void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, |
| 685 | bool mask); | 717 | bool mask); |
| 686 | 718 | ||
| 687 | #ifdef __KVM_HAVE_IOAPIC | 719 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, |
| 688 | void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, | 720 | bool line_status); |
| 689 | union kvm_ioapic_redirect_entry *entry, | ||
| 690 | unsigned long *deliver_bitmask); | ||
| 691 | #endif | ||
| 692 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); | ||
| 693 | int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); | 721 | int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); |
| 694 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, | 722 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, |
| 695 | int irq_source_id, int level); | 723 | int irq_source_id, int level, bool line_status); |
| 696 | bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); | 724 | bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); |
| 697 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); | 725 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); |
| 698 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 726 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
| @@ -705,7 +733,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); | |||
| 705 | /* For vcpu->arch.iommu_flags */ | 733 | /* For vcpu->arch.iommu_flags */ |
| 706 | #define KVM_IOMMU_CACHE_COHERENCY 0x1 | 734 | #define KVM_IOMMU_CACHE_COHERENCY 0x1 |
| 707 | 735 | ||
| 708 | #ifdef CONFIG_IOMMU_API | 736 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
| 709 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); | 737 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); |
| 710 | void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); | 738 | void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); |
| 711 | int kvm_iommu_map_guest(struct kvm *kvm); | 739 | int kvm_iommu_map_guest(struct kvm *kvm); |
| @@ -714,7 +742,7 @@ int kvm_assign_device(struct kvm *kvm, | |||
| 714 | struct kvm_assigned_dev_kernel *assigned_dev); | 742 | struct kvm_assigned_dev_kernel *assigned_dev); |
| 715 | int kvm_deassign_device(struct kvm *kvm, | 743 | int kvm_deassign_device(struct kvm *kvm, |
| 716 | struct kvm_assigned_dev_kernel *assigned_dev); | 744 | struct kvm_assigned_dev_kernel *assigned_dev); |
| 717 | #else /* CONFIG_IOMMU_API */ | 745 | #else |
| 718 | static inline int kvm_iommu_map_pages(struct kvm *kvm, | 746 | static inline int kvm_iommu_map_pages(struct kvm *kvm, |
| 719 | struct kvm_memory_slot *slot) | 747 | struct kvm_memory_slot *slot) |
| 720 | { | 748 | { |
| @@ -726,28 +754,11 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm, | |||
| 726 | { | 754 | { |
| 727 | } | 755 | } |
| 728 | 756 | ||
| 729 | static inline int kvm_iommu_map_guest(struct kvm *kvm) | ||
| 730 | { | ||
| 731 | return -ENODEV; | ||
| 732 | } | ||
| 733 | |||
| 734 | static inline int kvm_iommu_unmap_guest(struct kvm *kvm) | 757 | static inline int kvm_iommu_unmap_guest(struct kvm *kvm) |
| 735 | { | 758 | { |
| 736 | return 0; | 759 | return 0; |
| 737 | } | 760 | } |
| 738 | 761 | #endif | |
| 739 | static inline int kvm_assign_device(struct kvm *kvm, | ||
| 740 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 741 | { | ||
| 742 | return 0; | ||
| 743 | } | ||
| 744 | |||
| 745 | static inline int kvm_deassign_device(struct kvm *kvm, | ||
| 746 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 747 | { | ||
| 748 | return 0; | ||
| 749 | } | ||
| 750 | #endif /* CONFIG_IOMMU_API */ | ||
| 751 | 762 | ||
| 752 | static inline void __guest_enter(void) | 763 | static inline void __guest_enter(void) |
| 753 | { | 764 | { |
| @@ -921,7 +932,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) | |||
| 921 | } | 932 | } |
| 922 | #endif | 933 | #endif |
| 923 | 934 | ||
| 924 | #ifdef KVM_CAP_IRQ_ROUTING | 935 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
| 925 | 936 | ||
| 926 | #define KVM_MAX_IRQ_ROUTES 1024 | 937 | #define KVM_MAX_IRQ_ROUTES 1024 |
| 927 | 938 | ||
| @@ -930,6 +941,9 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
| 930 | const struct kvm_irq_routing_entry *entries, | 941 | const struct kvm_irq_routing_entry *entries, |
| 931 | unsigned nr, | 942 | unsigned nr, |
| 932 | unsigned flags); | 943 | unsigned flags); |
| 944 | int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, | ||
| 945 | struct kvm_kernel_irq_routing_entry *e, | ||
| 946 | const struct kvm_irq_routing_entry *ue); | ||
| 933 | void kvm_free_irq_routing(struct kvm *kvm); | 947 | void kvm_free_irq_routing(struct kvm *kvm); |
| 934 | 948 | ||
| 935 | int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); | 949 | int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); |
| @@ -998,11 +1012,13 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } | |||
| 998 | 1012 | ||
| 999 | #endif | 1013 | #endif |
| 1000 | 1014 | ||
| 1001 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT | 1015 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
| 1002 | 1016 | ||
| 1003 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | 1017 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, |
| 1004 | unsigned long arg); | 1018 | unsigned long arg); |
| 1005 | 1019 | ||
| 1020 | void kvm_free_all_assigned_devices(struct kvm *kvm); | ||
| 1021 | |||
| 1006 | #else | 1022 | #else |
| 1007 | 1023 | ||
| 1008 | static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | 1024 | static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, |
| @@ -1011,6 +1027,8 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
| 1011 | return -ENOTTY; | 1027 | return -ENOTTY; |
| 1012 | } | 1028 | } |
| 1013 | 1029 | ||
| 1030 | static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} | ||
| 1031 | |||
| 1014 | #endif | 1032 | #endif |
| 1015 | 1033 | ||
| 1016 | static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) | 1034 | static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) |
| @@ -1028,6 +1046,46 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) | |||
| 1028 | } | 1046 | } |
| 1029 | } | 1047 | } |
| 1030 | 1048 | ||
| 1049 | extern bool kvm_rebooting; | ||
| 1050 | |||
| 1051 | struct kvm_device_ops; | ||
| 1052 | |||
| 1053 | struct kvm_device { | ||
| 1054 | struct kvm_device_ops *ops; | ||
| 1055 | struct kvm *kvm; | ||
| 1056 | void *private; | ||
| 1057 | struct list_head vm_node; | ||
| 1058 | }; | ||
| 1059 | |||
| 1060 | /* create, destroy, and name are mandatory */ | ||
| 1061 | struct kvm_device_ops { | ||
| 1062 | const char *name; | ||
| 1063 | int (*create)(struct kvm_device *dev, u32 type); | ||
| 1064 | |||
| 1065 | /* | ||
| 1066 | * Destroy is responsible for freeing dev. | ||
| 1067 | * | ||
| 1068 | * Destroy may be called before or after destructors are called | ||
| 1069 | * on emulated I/O regions, depending on whether a reference is | ||
| 1070 | * held by a vcpu or other kvm component that gets destroyed | ||
| 1071 | * after the emulated I/O. | ||
| 1072 | */ | ||
| 1073 | void (*destroy)(struct kvm_device *dev); | ||
| 1074 | |||
| 1075 | int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); | ||
| 1076 | int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); | ||
| 1077 | int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); | ||
| 1078 | long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, | ||
| 1079 | unsigned long arg); | ||
| 1080 | }; | ||
| 1081 | |||
| 1082 | void kvm_device_get(struct kvm_device *dev); | ||
| 1083 | void kvm_device_put(struct kvm_device *dev); | ||
| 1084 | struct kvm_device *kvm_device_from_filp(struct file *filp); | ||
| 1085 | |||
| 1086 | extern struct kvm_device_ops kvm_mpic_ops; | ||
| 1087 | extern struct kvm_device_ops kvm_xics_ops; | ||
| 1088 | |||
| 1031 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | 1089 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT |
| 1032 | 1090 | ||
| 1033 | static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) | 1091 | static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) |
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 19911dddaeb7..7005d1109ec9 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h | |||
| @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, | |||
| 37 | __entry->errno < 0 ? -__entry->errno : __entry->reason) | 37 | __entry->errno < 0 ? -__entry->errno : __entry->reason) |
| 38 | ); | 38 | ); |
| 39 | 39 | ||
| 40 | #if defined(__KVM_HAVE_IRQ_LINE) | 40 | #if defined(CONFIG_HAVE_KVM_IRQCHIP) |
| 41 | TRACE_EVENT(kvm_set_irq, | 41 | TRACE_EVENT(kvm_set_irq, |
| 42 | TP_PROTO(unsigned int gsi, int level, int irq_source_id), | 42 | TP_PROTO(unsigned int gsi, int level, int irq_source_id), |
| 43 | TP_ARGS(gsi, level, irq_source_id), | 43 | TP_ARGS(gsi, level, irq_source_id), |
| @@ -122,6 +122,10 @@ TRACE_EVENT(kvm_msi_set_irq, | |||
| 122 | {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ | 122 | {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ |
| 123 | {KVM_IRQCHIP_IOAPIC, "IOAPIC"} | 123 | {KVM_IRQCHIP_IOAPIC, "IOAPIC"} |
| 124 | 124 | ||
| 125 | #endif /* defined(__KVM_HAVE_IOAPIC) */ | ||
| 126 | |||
| 127 | #if defined(CONFIG_HAVE_KVM_IRQCHIP) | ||
| 128 | |||
| 125 | TRACE_EVENT(kvm_ack_irq, | 129 | TRACE_EVENT(kvm_ack_irq, |
| 126 | TP_PROTO(unsigned int irqchip, unsigned int pin), | 130 | TP_PROTO(unsigned int irqchip, unsigned int pin), |
| 127 | TP_ARGS(irqchip, pin), | 131 | TP_ARGS(irqchip, pin), |
| @@ -136,14 +140,18 @@ TRACE_EVENT(kvm_ack_irq, | |||
| 136 | __entry->pin = pin; | 140 | __entry->pin = pin; |
| 137 | ), | 141 | ), |
| 138 | 142 | ||
| 143 | #ifdef kvm_irqchips | ||
| 139 | TP_printk("irqchip %s pin %u", | 144 | TP_printk("irqchip %s pin %u", |
| 140 | __print_symbolic(__entry->irqchip, kvm_irqchips), | 145 | __print_symbolic(__entry->irqchip, kvm_irqchips), |
| 141 | __entry->pin) | 146 | __entry->pin) |
| 147 | #else | ||
| 148 | TP_printk("irqchip %d pin %u", __entry->irqchip, __entry->pin) | ||
| 149 | #endif | ||
| 142 | ); | 150 | ); |
| 143 | 151 | ||
| 152 | #endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ | ||
| 144 | 153 | ||
| 145 | 154 | ||
| 146 | #endif /* defined(__KVM_HAVE_IOAPIC) */ | ||
| 147 | 155 | ||
| 148 | #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 | 156 | #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 |
| 149 | #define KVM_TRACE_MMIO_READ 1 | 157 | #define KVM_TRACE_MMIO_READ 1 |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3c56ba3d80c1..a5c86fc34a37 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
| @@ -449,12 +449,15 @@ enum { | |||
| 449 | kvm_ioeventfd_flag_nr_datamatch, | 449 | kvm_ioeventfd_flag_nr_datamatch, |
| 450 | kvm_ioeventfd_flag_nr_pio, | 450 | kvm_ioeventfd_flag_nr_pio, |
| 451 | kvm_ioeventfd_flag_nr_deassign, | 451 | kvm_ioeventfd_flag_nr_deassign, |
| 452 | kvm_ioeventfd_flag_nr_virtio_ccw_notify, | ||
| 452 | kvm_ioeventfd_flag_nr_max, | 453 | kvm_ioeventfd_flag_nr_max, |
| 453 | }; | 454 | }; |
| 454 | 455 | ||
| 455 | #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) | 456 | #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) |
| 456 | #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) | 457 | #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) |
| 457 | #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) | 458 | #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) |
| 459 | #define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ | ||
| 460 | (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) | ||
| 458 | 461 | ||
| 459 | #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) | 462 | #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) |
| 460 | 463 | ||
| @@ -558,9 +561,7 @@ struct kvm_ppc_smmu_info { | |||
| 558 | #define KVM_CAP_MP_STATE 14 | 561 | #define KVM_CAP_MP_STATE 14 |
| 559 | #define KVM_CAP_COALESCED_MMIO 15 | 562 | #define KVM_CAP_COALESCED_MMIO 15 |
| 560 | #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ | 563 | #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ |
| 561 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT | ||
| 562 | #define KVM_CAP_DEVICE_ASSIGNMENT 17 | 564 | #define KVM_CAP_DEVICE_ASSIGNMENT 17 |
| 563 | #endif | ||
| 564 | #define KVM_CAP_IOMMU 18 | 565 | #define KVM_CAP_IOMMU 18 |
| 565 | #ifdef __KVM_HAVE_MSI | 566 | #ifdef __KVM_HAVE_MSI |
| 566 | #define KVM_CAP_DEVICE_MSI 20 | 567 | #define KVM_CAP_DEVICE_MSI 20 |
| @@ -576,13 +577,9 @@ struct kvm_ppc_smmu_info { | |||
| 576 | #ifdef __KVM_HAVE_PIT | 577 | #ifdef __KVM_HAVE_PIT |
| 577 | #define KVM_CAP_REINJECT_CONTROL 24 | 578 | #define KVM_CAP_REINJECT_CONTROL 24 |
| 578 | #endif | 579 | #endif |
| 579 | #ifdef __KVM_HAVE_IOAPIC | ||
| 580 | #define KVM_CAP_IRQ_ROUTING 25 | 580 | #define KVM_CAP_IRQ_ROUTING 25 |
| 581 | #endif | ||
| 582 | #define KVM_CAP_IRQ_INJECT_STATUS 26 | 581 | #define KVM_CAP_IRQ_INJECT_STATUS 26 |
| 583 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT | ||
| 584 | #define KVM_CAP_DEVICE_DEASSIGNMENT 27 | 582 | #define KVM_CAP_DEVICE_DEASSIGNMENT 27 |
| 585 | #endif | ||
| 586 | #ifdef __KVM_HAVE_MSIX | 583 | #ifdef __KVM_HAVE_MSIX |
| 587 | #define KVM_CAP_DEVICE_MSIX 28 | 584 | #define KVM_CAP_DEVICE_MSIX 28 |
| 588 | #endif | 585 | #endif |
| @@ -665,6 +662,10 @@ struct kvm_ppc_smmu_info { | |||
| 665 | #define KVM_CAP_PPC_EPR 86 | 662 | #define KVM_CAP_PPC_EPR 86 |
| 666 | #define KVM_CAP_ARM_PSCI 87 | 663 | #define KVM_CAP_ARM_PSCI 87 |
| 667 | #define KVM_CAP_ARM_SET_DEVICE_ADDR 88 | 664 | #define KVM_CAP_ARM_SET_DEVICE_ADDR 88 |
| 665 | #define KVM_CAP_DEVICE_CTRL 89 | ||
| 666 | #define KVM_CAP_IRQ_MPIC 90 | ||
| 667 | #define KVM_CAP_PPC_RTAS 91 | ||
| 668 | #define KVM_CAP_IRQ_XICS 92 | ||
| 668 | 669 | ||
| 669 | #ifdef KVM_CAP_IRQ_ROUTING | 670 | #ifdef KVM_CAP_IRQ_ROUTING |
| 670 | 671 | ||
| @@ -818,6 +819,28 @@ struct kvm_arm_device_addr { | |||
| 818 | }; | 819 | }; |
| 819 | 820 | ||
| 820 | /* | 821 | /* |
| 822 | * Device control API, available with KVM_CAP_DEVICE_CTRL | ||
| 823 | */ | ||
| 824 | #define KVM_CREATE_DEVICE_TEST 1 | ||
| 825 | |||
| 826 | struct kvm_create_device { | ||
| 827 | __u32 type; /* in: KVM_DEV_TYPE_xxx */ | ||
| 828 | __u32 fd; /* out: device handle */ | ||
| 829 | __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ | ||
| 830 | }; | ||
| 831 | |||
| 832 | struct kvm_device_attr { | ||
| 833 | __u32 flags; /* no flags currently defined */ | ||
| 834 | __u32 group; /* device-defined */ | ||
| 835 | __u64 attr; /* group-defined */ | ||
| 836 | __u64 addr; /* userspace address of attr data */ | ||
| 837 | }; | ||
| 838 | |||
| 839 | #define KVM_DEV_TYPE_FSL_MPIC_20 1 | ||
| 840 | #define KVM_DEV_TYPE_FSL_MPIC_42 2 | ||
| 841 | #define KVM_DEV_TYPE_XICS 3 | ||
| 842 | |||
| 843 | /* | ||
| 821 | * ioctls for VM fds | 844 | * ioctls for VM fds |
| 822 | */ | 845 | */ |
| 823 | #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) | 846 | #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) |
| @@ -904,6 +927,16 @@ struct kvm_s390_ucas_mapping { | |||
| 904 | #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) | 927 | #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) |
| 905 | /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ | 928 | /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ |
| 906 | #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) | 929 | #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) |
| 930 | /* Available with KVM_CAP_PPC_RTAS */ | ||
| 931 | #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) | ||
| 932 | |||
| 933 | /* ioctl for vm fd */ | ||
| 934 | #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) | ||
| 935 | |||
| 936 | /* ioctls for fds returned by KVM_CREATE_DEVICE */ | ||
| 937 | #define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr) | ||
| 938 | #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) | ||
| 939 | #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) | ||
| 907 | 940 | ||
| 908 | /* | 941 | /* |
| 909 | * ioctls for vcpu fds | 942 | * ioctls for vcpu fds |
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index d01b24b72c61..779262f59e25 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig | |||
| @@ -6,6 +6,9 @@ config HAVE_KVM | |||
| 6 | config HAVE_KVM_IRQCHIP | 6 | config HAVE_KVM_IRQCHIP |
| 7 | bool | 7 | bool |
| 8 | 8 | ||
| 9 | config HAVE_KVM_IRQ_ROUTING | ||
| 10 | bool | ||
| 11 | |||
| 9 | config HAVE_KVM_EVENTFD | 12 | config HAVE_KVM_EVENTFD |
| 10 | bool | 13 | bool |
| 11 | select EVENTFD | 14 | select EVENTFD |
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 3642239252b0..8db43701016f 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c | |||
| @@ -80,11 +80,12 @@ kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, | |||
| 80 | spin_lock(&assigned_dev->intx_mask_lock); | 80 | spin_lock(&assigned_dev->intx_mask_lock); |
| 81 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) | 81 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) |
| 82 | kvm_set_irq(assigned_dev->kvm, | 82 | kvm_set_irq(assigned_dev->kvm, |
| 83 | assigned_dev->irq_source_id, vector, 1); | 83 | assigned_dev->irq_source_id, vector, 1, |
| 84 | false); | ||
| 84 | spin_unlock(&assigned_dev->intx_mask_lock); | 85 | spin_unlock(&assigned_dev->intx_mask_lock); |
| 85 | } else | 86 | } else |
| 86 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 87 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
| 87 | vector, 1); | 88 | vector, 1, false); |
| 88 | } | 89 | } |
| 89 | 90 | ||
| 90 | static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) | 91 | static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) |
| @@ -165,7 +166,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
| 165 | container_of(kian, struct kvm_assigned_dev_kernel, | 166 | container_of(kian, struct kvm_assigned_dev_kernel, |
| 166 | ack_notifier); | 167 | ack_notifier); |
| 167 | 168 | ||
| 168 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | 169 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); |
| 169 | 170 | ||
| 170 | spin_lock(&dev->intx_mask_lock); | 171 | spin_lock(&dev->intx_mask_lock); |
| 171 | 172 | ||
| @@ -188,7 +189,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
| 188 | 189 | ||
| 189 | if (reassert) | 190 | if (reassert) |
| 190 | kvm_set_irq(dev->kvm, dev->irq_source_id, | 191 | kvm_set_irq(dev->kvm, dev->irq_source_id, |
| 191 | dev->guest_irq, 1); | 192 | dev->guest_irq, 1, false); |
| 192 | } | 193 | } |
| 193 | 194 | ||
| 194 | spin_unlock(&dev->intx_mask_lock); | 195 | spin_unlock(&dev->intx_mask_lock); |
| @@ -202,7 +203,7 @@ static void deassign_guest_irq(struct kvm *kvm, | |||
| 202 | &assigned_dev->ack_notifier); | 203 | &assigned_dev->ack_notifier); |
| 203 | 204 | ||
| 204 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 205 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
| 205 | assigned_dev->guest_irq, 0); | 206 | assigned_dev->guest_irq, 0, false); |
| 206 | 207 | ||
| 207 | if (assigned_dev->irq_source_id != -1) | 208 | if (assigned_dev->irq_source_id != -1) |
| 208 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | 209 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); |
| @@ -901,7 +902,7 @@ static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, | |||
| 901 | if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | 902 | if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { |
| 902 | if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { | 903 | if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { |
| 903 | kvm_set_irq(match->kvm, match->irq_source_id, | 904 | kvm_set_irq(match->kvm, match->irq_source_id, |
| 904 | match->guest_irq, 0); | 905 | match->guest_irq, 0, false); |
| 905 | /* | 906 | /* |
| 906 | * Masking at hardware-level is performed on demand, | 907 | * Masking at hardware-level is performed on demand, |
| 907 | * i.e. when an IRQ actually arrives at the host. | 908 | * i.e. when an IRQ actually arrives at the host. |
| @@ -982,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
| 982 | goto out; | 983 | goto out; |
| 983 | break; | 984 | break; |
| 984 | } | 985 | } |
| 985 | #ifdef KVM_CAP_IRQ_ROUTING | ||
| 986 | case KVM_SET_GSI_ROUTING: { | ||
| 987 | struct kvm_irq_routing routing; | ||
| 988 | struct kvm_irq_routing __user *urouting; | ||
| 989 | struct kvm_irq_routing_entry *entries; | ||
| 990 | |||
| 991 | r = -EFAULT; | ||
| 992 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
| 993 | goto out; | ||
| 994 | r = -EINVAL; | ||
| 995 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
| 996 | goto out; | ||
| 997 | if (routing.flags) | ||
| 998 | goto out; | ||
| 999 | r = -ENOMEM; | ||
| 1000 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
| 1001 | if (!entries) | ||
| 1002 | goto out; | ||
| 1003 | r = -EFAULT; | ||
| 1004 | urouting = argp; | ||
| 1005 | if (copy_from_user(entries, urouting->entries, | ||
| 1006 | routing.nr * sizeof(*entries))) | ||
| 1007 | goto out_free_irq_routing; | ||
| 1008 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
| 1009 | routing.flags); | ||
| 1010 | out_free_irq_routing: | ||
| 1011 | vfree(entries); | ||
| 1012 | break; | ||
| 1013 | } | ||
| 1014 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
| 1015 | #ifdef __KVM_HAVE_MSIX | 986 | #ifdef __KVM_HAVE_MSIX |
| 1016 | case KVM_ASSIGN_SET_MSIX_NR: { | 987 | case KVM_ASSIGN_SET_MSIX_NR: { |
| 1017 | struct kvm_assigned_msix_nr entry_nr; | 988 | struct kvm_assigned_msix_nr entry_nr; |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index adb17f266b28..64ee720b75c7 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
| @@ -35,7 +35,7 @@ | |||
| 35 | 35 | ||
| 36 | #include "iodev.h" | 36 | #include "iodev.h" |
| 37 | 37 | ||
| 38 | #ifdef __KVM_HAVE_IOAPIC | 38 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
| 39 | /* | 39 | /* |
| 40 | * -------------------------------------------------------------------- | 40 | * -------------------------------------------------------------------- |
| 41 | * irqfd: Allows an fd to be used to inject an interrupt to the guest | 41 | * irqfd: Allows an fd to be used to inject an interrupt to the guest |
| @@ -100,11 +100,13 @@ irqfd_inject(struct work_struct *work) | |||
| 100 | struct kvm *kvm = irqfd->kvm; | 100 | struct kvm *kvm = irqfd->kvm; |
| 101 | 101 | ||
| 102 | if (!irqfd->resampler) { | 102 | if (!irqfd->resampler) { |
| 103 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 103 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1, |
| 104 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | 104 | false); |
| 105 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0, | ||
| 106 | false); | ||
| 105 | } else | 107 | } else |
| 106 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | 108 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, |
| 107 | irqfd->gsi, 1); | 109 | irqfd->gsi, 1, false); |
| 108 | } | 110 | } |
| 109 | 111 | ||
| 110 | /* | 112 | /* |
| @@ -121,7 +123,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | |||
| 121 | resampler = container_of(kian, struct _irqfd_resampler, notifier); | 123 | resampler = container_of(kian, struct _irqfd_resampler, notifier); |
| 122 | 124 | ||
| 123 | kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | 125 | kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, |
| 124 | resampler->notifier.gsi, 0); | 126 | resampler->notifier.gsi, 0, false); |
| 125 | 127 | ||
| 126 | rcu_read_lock(); | 128 | rcu_read_lock(); |
| 127 | 129 | ||
| @@ -146,7 +148,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) | |||
| 146 | list_del(&resampler->link); | 148 | list_del(&resampler->link); |
| 147 | kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); | 149 | kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); |
| 148 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | 150 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, |
| 149 | resampler->notifier.gsi, 0); | 151 | resampler->notifier.gsi, 0, false); |
| 150 | kfree(resampler); | 152 | kfree(resampler); |
| 151 | } | 153 | } |
| 152 | 154 | ||
| @@ -225,7 +227,8 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) | |||
| 225 | irq = rcu_dereference(irqfd->irq_entry); | 227 | irq = rcu_dereference(irqfd->irq_entry); |
| 226 | /* An event has been signaled, inject an interrupt */ | 228 | /* An event has been signaled, inject an interrupt */ |
| 227 | if (irq) | 229 | if (irq) |
| 228 | kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); | 230 | kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, |
| 231 | false); | ||
| 229 | else | 232 | else |
| 230 | schedule_work(&irqfd->inject); | 233 | schedule_work(&irqfd->inject); |
| 231 | rcu_read_unlock(); | 234 | rcu_read_unlock(); |
| @@ -430,7 +433,7 @@ fail: | |||
| 430 | void | 433 | void |
| 431 | kvm_eventfd_init(struct kvm *kvm) | 434 | kvm_eventfd_init(struct kvm *kvm) |
| 432 | { | 435 | { |
| 433 | #ifdef __KVM_HAVE_IOAPIC | 436 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
| 434 | spin_lock_init(&kvm->irqfds.lock); | 437 | spin_lock_init(&kvm->irqfds.lock); |
| 435 | INIT_LIST_HEAD(&kvm->irqfds.items); | 438 | INIT_LIST_HEAD(&kvm->irqfds.items); |
| 436 | INIT_LIST_HEAD(&kvm->irqfds.resampler_list); | 439 | INIT_LIST_HEAD(&kvm->irqfds.resampler_list); |
| @@ -439,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm) | |||
| 439 | INIT_LIST_HEAD(&kvm->ioeventfds); | 442 | INIT_LIST_HEAD(&kvm->ioeventfds); |
| 440 | } | 443 | } |
| 441 | 444 | ||
| 442 | #ifdef __KVM_HAVE_IOAPIC | 445 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
| 443 | /* | 446 | /* |
| 444 | * shutdown any irqfd's that match fd+gsi | 447 | * shutdown any irqfd's that match fd+gsi |
| 445 | */ | 448 | */ |
| @@ -543,7 +546,7 @@ void kvm_irq_routing_update(struct kvm *kvm, | |||
| 543 | * aggregated from all vm* instances. We need our own isolated single-thread | 546 | * aggregated from all vm* instances. We need our own isolated single-thread |
| 544 | * queue to prevent deadlock against flushing the normal work-queue. | 547 | * queue to prevent deadlock against flushing the normal work-queue. |
| 545 | */ | 548 | */ |
| 546 | static int __init irqfd_module_init(void) | 549 | int kvm_irqfd_init(void) |
| 547 | { | 550 | { |
| 548 | irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); | 551 | irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); |
| 549 | if (!irqfd_cleanup_wq) | 552 | if (!irqfd_cleanup_wq) |
| @@ -552,13 +555,10 @@ static int __init irqfd_module_init(void) | |||
| 552 | return 0; | 555 | return 0; |
| 553 | } | 556 | } |
| 554 | 557 | ||
| 555 | static void __exit irqfd_module_exit(void) | 558 | void kvm_irqfd_exit(void) |
| 556 | { | 559 | { |
| 557 | destroy_workqueue(irqfd_cleanup_wq); | 560 | destroy_workqueue(irqfd_cleanup_wq); |
| 558 | } | 561 | } |
| 559 | |||
| 560 | module_init(irqfd_module_init); | ||
| 561 | module_exit(irqfd_module_exit); | ||
| 562 | #endif | 562 | #endif |
| 563 | 563 | ||
| 564 | /* | 564 | /* |
| @@ -577,6 +577,7 @@ struct _ioeventfd { | |||
| 577 | struct eventfd_ctx *eventfd; | 577 | struct eventfd_ctx *eventfd; |
| 578 | u64 datamatch; | 578 | u64 datamatch; |
| 579 | struct kvm_io_device dev; | 579 | struct kvm_io_device dev; |
| 580 | u8 bus_idx; | ||
| 580 | bool wildcard; | 581 | bool wildcard; |
| 581 | }; | 582 | }; |
| 582 | 583 | ||
| @@ -669,7 +670,8 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) | |||
| 669 | struct _ioeventfd *_p; | 670 | struct _ioeventfd *_p; |
| 670 | 671 | ||
| 671 | list_for_each_entry(_p, &kvm->ioeventfds, list) | 672 | list_for_each_entry(_p, &kvm->ioeventfds, list) |
| 672 | if (_p->addr == p->addr && _p->length == p->length && | 673 | if (_p->bus_idx == p->bus_idx && |
| 674 | _p->addr == p->addr && _p->length == p->length && | ||
| 673 | (_p->wildcard || p->wildcard || | 675 | (_p->wildcard || p->wildcard || |
| 674 | _p->datamatch == p->datamatch)) | 676 | _p->datamatch == p->datamatch)) |
| 675 | return true; | 677 | return true; |
| @@ -677,15 +679,24 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) | |||
| 677 | return false; | 679 | return false; |
| 678 | } | 680 | } |
| 679 | 681 | ||
| 682 | static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) | ||
| 683 | { | ||
| 684 | if (flags & KVM_IOEVENTFD_FLAG_PIO) | ||
| 685 | return KVM_PIO_BUS; | ||
| 686 | if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) | ||
| 687 | return KVM_VIRTIO_CCW_NOTIFY_BUS; | ||
| 688 | return KVM_MMIO_BUS; | ||
| 689 | } | ||
| 690 | |||
| 680 | static int | 691 | static int |
| 681 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 692 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) |
| 682 | { | 693 | { |
| 683 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | 694 | enum kvm_bus bus_idx; |
| 684 | enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; | ||
| 685 | struct _ioeventfd *p; | 695 | struct _ioeventfd *p; |
| 686 | struct eventfd_ctx *eventfd; | 696 | struct eventfd_ctx *eventfd; |
| 687 | int ret; | 697 | int ret; |
| 688 | 698 | ||
| 699 | bus_idx = ioeventfd_bus_from_flags(args->flags); | ||
| 689 | /* must be natural-word sized */ | 700 | /* must be natural-word sized */ |
| 690 | switch (args->len) { | 701 | switch (args->len) { |
| 691 | case 1: | 702 | case 1: |
| @@ -717,6 +728,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
| 717 | 728 | ||
| 718 | INIT_LIST_HEAD(&p->list); | 729 | INIT_LIST_HEAD(&p->list); |
| 719 | p->addr = args->addr; | 730 | p->addr = args->addr; |
| 731 | p->bus_idx = bus_idx; | ||
| 720 | p->length = args->len; | 732 | p->length = args->len; |
| 721 | p->eventfd = eventfd; | 733 | p->eventfd = eventfd; |
| 722 | 734 | ||
| @@ -760,12 +772,12 @@ fail: | |||
| 760 | static int | 772 | static int |
| 761 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 773 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) |
| 762 | { | 774 | { |
| 763 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | 775 | enum kvm_bus bus_idx; |
| 764 | enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; | ||
| 765 | struct _ioeventfd *p, *tmp; | 776 | struct _ioeventfd *p, *tmp; |
| 766 | struct eventfd_ctx *eventfd; | 777 | struct eventfd_ctx *eventfd; |
| 767 | int ret = -ENOENT; | 778 | int ret = -ENOENT; |
| 768 | 779 | ||
| 780 | bus_idx = ioeventfd_bus_from_flags(args->flags); | ||
| 769 | eventfd = eventfd_ctx_fdget(args->fd); | 781 | eventfd = eventfd_ctx_fdget(args->fd); |
| 770 | if (IS_ERR(eventfd)) | 782 | if (IS_ERR(eventfd)) |
| 771 | return PTR_ERR(eventfd); | 783 | return PTR_ERR(eventfd); |
| @@ -775,7 +787,8 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
| 775 | list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { | 787 | list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { |
| 776 | bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); | 788 | bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); |
| 777 | 789 | ||
| 778 | if (p->eventfd != eventfd || | 790 | if (p->bus_idx != bus_idx || |
| 791 | p->eventfd != eventfd || | ||
| 779 | p->addr != args->addr || | 792 | p->addr != args->addr || |
| 780 | p->length != args->len || | 793 | p->length != args->len || |
| 781 | p->wildcard != wildcard) | 794 | p->wildcard != wildcard) |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 5ba005c00e2f..2d682977ce82 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
| @@ -50,7 +50,8 @@ | |||
| 50 | #else | 50 | #else |
| 51 | #define ioapic_debug(fmt, arg...) | 51 | #define ioapic_debug(fmt, arg...) |
| 52 | #endif | 52 | #endif |
| 53 | static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq); | 53 | static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq, |
| 54 | bool line_status); | ||
| 54 | 55 | ||
| 55 | static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, | 56 | static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, |
| 56 | unsigned long addr, | 57 | unsigned long addr, |
| @@ -90,7 +91,80 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, | |||
| 90 | return result; | 91 | return result; |
| 91 | } | 92 | } |
| 92 | 93 | ||
| 93 | static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) | 94 | static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) |
| 95 | { | ||
| 96 | ioapic->rtc_status.pending_eoi = 0; | ||
| 97 | bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); | ||
| 98 | } | ||
| 99 | |||
| 100 | static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) | ||
| 101 | { | ||
| 102 | bool new_val, old_val; | ||
| 103 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; | ||
| 104 | union kvm_ioapic_redirect_entry *e; | ||
| 105 | |||
| 106 | e = &ioapic->redirtbl[RTC_GSI]; | ||
| 107 | if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, | ||
| 108 | e->fields.dest_mode)) | ||
| 109 | return; | ||
| 110 | |||
| 111 | new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); | ||
| 112 | old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); | ||
| 113 | |||
| 114 | if (new_val == old_val) | ||
| 115 | return; | ||
| 116 | |||
| 117 | if (new_val) { | ||
| 118 | __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); | ||
| 119 | ioapic->rtc_status.pending_eoi++; | ||
| 120 | } else { | ||
| 121 | __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); | ||
| 122 | ioapic->rtc_status.pending_eoi--; | ||
| 123 | } | ||
| 124 | |||
| 125 | WARN_ON(ioapic->rtc_status.pending_eoi < 0); | ||
| 126 | } | ||
| 127 | |||
| 128 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) | ||
| 129 | { | ||
| 130 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; | ||
| 131 | |||
| 132 | spin_lock(&ioapic->lock); | ||
| 133 | __rtc_irq_eoi_tracking_restore_one(vcpu); | ||
| 134 | spin_unlock(&ioapic->lock); | ||
| 135 | } | ||
| 136 | |||
| 137 | static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) | ||
| 138 | { | ||
| 139 | struct kvm_vcpu *vcpu; | ||
| 140 | int i; | ||
| 141 | |||
| 142 | if (RTC_GSI >= IOAPIC_NUM_PINS) | ||
| 143 | return; | ||
| 144 | |||
| 145 | rtc_irq_eoi_tracking_reset(ioapic); | ||
| 146 | kvm_for_each_vcpu(i, vcpu, ioapic->kvm) | ||
| 147 | __rtc_irq_eoi_tracking_restore_one(vcpu); | ||
| 148 | } | ||
| 149 | |||
| 150 | static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) | ||
| 151 | { | ||
| 152 | if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) | ||
| 153 | --ioapic->rtc_status.pending_eoi; | ||
| 154 | |||
| 155 | WARN_ON(ioapic->rtc_status.pending_eoi < 0); | ||
| 156 | } | ||
| 157 | |||
| 158 | static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) | ||
| 159 | { | ||
| 160 | if (ioapic->rtc_status.pending_eoi > 0) | ||
| 161 | return true; /* coalesced */ | ||
| 162 | |||
| 163 | return false; | ||
| 164 | } | ||
| 165 | |||
| 166 | static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, | ||
| 167 | bool line_status) | ||
| 94 | { | 168 | { |
| 95 | union kvm_ioapic_redirect_entry *pent; | 169 | union kvm_ioapic_redirect_entry *pent; |
| 96 | int injected = -1; | 170 | int injected = -1; |
| @@ -98,7 +172,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) | |||
| 98 | pent = &ioapic->redirtbl[idx]; | 172 | pent = &ioapic->redirtbl[idx]; |
| 99 | 173 | ||
| 100 | if (!pent->fields.mask) { | 174 | if (!pent->fields.mask) { |
| 101 | injected = ioapic_deliver(ioapic, idx); | 175 | injected = ioapic_deliver(ioapic, idx, line_status); |
| 102 | if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) | 176 | if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) |
| 103 | pent->fields.remote_irr = 1; | 177 | pent->fields.remote_irr = 1; |
| 104 | } | 178 | } |
| @@ -119,41 +193,48 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic) | |||
| 119 | smp_wmb(); | 193 | smp_wmb(); |
| 120 | } | 194 | } |
| 121 | 195 | ||
| 122 | void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | 196 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, |
| 123 | u64 *eoi_exit_bitmap) | 197 | u32 *tmr) |
| 124 | { | 198 | { |
| 125 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; | 199 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
| 126 | union kvm_ioapic_redirect_entry *e; | 200 | union kvm_ioapic_redirect_entry *e; |
| 127 | struct kvm_lapic_irq irqe; | ||
| 128 | int index; | 201 | int index; |
| 129 | 202 | ||
| 130 | spin_lock(&ioapic->lock); | 203 | spin_lock(&ioapic->lock); |
| 131 | /* traverse ioapic entry to set eoi exit bitmap*/ | ||
| 132 | for (index = 0; index < IOAPIC_NUM_PINS; index++) { | 204 | for (index = 0; index < IOAPIC_NUM_PINS; index++) { |
| 133 | e = &ioapic->redirtbl[index]; | 205 | e = &ioapic->redirtbl[index]; |
| 134 | if (!e->fields.mask && | 206 | if (!e->fields.mask && |
| 135 | (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || | 207 | (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || |
| 136 | kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, | 208 | kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, |
| 137 | index))) { | 209 | index) || index == RTC_GSI)) { |
| 138 | irqe.dest_id = e->fields.dest_id; | 210 | if (kvm_apic_match_dest(vcpu, NULL, 0, |
| 139 | irqe.vector = e->fields.vector; | 211 | e->fields.dest_id, e->fields.dest_mode)) { |
| 140 | irqe.dest_mode = e->fields.dest_mode; | 212 | __set_bit(e->fields.vector, |
| 141 | irqe.delivery_mode = e->fields.delivery_mode << 8; | 213 | (unsigned long *)eoi_exit_bitmap); |
| 142 | kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap); | 214 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) |
| 215 | __set_bit(e->fields.vector, | ||
| 216 | (unsigned long *)tmr); | ||
| 217 | } | ||
| 143 | } | 218 | } |
| 144 | } | 219 | } |
| 145 | spin_unlock(&ioapic->lock); | 220 | spin_unlock(&ioapic->lock); |
| 146 | } | 221 | } |
| 147 | EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap); | ||
| 148 | 222 | ||
| 149 | void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm) | 223 | #ifdef CONFIG_X86 |
| 224 | void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) | ||
| 150 | { | 225 | { |
| 151 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 226 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
| 152 | 227 | ||
| 153 | if (!kvm_apic_vid_enabled(kvm) || !ioapic) | 228 | if (!ioapic) |
| 154 | return; | 229 | return; |
| 155 | kvm_make_update_eoibitmap_request(kvm); | 230 | kvm_make_scan_ioapic_request(kvm); |
| 156 | } | 231 | } |
| 232 | #else | ||
| 233 | void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) | ||
| 234 | { | ||
| 235 | return; | ||
| 236 | } | ||
| 237 | #endif | ||
| 157 | 238 | ||
| 158 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | 239 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) |
| 159 | { | 240 | { |
| @@ -195,16 +276,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
| 195 | kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); | 276 | kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); |
| 196 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG | 277 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG |
| 197 | && ioapic->irr & (1 << index)) | 278 | && ioapic->irr & (1 << index)) |
| 198 | ioapic_service(ioapic, index); | 279 | ioapic_service(ioapic, index, false); |
| 199 | kvm_ioapic_make_eoibitmap_request(ioapic->kvm); | 280 | kvm_vcpu_request_scan_ioapic(ioapic->kvm); |
| 200 | break; | 281 | break; |
| 201 | } | 282 | } |
| 202 | } | 283 | } |
| 203 | 284 | ||
| 204 | static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) | 285 | static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) |
| 205 | { | 286 | { |
| 206 | union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; | 287 | union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; |
| 207 | struct kvm_lapic_irq irqe; | 288 | struct kvm_lapic_irq irqe; |
| 289 | int ret; | ||
| 208 | 290 | ||
| 209 | ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " | 291 | ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " |
| 210 | "vector=%x trig_mode=%x\n", | 292 | "vector=%x trig_mode=%x\n", |
| @@ -220,11 +302,19 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) | |||
| 220 | irqe.level = 1; | 302 | irqe.level = 1; |
| 221 | irqe.shorthand = 0; | 303 | irqe.shorthand = 0; |
| 222 | 304 | ||
| 223 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); | 305 | if (irq == RTC_GSI && line_status) { |
| 306 | BUG_ON(ioapic->rtc_status.pending_eoi != 0); | ||
| 307 | ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, | ||
| 308 | ioapic->rtc_status.dest_map); | ||
| 309 | ioapic->rtc_status.pending_eoi = ret; | ||
| 310 | } else | ||
| 311 | ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); | ||
| 312 | |||
| 313 | return ret; | ||
| 224 | } | 314 | } |
| 225 | 315 | ||
| 226 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | 316 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, |
| 227 | int level) | 317 | int level, bool line_status) |
| 228 | { | 318 | { |
| 229 | u32 old_irr; | 319 | u32 old_irr; |
| 230 | u32 mask = 1 << irq; | 320 | u32 mask = 1 << irq; |
| @@ -244,13 +334,20 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | |||
| 244 | ret = 1; | 334 | ret = 1; |
| 245 | } else { | 335 | } else { |
| 246 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); | 336 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); |
| 337 | |||
| 338 | if (irq == RTC_GSI && line_status && | ||
| 339 | rtc_irq_check_coalesced(ioapic)) { | ||
| 340 | ret = 0; /* coalesced */ | ||
| 341 | goto out; | ||
| 342 | } | ||
| 247 | ioapic->irr |= mask; | 343 | ioapic->irr |= mask; |
| 248 | if ((edge && old_irr != ioapic->irr) || | 344 | if ((edge && old_irr != ioapic->irr) || |
| 249 | (!edge && !entry.fields.remote_irr)) | 345 | (!edge && !entry.fields.remote_irr)) |
| 250 | ret = ioapic_service(ioapic, irq); | 346 | ret = ioapic_service(ioapic, irq, line_status); |
| 251 | else | 347 | else |
| 252 | ret = 0; /* report coalesced interrupt */ | 348 | ret = 0; /* report coalesced interrupt */ |
| 253 | } | 349 | } |
| 350 | out: | ||
| 254 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | 351 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); |
| 255 | spin_unlock(&ioapic->lock); | 352 | spin_unlock(&ioapic->lock); |
| 256 | 353 | ||
| @@ -267,8 +364,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) | |||
| 267 | spin_unlock(&ioapic->lock); | 364 | spin_unlock(&ioapic->lock); |
| 268 | } | 365 | } |
| 269 | 366 | ||
| 270 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, | 367 | static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, |
| 271 | int trigger_mode) | 368 | struct kvm_ioapic *ioapic, int vector, int trigger_mode) |
| 272 | { | 369 | { |
| 273 | int i; | 370 | int i; |
| 274 | 371 | ||
| @@ -278,6 +375,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, | |||
| 278 | if (ent->fields.vector != vector) | 375 | if (ent->fields.vector != vector) |
| 279 | continue; | 376 | continue; |
| 280 | 377 | ||
| 378 | if (i == RTC_GSI) | ||
| 379 | rtc_irq_eoi(ioapic, vcpu); | ||
| 281 | /* | 380 | /* |
| 282 | * We are dropping lock while calling ack notifiers because ack | 381 | * We are dropping lock while calling ack notifiers because ack |
| 283 | * notifier callbacks for assigned devices call into IOAPIC | 382 | * notifier callbacks for assigned devices call into IOAPIC |
| @@ -296,7 +395,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, | |||
| 296 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | 395 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); |
| 297 | ent->fields.remote_irr = 0; | 396 | ent->fields.remote_irr = 0; |
| 298 | if (!ent->fields.mask && (ioapic->irr & (1 << i))) | 397 | if (!ent->fields.mask && (ioapic->irr & (1 << i))) |
| 299 | ioapic_service(ioapic, i); | 398 | ioapic_service(ioapic, i, false); |
| 300 | } | 399 | } |
| 301 | } | 400 | } |
| 302 | 401 | ||
| @@ -307,12 +406,12 @@ bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) | |||
| 307 | return test_bit(vector, ioapic->handled_vectors); | 406 | return test_bit(vector, ioapic->handled_vectors); |
| 308 | } | 407 | } |
| 309 | 408 | ||
| 310 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) | 409 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) |
| 311 | { | 410 | { |
| 312 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 411 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
| 313 | 412 | ||
| 314 | spin_lock(&ioapic->lock); | 413 | spin_lock(&ioapic->lock); |
| 315 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); | 414 | __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode); |
| 316 | spin_unlock(&ioapic->lock); | 415 | spin_unlock(&ioapic->lock); |
| 317 | } | 416 | } |
| 318 | 417 | ||
| @@ -410,7 +509,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 410 | break; | 509 | break; |
| 411 | #ifdef CONFIG_IA64 | 510 | #ifdef CONFIG_IA64 |
| 412 | case IOAPIC_REG_EOI: | 511 | case IOAPIC_REG_EOI: |
| 413 | __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); | 512 | __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG); |
| 414 | break; | 513 | break; |
| 415 | #endif | 514 | #endif |
| 416 | 515 | ||
| @@ -431,6 +530,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
| 431 | ioapic->ioregsel = 0; | 530 | ioapic->ioregsel = 0; |
| 432 | ioapic->irr = 0; | 531 | ioapic->irr = 0; |
| 433 | ioapic->id = 0; | 532 | ioapic->id = 0; |
| 533 | rtc_irq_eoi_tracking_reset(ioapic); | ||
| 434 | update_handled_vectors(ioapic); | 534 | update_handled_vectors(ioapic); |
| 435 | } | 535 | } |
| 436 | 536 | ||
| @@ -496,7 +596,8 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | |||
| 496 | spin_lock(&ioapic->lock); | 596 | spin_lock(&ioapic->lock); |
| 497 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | 597 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); |
| 498 | update_handled_vectors(ioapic); | 598 | update_handled_vectors(ioapic); |
| 499 | kvm_ioapic_make_eoibitmap_request(kvm); | 599 | kvm_vcpu_request_scan_ioapic(kvm); |
| 600 | kvm_rtc_eoi_tracking_restore_all(ioapic); | ||
| 500 | spin_unlock(&ioapic->lock); | 601 | spin_unlock(&ioapic->lock); |
| 501 | return 0; | 602 | return 0; |
| 502 | } | 603 | } |
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 0400a466c50c..615d8c995c3c 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
| @@ -34,6 +34,17 @@ struct kvm_vcpu; | |||
| 34 | #define IOAPIC_INIT 0x5 | 34 | #define IOAPIC_INIT 0x5 |
| 35 | #define IOAPIC_EXTINT 0x7 | 35 | #define IOAPIC_EXTINT 0x7 |
| 36 | 36 | ||
| 37 | #ifdef CONFIG_X86 | ||
| 38 | #define RTC_GSI 8 | ||
| 39 | #else | ||
| 40 | #define RTC_GSI -1U | ||
| 41 | #endif | ||
| 42 | |||
| 43 | struct rtc_status { | ||
| 44 | int pending_eoi; | ||
| 45 | DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); | ||
| 46 | }; | ||
| 47 | |||
| 37 | struct kvm_ioapic { | 48 | struct kvm_ioapic { |
| 38 | u64 base_address; | 49 | u64 base_address; |
| 39 | u32 ioregsel; | 50 | u32 ioregsel; |
| @@ -47,6 +58,7 @@ struct kvm_ioapic { | |||
| 47 | void (*ack_notifier)(void *opaque, int irq); | 58 | void (*ack_notifier)(void *opaque, int irq); |
| 48 | spinlock_t lock; | 59 | spinlock_t lock; |
| 49 | DECLARE_BITMAP(handled_vectors, 256); | 60 | DECLARE_BITMAP(handled_vectors, 256); |
| 61 | struct rtc_status rtc_status; | ||
| 50 | }; | 62 | }; |
| 51 | 63 | ||
| 52 | #ifdef DEBUG | 64 | #ifdef DEBUG |
| @@ -67,24 +79,25 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | |||
| 67 | return kvm->arch.vioapic; | 79 | return kvm->arch.vioapic; |
| 68 | } | 80 | } |
| 69 | 81 | ||
| 82 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); | ||
| 70 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 83 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
| 71 | int short_hand, int dest, int dest_mode); | 84 | int short_hand, int dest, int dest_mode); |
| 72 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); | 85 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); |
| 73 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); | 86 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, |
| 87 | int trigger_mode); | ||
| 74 | bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); | 88 | bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); |
| 75 | int kvm_ioapic_init(struct kvm *kvm); | 89 | int kvm_ioapic_init(struct kvm *kvm); |
| 76 | void kvm_ioapic_destroy(struct kvm *kvm); | 90 | void kvm_ioapic_destroy(struct kvm *kvm); |
| 77 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | 91 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, |
| 78 | int level); | 92 | int level, bool line_status); |
| 79 | void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); | 93 | void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); |
| 80 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 94 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
| 81 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 95 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
| 82 | struct kvm_lapic_irq *irq); | 96 | struct kvm_lapic_irq *irq, unsigned long *dest_map); |
| 83 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 97 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
| 84 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 98 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
| 85 | void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm); | 99 | void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); |
| 86 | void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | 100 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, |
| 87 | u64 *eoi_exit_bitmap); | 101 | u32 *tmr); |
| 88 | |||
| 89 | 102 | ||
| 90 | #endif | 103 | #endif |
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index e9073cf4d040..e2e6b4473a96 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
| @@ -35,7 +35,8 @@ | |||
| 35 | #include "ioapic.h" | 35 | #include "ioapic.h" |
| 36 | 36 | ||
| 37 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | 37 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
| 38 | struct kvm *kvm, int irq_source_id, int level) | 38 | struct kvm *kvm, int irq_source_id, int level, |
| 39 | bool line_status) | ||
| 39 | { | 40 | { |
| 40 | #ifdef CONFIG_X86 | 41 | #ifdef CONFIG_X86 |
| 41 | struct kvm_pic *pic = pic_irqchip(kvm); | 42 | struct kvm_pic *pic = pic_irqchip(kvm); |
| @@ -46,10 +47,12 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | |||
| 46 | } | 47 | } |
| 47 | 48 | ||
| 48 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | 49 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, |
| 49 | struct kvm *kvm, int irq_source_id, int level) | 50 | struct kvm *kvm, int irq_source_id, int level, |
| 51 | bool line_status) | ||
| 50 | { | 52 | { |
| 51 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 53 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
| 52 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level); | 54 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, |
| 55 | line_status); | ||
| 53 | } | 56 | } |
| 54 | 57 | ||
| 55 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) | 58 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) |
| @@ -63,7 +66,7 @@ inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) | |||
| 63 | } | 66 | } |
| 64 | 67 | ||
| 65 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 68 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
| 66 | struct kvm_lapic_irq *irq) | 69 | struct kvm_lapic_irq *irq, unsigned long *dest_map) |
| 67 | { | 70 | { |
| 68 | int i, r = -1; | 71 | int i, r = -1; |
| 69 | struct kvm_vcpu *vcpu, *lowest = NULL; | 72 | struct kvm_vcpu *vcpu, *lowest = NULL; |
| @@ -74,7 +77,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
| 74 | irq->delivery_mode = APIC_DM_FIXED; | 77 | irq->delivery_mode = APIC_DM_FIXED; |
| 75 | } | 78 | } |
| 76 | 79 | ||
| 77 | if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r)) | 80 | if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) |
| 78 | return r; | 81 | return r; |
| 79 | 82 | ||
| 80 | kvm_for_each_vcpu(i, vcpu, kvm) { | 83 | kvm_for_each_vcpu(i, vcpu, kvm) { |
| @@ -88,7 +91,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
| 88 | if (!kvm_is_dm_lowest_prio(irq)) { | 91 | if (!kvm_is_dm_lowest_prio(irq)) { |
| 89 | if (r < 0) | 92 | if (r < 0) |
| 90 | r = 0; | 93 | r = 0; |
| 91 | r += kvm_apic_set_irq(vcpu, irq); | 94 | r += kvm_apic_set_irq(vcpu, irq, dest_map); |
| 92 | } else if (kvm_lapic_enabled(vcpu)) { | 95 | } else if (kvm_lapic_enabled(vcpu)) { |
| 93 | if (!lowest) | 96 | if (!lowest) |
| 94 | lowest = vcpu; | 97 | lowest = vcpu; |
| @@ -98,7 +101,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
| 98 | } | 101 | } |
| 99 | 102 | ||
| 100 | if (lowest) | 103 | if (lowest) |
| 101 | r = kvm_apic_set_irq(lowest, irq); | 104 | r = kvm_apic_set_irq(lowest, irq, dest_map); |
| 102 | 105 | ||
| 103 | return r; | 106 | return r; |
| 104 | } | 107 | } |
| @@ -121,7 +124,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | |||
| 121 | } | 124 | } |
| 122 | 125 | ||
| 123 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 126 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
| 124 | struct kvm *kvm, int irq_source_id, int level) | 127 | struct kvm *kvm, int irq_source_id, int level, bool line_status) |
| 125 | { | 128 | { |
| 126 | struct kvm_lapic_irq irq; | 129 | struct kvm_lapic_irq irq; |
| 127 | 130 | ||
| @@ -130,7 +133,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
| 130 | 133 | ||
| 131 | kvm_set_msi_irq(e, &irq); | 134 | kvm_set_msi_irq(e, &irq); |
| 132 | 135 | ||
| 133 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | 136 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); |
| 134 | } | 137 | } |
| 135 | 138 | ||
| 136 | 139 | ||
| @@ -142,63 +145,12 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, | |||
| 142 | 145 | ||
| 143 | kvm_set_msi_irq(e, &irq); | 146 | kvm_set_msi_irq(e, &irq); |
| 144 | 147 | ||
| 145 | if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r)) | 148 | if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) |
| 146 | return r; | 149 | return r; |
| 147 | else | 150 | else |
| 148 | return -EWOULDBLOCK; | 151 | return -EWOULDBLOCK; |
| 149 | } | 152 | } |
| 150 | 153 | ||
| 151 | int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) | ||
| 152 | { | ||
| 153 | struct kvm_kernel_irq_routing_entry route; | ||
| 154 | |||
| 155 | if (!irqchip_in_kernel(kvm) || msi->flags != 0) | ||
| 156 | return -EINVAL; | ||
| 157 | |||
| 158 | route.msi.address_lo = msi->address_lo; | ||
| 159 | route.msi.address_hi = msi->address_hi; | ||
| 160 | route.msi.data = msi->data; | ||
| 161 | |||
| 162 | return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); | ||
| 163 | } | ||
| 164 | |||
| 165 | /* | ||
| 166 | * Return value: | ||
| 167 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | ||
| 168 | * = 0 Interrupt was coalesced (previous irq is still pending) | ||
| 169 | * > 0 Number of CPUs interrupt was delivered to | ||
| 170 | */ | ||
| 171 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) | ||
| 172 | { | ||
| 173 | struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; | ||
| 174 | int ret = -1, i = 0; | ||
| 175 | struct kvm_irq_routing_table *irq_rt; | ||
| 176 | |||
| 177 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
| 178 | |||
| 179 | /* Not possible to detect if the guest uses the PIC or the | ||
| 180 | * IOAPIC. So set the bit in both. The guest will ignore | ||
| 181 | * writes to the unused one. | ||
| 182 | */ | ||
| 183 | rcu_read_lock(); | ||
| 184 | irq_rt = rcu_dereference(kvm->irq_routing); | ||
| 185 | if (irq < irq_rt->nr_rt_entries) | ||
| 186 | hlist_for_each_entry(e, &irq_rt->map[irq], link) | ||
| 187 | irq_set[i++] = *e; | ||
| 188 | rcu_read_unlock(); | ||
| 189 | |||
| 190 | while(i--) { | ||
| 191 | int r; | ||
| 192 | r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); | ||
| 193 | if (r < 0) | ||
| 194 | continue; | ||
| 195 | |||
| 196 | ret = r + ((ret < 0) ? 0 : ret); | ||
| 197 | } | ||
| 198 | |||
| 199 | return ret; | ||
| 200 | } | ||
| 201 | |||
| 202 | /* | 154 | /* |
| 203 | * Deliver an IRQ in an atomic context if we can, or return a failure, | 155 | * Deliver an IRQ in an atomic context if we can, or return a failure, |
| 204 | * user can retry in a process context. | 156 | * user can retry in a process context. |
| @@ -236,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) | |||
| 236 | return ret; | 188 | return ret; |
| 237 | } | 189 | } |
| 238 | 190 | ||
| 239 | bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) | ||
| 240 | { | ||
| 241 | struct kvm_irq_ack_notifier *kian; | ||
| 242 | int gsi; | ||
| 243 | |||
| 244 | rcu_read_lock(); | ||
| 245 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; | ||
| 246 | if (gsi != -1) | ||
| 247 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | ||
| 248 | link) | ||
| 249 | if (kian->gsi == gsi) { | ||
| 250 | rcu_read_unlock(); | ||
| 251 | return true; | ||
| 252 | } | ||
| 253 | |||
| 254 | rcu_read_unlock(); | ||
| 255 | |||
| 256 | return false; | ||
| 257 | } | ||
| 258 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); | ||
| 259 | |||
| 260 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | ||
| 261 | { | ||
| 262 | struct kvm_irq_ack_notifier *kian; | ||
| 263 | int gsi; | ||
| 264 | |||
| 265 | trace_kvm_ack_irq(irqchip, pin); | ||
| 266 | |||
| 267 | rcu_read_lock(); | ||
| 268 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; | ||
| 269 | if (gsi != -1) | ||
| 270 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | ||
| 271 | link) | ||
| 272 | if (kian->gsi == gsi) | ||
| 273 | kian->irq_acked(kian); | ||
| 274 | rcu_read_unlock(); | ||
| 275 | } | ||
| 276 | |||
| 277 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | ||
| 278 | struct kvm_irq_ack_notifier *kian) | ||
| 279 | { | ||
| 280 | mutex_lock(&kvm->irq_lock); | ||
| 281 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); | ||
| 282 | mutex_unlock(&kvm->irq_lock); | ||
| 283 | kvm_ioapic_make_eoibitmap_request(kvm); | ||
| 284 | } | ||
| 285 | |||
| 286 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | ||
| 287 | struct kvm_irq_ack_notifier *kian) | ||
| 288 | { | ||
| 289 | mutex_lock(&kvm->irq_lock); | ||
| 290 | hlist_del_init_rcu(&kian->link); | ||
| 291 | mutex_unlock(&kvm->irq_lock); | ||
| 292 | synchronize_rcu(); | ||
| 293 | kvm_ioapic_make_eoibitmap_request(kvm); | ||
| 294 | } | ||
| 295 | |||
| 296 | int kvm_request_irq_source_id(struct kvm *kvm) | 191 | int kvm_request_irq_source_id(struct kvm *kvm) |
| 297 | { | 192 | { |
| 298 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; | 193 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; |
| @@ -376,34 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, | |||
| 376 | rcu_read_unlock(); | 271 | rcu_read_unlock(); |
| 377 | } | 272 | } |
| 378 | 273 | ||
| 379 | void kvm_free_irq_routing(struct kvm *kvm) | 274 | int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, |
| 380 | { | 275 | struct kvm_kernel_irq_routing_entry *e, |
| 381 | /* Called only during vm destruction. Nobody can use the pointer | 276 | const struct kvm_irq_routing_entry *ue) |
| 382 | at this stage */ | ||
| 383 | kfree(kvm->irq_routing); | ||
| 384 | } | ||
| 385 | |||
| 386 | static int setup_routing_entry(struct kvm_irq_routing_table *rt, | ||
| 387 | struct kvm_kernel_irq_routing_entry *e, | ||
| 388 | const struct kvm_irq_routing_entry *ue) | ||
| 389 | { | 277 | { |
| 390 | int r = -EINVAL; | 278 | int r = -EINVAL; |
| 391 | int delta; | 279 | int delta; |
| 392 | unsigned max_pin; | 280 | unsigned max_pin; |
| 393 | struct kvm_kernel_irq_routing_entry *ei; | ||
| 394 | 281 | ||
| 395 | /* | ||
| 396 | * Do not allow GSI to be mapped to the same irqchip more than once. | ||
| 397 | * Allow only one to one mapping between GSI and MSI. | ||
| 398 | */ | ||
| 399 | hlist_for_each_entry(ei, &rt->map[ue->gsi], link) | ||
| 400 | if (ei->type == KVM_IRQ_ROUTING_MSI || | ||
| 401 | ue->type == KVM_IRQ_ROUTING_MSI || | ||
| 402 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | ||
| 403 | return r; | ||
| 404 | |||
| 405 | e->gsi = ue->gsi; | ||
| 406 | e->type = ue->type; | ||
| 407 | switch (ue->type) { | 282 | switch (ue->type) { |
| 408 | case KVM_IRQ_ROUTING_IRQCHIP: | 283 | case KVM_IRQ_ROUTING_IRQCHIP: |
| 409 | delta = 0; | 284 | delta = 0; |
| @@ -440,69 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
| 440 | goto out; | 315 | goto out; |
| 441 | } | 316 | } |
| 442 | 317 | ||
| 443 | hlist_add_head(&e->link, &rt->map[e->gsi]); | ||
| 444 | r = 0; | 318 | r = 0; |
| 445 | out: | 319 | out: |
| 446 | return r; | 320 | return r; |
| 447 | } | 321 | } |
| 448 | 322 | ||
| 449 | |||
| 450 | int kvm_set_irq_routing(struct kvm *kvm, | ||
| 451 | const struct kvm_irq_routing_entry *ue, | ||
| 452 | unsigned nr, | ||
| 453 | unsigned flags) | ||
| 454 | { | ||
| 455 | struct kvm_irq_routing_table *new, *old; | ||
| 456 | u32 i, j, nr_rt_entries = 0; | ||
| 457 | int r; | ||
| 458 | |||
| 459 | for (i = 0; i < nr; ++i) { | ||
| 460 | if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) | ||
| 461 | return -EINVAL; | ||
| 462 | nr_rt_entries = max(nr_rt_entries, ue[i].gsi); | ||
| 463 | } | ||
| 464 | |||
| 465 | nr_rt_entries += 1; | ||
| 466 | |||
| 467 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) | ||
| 468 | + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), | ||
| 469 | GFP_KERNEL); | ||
| 470 | |||
| 471 | if (!new) | ||
| 472 | return -ENOMEM; | ||
| 473 | |||
| 474 | new->rt_entries = (void *)&new->map[nr_rt_entries]; | ||
| 475 | |||
| 476 | new->nr_rt_entries = nr_rt_entries; | ||
| 477 | for (i = 0; i < 3; i++) | ||
| 478 | for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) | ||
| 479 | new->chip[i][j] = -1; | ||
| 480 | |||
| 481 | for (i = 0; i < nr; ++i) { | ||
| 482 | r = -EINVAL; | ||
| 483 | if (ue->flags) | ||
| 484 | goto out; | ||
| 485 | r = setup_routing_entry(new, &new->rt_entries[i], ue); | ||
| 486 | if (r) | ||
| 487 | goto out; | ||
| 488 | ++ue; | ||
| 489 | } | ||
| 490 | |||
| 491 | mutex_lock(&kvm->irq_lock); | ||
| 492 | old = kvm->irq_routing; | ||
| 493 | kvm_irq_routing_update(kvm, new); | ||
| 494 | mutex_unlock(&kvm->irq_lock); | ||
| 495 | |||
| 496 | synchronize_rcu(); | ||
| 497 | |||
| 498 | new = old; | ||
| 499 | r = 0; | ||
| 500 | |||
| 501 | out: | ||
| 502 | kfree(new); | ||
| 503 | return r; | ||
| 504 | } | ||
| 505 | |||
| 506 | #define IOAPIC_ROUTING_ENTRY(irq) \ | 323 | #define IOAPIC_ROUTING_ENTRY(irq) \ |
| 507 | { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ | 324 | { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ |
| 508 | .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } | 325 | .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } |
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c new file mode 100644 index 000000000000..20dc9e4a8f6c --- /dev/null +++ b/virt/kvm/irqchip.c | |||
| @@ -0,0 +1,237 @@ | |||
| 1 | /* | ||
| 2 | * irqchip.c: Common API for in kernel interrupt controllers | ||
| 3 | * Copyright (c) 2007, Intel Corporation. | ||
| 4 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
| 5 | * Copyright (c) 2013, Alexander Graf <agraf@suse.de> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify it | ||
| 8 | * under the terms and conditions of the GNU General Public License, | ||
| 9 | * version 2, as published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 14 | * more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU General Public License along with | ||
| 17 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
| 18 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
| 19 | * | ||
| 20 | * This file is derived from virt/kvm/irq_comm.c. | ||
| 21 | * | ||
| 22 | * Authors: | ||
| 23 | * Yaozu (Eddie) Dong <Eddie.dong@intel.com> | ||
| 24 | * Alexander Graf <agraf@suse.de> | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/kvm_host.h> | ||
| 28 | #include <linux/slab.h> | ||
| 29 | #include <linux/export.h> | ||
| 30 | #include <trace/events/kvm.h> | ||
| 31 | #include "irq.h" | ||
| 32 | |||
| 33 | bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) | ||
| 34 | { | ||
| 35 | struct kvm_irq_ack_notifier *kian; | ||
| 36 | int gsi; | ||
| 37 | |||
| 38 | rcu_read_lock(); | ||
| 39 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; | ||
| 40 | if (gsi != -1) | ||
| 41 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | ||
| 42 | link) | ||
| 43 | if (kian->gsi == gsi) { | ||
| 44 | rcu_read_unlock(); | ||
| 45 | return true; | ||
| 46 | } | ||
| 47 | |||
| 48 | rcu_read_unlock(); | ||
| 49 | |||
| 50 | return false; | ||
| 51 | } | ||
| 52 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); | ||
| 53 | |||
| 54 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | ||
| 55 | { | ||
| 56 | struct kvm_irq_ack_notifier *kian; | ||
| 57 | int gsi; | ||
| 58 | |||
| 59 | trace_kvm_ack_irq(irqchip, pin); | ||
| 60 | |||
| 61 | rcu_read_lock(); | ||
| 62 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; | ||
| 63 | if (gsi != -1) | ||
| 64 | hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, | ||
| 65 | link) | ||
| 66 | if (kian->gsi == gsi) | ||
| 67 | kian->irq_acked(kian); | ||
| 68 | rcu_read_unlock(); | ||
| 69 | } | ||
| 70 | |||
| 71 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | ||
| 72 | struct kvm_irq_ack_notifier *kian) | ||
| 73 | { | ||
| 74 | mutex_lock(&kvm->irq_lock); | ||
| 75 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); | ||
| 76 | mutex_unlock(&kvm->irq_lock); | ||
| 77 | #ifdef __KVM_HAVE_IOAPIC | ||
| 78 | kvm_vcpu_request_scan_ioapic(kvm); | ||
| 79 | #endif | ||
| 80 | } | ||
| 81 | |||
| 82 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | ||
| 83 | struct kvm_irq_ack_notifier *kian) | ||
| 84 | { | ||
| 85 | mutex_lock(&kvm->irq_lock); | ||
| 86 | hlist_del_init_rcu(&kian->link); | ||
| 87 | mutex_unlock(&kvm->irq_lock); | ||
| 88 | synchronize_rcu(); | ||
| 89 | #ifdef __KVM_HAVE_IOAPIC | ||
| 90 | kvm_vcpu_request_scan_ioapic(kvm); | ||
| 91 | #endif | ||
| 92 | } | ||
| 93 | |||
| 94 | int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) | ||
| 95 | { | ||
| 96 | struct kvm_kernel_irq_routing_entry route; | ||
| 97 | |||
| 98 | if (!irqchip_in_kernel(kvm) || msi->flags != 0) | ||
| 99 | return -EINVAL; | ||
| 100 | |||
| 101 | route.msi.address_lo = msi->address_lo; | ||
| 102 | route.msi.address_hi = msi->address_hi; | ||
| 103 | route.msi.data = msi->data; | ||
| 104 | |||
| 105 | return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); | ||
| 106 | } | ||
| 107 | |||
| 108 | /* | ||
| 109 | * Return value: | ||
| 110 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | ||
| 111 | * = 0 Interrupt was coalesced (previous irq is still pending) | ||
| 112 | * > 0 Number of CPUs interrupt was delivered to | ||
| 113 | */ | ||
| 114 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, | ||
| 115 | bool line_status) | ||
| 116 | { | ||
| 117 | struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; | ||
| 118 | int ret = -1, i = 0; | ||
| 119 | struct kvm_irq_routing_table *irq_rt; | ||
| 120 | |||
| 121 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
| 122 | |||
| 123 | /* Not possible to detect if the guest uses the PIC or the | ||
| 124 | * IOAPIC. So set the bit in both. The guest will ignore | ||
| 125 | * writes to the unused one. | ||
| 126 | */ | ||
| 127 | rcu_read_lock(); | ||
| 128 | irq_rt = rcu_dereference(kvm->irq_routing); | ||
| 129 | if (irq < irq_rt->nr_rt_entries) | ||
| 130 | hlist_for_each_entry(e, &irq_rt->map[irq], link) | ||
| 131 | irq_set[i++] = *e; | ||
| 132 | rcu_read_unlock(); | ||
| 133 | |||
| 134 | while(i--) { | ||
| 135 | int r; | ||
| 136 | r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, | ||
| 137 | line_status); | ||
| 138 | if (r < 0) | ||
| 139 | continue; | ||
| 140 | |||
| 141 | ret = r + ((ret < 0) ? 0 : ret); | ||
| 142 | } | ||
| 143 | |||
| 144 | return ret; | ||
| 145 | } | ||
| 146 | |||
| 147 | void kvm_free_irq_routing(struct kvm *kvm) | ||
| 148 | { | ||
| 149 | /* Called only during vm destruction. Nobody can use the pointer | ||
| 150 | at this stage */ | ||
| 151 | kfree(kvm->irq_routing); | ||
| 152 | } | ||
| 153 | |||
| 154 | static int setup_routing_entry(struct kvm_irq_routing_table *rt, | ||
| 155 | struct kvm_kernel_irq_routing_entry *e, | ||
| 156 | const struct kvm_irq_routing_entry *ue) | ||
| 157 | { | ||
| 158 | int r = -EINVAL; | ||
| 159 | struct kvm_kernel_irq_routing_entry *ei; | ||
| 160 | |||
| 161 | /* | ||
| 162 | * Do not allow GSI to be mapped to the same irqchip more than once. | ||
| 163 | * Allow only one to one mapping between GSI and MSI. | ||
| 164 | */ | ||
| 165 | hlist_for_each_entry(ei, &rt->map[ue->gsi], link) | ||
| 166 | if (ei->type == KVM_IRQ_ROUTING_MSI || | ||
| 167 | ue->type == KVM_IRQ_ROUTING_MSI || | ||
| 168 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | ||
| 169 | return r; | ||
| 170 | |||
| 171 | e->gsi = ue->gsi; | ||
| 172 | e->type = ue->type; | ||
| 173 | r = kvm_set_routing_entry(rt, e, ue); | ||
| 174 | if (r) | ||
| 175 | goto out; | ||
| 176 | |||
| 177 | hlist_add_head(&e->link, &rt->map[e->gsi]); | ||
| 178 | r = 0; | ||
| 179 | out: | ||
| 180 | return r; | ||
| 181 | } | ||
| 182 | |||
| 183 | int kvm_set_irq_routing(struct kvm *kvm, | ||
| 184 | const struct kvm_irq_routing_entry *ue, | ||
| 185 | unsigned nr, | ||
| 186 | unsigned flags) | ||
| 187 | { | ||
| 188 | struct kvm_irq_routing_table *new, *old; | ||
| 189 | u32 i, j, nr_rt_entries = 0; | ||
| 190 | int r; | ||
| 191 | |||
| 192 | for (i = 0; i < nr; ++i) { | ||
| 193 | if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) | ||
| 194 | return -EINVAL; | ||
| 195 | nr_rt_entries = max(nr_rt_entries, ue[i].gsi); | ||
| 196 | } | ||
| 197 | |||
| 198 | nr_rt_entries += 1; | ||
| 199 | |||
| 200 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) | ||
| 201 | + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), | ||
| 202 | GFP_KERNEL); | ||
| 203 | |||
| 204 | if (!new) | ||
| 205 | return -ENOMEM; | ||
| 206 | |||
| 207 | new->rt_entries = (void *)&new->map[nr_rt_entries]; | ||
| 208 | |||
| 209 | new->nr_rt_entries = nr_rt_entries; | ||
| 210 | for (i = 0; i < KVM_NR_IRQCHIPS; i++) | ||
| 211 | for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) | ||
| 212 | new->chip[i][j] = -1; | ||
| 213 | |||
| 214 | for (i = 0; i < nr; ++i) { | ||
| 215 | r = -EINVAL; | ||
| 216 | if (ue->flags) | ||
| 217 | goto out; | ||
| 218 | r = setup_routing_entry(new, &new->rt_entries[i], ue); | ||
| 219 | if (r) | ||
| 220 | goto out; | ||
| 221 | ++ue; | ||
| 222 | } | ||
| 223 | |||
| 224 | mutex_lock(&kvm->irq_lock); | ||
| 225 | old = kvm->irq_routing; | ||
| 226 | kvm_irq_routing_update(kvm, new); | ||
| 227 | mutex_unlock(&kvm->irq_lock); | ||
| 228 | |||
| 229 | synchronize_rcu(); | ||
| 230 | |||
| 231 | new = old; | ||
| 232 | r = 0; | ||
| 233 | |||
| 234 | out: | ||
| 235 | kfree(new); | ||
| 236 | return r; | ||
| 237 | } | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f18013f09e68..45f09362ee7b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -217,9 +217,9 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm) | |||
| 217 | make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); | 217 | make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); |
| 218 | } | 218 | } |
| 219 | 219 | ||
| 220 | void kvm_make_update_eoibitmap_request(struct kvm *kvm) | 220 | void kvm_make_scan_ioapic_request(struct kvm *kvm) |
| 221 | { | 221 | { |
| 222 | make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP); | 222 | make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); |
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 225 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
| @@ -244,6 +244,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
| 244 | 244 | ||
| 245 | kvm_vcpu_set_in_spin_loop(vcpu, false); | 245 | kvm_vcpu_set_in_spin_loop(vcpu, false); |
| 246 | kvm_vcpu_set_dy_eligible(vcpu, false); | 246 | kvm_vcpu_set_dy_eligible(vcpu, false); |
| 247 | vcpu->preempted = false; | ||
| 247 | 248 | ||
| 248 | r = kvm_arch_vcpu_init(vcpu); | 249 | r = kvm_arch_vcpu_init(vcpu); |
| 249 | if (r < 0) | 250 | if (r < 0) |
| @@ -503,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
| 503 | mutex_init(&kvm->irq_lock); | 504 | mutex_init(&kvm->irq_lock); |
| 504 | mutex_init(&kvm->slots_lock); | 505 | mutex_init(&kvm->slots_lock); |
| 505 | atomic_set(&kvm->users_count, 1); | 506 | atomic_set(&kvm->users_count, 1); |
| 507 | INIT_LIST_HEAD(&kvm->devices); | ||
| 506 | 508 | ||
| 507 | r = kvm_init_mmu_notifier(kvm); | 509 | r = kvm_init_mmu_notifier(kvm); |
| 508 | if (r) | 510 | if (r) |
| @@ -580,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm) | |||
| 580 | kfree(kvm->memslots); | 582 | kfree(kvm->memslots); |
| 581 | } | 583 | } |
| 582 | 584 | ||
| 585 | static void kvm_destroy_devices(struct kvm *kvm) | ||
| 586 | { | ||
| 587 | struct list_head *node, *tmp; | ||
| 588 | |||
| 589 | list_for_each_safe(node, tmp, &kvm->devices) { | ||
| 590 | struct kvm_device *dev = | ||
| 591 | list_entry(node, struct kvm_device, vm_node); | ||
| 592 | |||
| 593 | list_del(node); | ||
| 594 | dev->ops->destroy(dev); | ||
| 595 | } | ||
| 596 | } | ||
| 597 | |||
| 583 | static void kvm_destroy_vm(struct kvm *kvm) | 598 | static void kvm_destroy_vm(struct kvm *kvm) |
| 584 | { | 599 | { |
| 585 | int i; | 600 | int i; |
| @@ -599,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
| 599 | kvm_arch_flush_shadow_all(kvm); | 614 | kvm_arch_flush_shadow_all(kvm); |
| 600 | #endif | 615 | #endif |
| 601 | kvm_arch_destroy_vm(kvm); | 616 | kvm_arch_destroy_vm(kvm); |
| 617 | kvm_destroy_devices(kvm); | ||
| 602 | kvm_free_physmem(kvm); | 618 | kvm_free_physmem(kvm); |
| 603 | cleanup_srcu_struct(&kvm->srcu); | 619 | cleanup_srcu_struct(&kvm->srcu); |
| 604 | kvm_arch_free_vm(kvm); | 620 | kvm_arch_free_vm(kvm); |
| @@ -719,24 +735,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
| 719 | } | 735 | } |
| 720 | 736 | ||
| 721 | /* | 737 | /* |
| 722 | * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: | ||
| 723 | * - create a new memory slot | ||
| 724 | * - delete an existing memory slot | ||
| 725 | * - modify an existing memory slot | ||
| 726 | * -- move it in the guest physical memory space | ||
| 727 | * -- just change its flags | ||
| 728 | * | ||
| 729 | * Since flags can be changed by some of these operations, the following | ||
| 730 | * differentiation is the best we can do for __kvm_set_memory_region(): | ||
| 731 | */ | ||
| 732 | enum kvm_mr_change { | ||
| 733 | KVM_MR_CREATE, | ||
| 734 | KVM_MR_DELETE, | ||
| 735 | KVM_MR_MOVE, | ||
| 736 | KVM_MR_FLAGS_ONLY, | ||
| 737 | }; | ||
| 738 | |||
| 739 | /* | ||
| 740 | * Allocate some memory and give it an address in the guest physical address | 738 | * Allocate some memory and give it an address in the guest physical address |
| 741 | * space. | 739 | * space. |
| 742 | * | 740 | * |
| @@ -745,8 +743,7 @@ enum kvm_mr_change { | |||
| 745 | * Must be called holding mmap_sem for write. | 743 | * Must be called holding mmap_sem for write. |
| 746 | */ | 744 | */ |
| 747 | int __kvm_set_memory_region(struct kvm *kvm, | 745 | int __kvm_set_memory_region(struct kvm *kvm, |
| 748 | struct kvm_userspace_memory_region *mem, | 746 | struct kvm_userspace_memory_region *mem) |
| 749 | bool user_alloc) | ||
| 750 | { | 747 | { |
| 751 | int r; | 748 | int r; |
| 752 | gfn_t base_gfn; | 749 | gfn_t base_gfn; |
| @@ -767,7 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 767 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) | 764 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) |
| 768 | goto out; | 765 | goto out; |
| 769 | /* We can read the guest memory with __xxx_user() later on. */ | 766 | /* We can read the guest memory with __xxx_user() later on. */ |
| 770 | if (user_alloc && | 767 | if ((mem->slot < KVM_USER_MEM_SLOTS) && |
| 771 | ((mem->userspace_addr & (PAGE_SIZE - 1)) || | 768 | ((mem->userspace_addr & (PAGE_SIZE - 1)) || |
| 772 | !access_ok(VERIFY_WRITE, | 769 | !access_ok(VERIFY_WRITE, |
| 773 | (void __user *)(unsigned long)mem->userspace_addr, | 770 | (void __user *)(unsigned long)mem->userspace_addr, |
| @@ -875,7 +872,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 875 | slots = old_memslots; | 872 | slots = old_memslots; |
| 876 | } | 873 | } |
| 877 | 874 | ||
| 878 | r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); | 875 | r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); |
| 879 | if (r) | 876 | if (r) |
| 880 | goto out_slots; | 877 | goto out_slots; |
| 881 | 878 | ||
| @@ -915,7 +912,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 915 | 912 | ||
| 916 | old_memslots = install_new_memslots(kvm, slots, &new); | 913 | old_memslots = install_new_memslots(kvm, slots, &new); |
| 917 | 914 | ||
| 918 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); | 915 | kvm_arch_commit_memory_region(kvm, mem, &old, change); |
| 919 | 916 | ||
| 920 | kvm_free_physmem_slot(&old, &new); | 917 | kvm_free_physmem_slot(&old, &new); |
| 921 | kfree(old_memslots); | 918 | kfree(old_memslots); |
| @@ -932,26 +929,23 @@ out: | |||
| 932 | EXPORT_SYMBOL_GPL(__kvm_set_memory_region); | 929 | EXPORT_SYMBOL_GPL(__kvm_set_memory_region); |
| 933 | 930 | ||
| 934 | int kvm_set_memory_region(struct kvm *kvm, | 931 | int kvm_set_memory_region(struct kvm *kvm, |
| 935 | struct kvm_userspace_memory_region *mem, | 932 | struct kvm_userspace_memory_region *mem) |
| 936 | bool user_alloc) | ||
| 937 | { | 933 | { |
| 938 | int r; | 934 | int r; |
| 939 | 935 | ||
| 940 | mutex_lock(&kvm->slots_lock); | 936 | mutex_lock(&kvm->slots_lock); |
| 941 | r = __kvm_set_memory_region(kvm, mem, user_alloc); | 937 | r = __kvm_set_memory_region(kvm, mem); |
| 942 | mutex_unlock(&kvm->slots_lock); | 938 | mutex_unlock(&kvm->slots_lock); |
| 943 | return r; | 939 | return r; |
| 944 | } | 940 | } |
| 945 | EXPORT_SYMBOL_GPL(kvm_set_memory_region); | 941 | EXPORT_SYMBOL_GPL(kvm_set_memory_region); |
| 946 | 942 | ||
| 947 | int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | 943 | int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, |
| 948 | struct | 944 | struct kvm_userspace_memory_region *mem) |
| 949 | kvm_userspace_memory_region *mem, | ||
| 950 | bool user_alloc) | ||
| 951 | { | 945 | { |
| 952 | if (mem->slot >= KVM_USER_MEM_SLOTS) | 946 | if (mem->slot >= KVM_USER_MEM_SLOTS) |
| 953 | return -EINVAL; | 947 | return -EINVAL; |
| 954 | return kvm_set_memory_region(kvm, mem, user_alloc); | 948 | return kvm_set_memory_region(kvm, mem); |
| 955 | } | 949 | } |
| 956 | 950 | ||
| 957 | int kvm_get_dirty_log(struct kvm *kvm, | 951 | int kvm_get_dirty_log(struct kvm *kvm, |
| @@ -1099,7 +1093,7 @@ static int kvm_read_hva_atomic(void *data, void __user *hva, int len) | |||
| 1099 | return __copy_from_user_inatomic(data, hva, len); | 1093 | return __copy_from_user_inatomic(data, hva, len); |
| 1100 | } | 1094 | } |
| 1101 | 1095 | ||
| 1102 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | 1096 | static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, |
| 1103 | unsigned long start, int write, struct page **page) | 1097 | unsigned long start, int write, struct page **page) |
| 1104 | { | 1098 | { |
| 1105 | int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; | 1099 | int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; |
| @@ -1719,6 +1713,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
| 1719 | smp_send_reschedule(cpu); | 1713 | smp_send_reschedule(cpu); |
| 1720 | put_cpu(); | 1714 | put_cpu(); |
| 1721 | } | 1715 | } |
| 1716 | EXPORT_SYMBOL_GPL(kvm_vcpu_kick); | ||
| 1722 | #endif /* !CONFIG_S390 */ | 1717 | #endif /* !CONFIG_S390 */ |
| 1723 | 1718 | ||
| 1724 | void kvm_resched(struct kvm_vcpu *vcpu) | 1719 | void kvm_resched(struct kvm_vcpu *vcpu) |
| @@ -1816,6 +1811,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
| 1816 | continue; | 1811 | continue; |
| 1817 | } else if (pass && i > last_boosted_vcpu) | 1812 | } else if (pass && i > last_boosted_vcpu) |
| 1818 | break; | 1813 | break; |
| 1814 | if (!ACCESS_ONCE(vcpu->preempted)) | ||
| 1815 | continue; | ||
| 1819 | if (vcpu == me) | 1816 | if (vcpu == me) |
| 1820 | continue; | 1817 | continue; |
| 1821 | if (waitqueue_active(&vcpu->wq)) | 1818 | if (waitqueue_active(&vcpu->wq)) |
| @@ -2204,6 +2201,119 @@ out: | |||
| 2204 | } | 2201 | } |
| 2205 | #endif | 2202 | #endif |
| 2206 | 2203 | ||
| 2204 | static int kvm_device_ioctl_attr(struct kvm_device *dev, | ||
| 2205 | int (*accessor)(struct kvm_device *dev, | ||
| 2206 | struct kvm_device_attr *attr), | ||
| 2207 | unsigned long arg) | ||
| 2208 | { | ||
| 2209 | struct kvm_device_attr attr; | ||
| 2210 | |||
| 2211 | if (!accessor) | ||
| 2212 | return -EPERM; | ||
| 2213 | |||
| 2214 | if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) | ||
| 2215 | return -EFAULT; | ||
| 2216 | |||
| 2217 | return accessor(dev, &attr); | ||
| 2218 | } | ||
| 2219 | |||
| 2220 | static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, | ||
| 2221 | unsigned long arg) | ||
| 2222 | { | ||
| 2223 | struct kvm_device *dev = filp->private_data; | ||
| 2224 | |||
| 2225 | switch (ioctl) { | ||
| 2226 | case KVM_SET_DEVICE_ATTR: | ||
| 2227 | return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); | ||
| 2228 | case KVM_GET_DEVICE_ATTR: | ||
| 2229 | return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); | ||
| 2230 | case KVM_HAS_DEVICE_ATTR: | ||
| 2231 | return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); | ||
| 2232 | default: | ||
| 2233 | if (dev->ops->ioctl) | ||
| 2234 | return dev->ops->ioctl(dev, ioctl, arg); | ||
| 2235 | |||
| 2236 | return -ENOTTY; | ||
| 2237 | } | ||
| 2238 | } | ||
| 2239 | |||
| 2240 | static int kvm_device_release(struct inode *inode, struct file *filp) | ||
| 2241 | { | ||
| 2242 | struct kvm_device *dev = filp->private_data; | ||
| 2243 | struct kvm *kvm = dev->kvm; | ||
| 2244 | |||
| 2245 | kvm_put_kvm(kvm); | ||
| 2246 | return 0; | ||
| 2247 | } | ||
| 2248 | |||
| 2249 | static const struct file_operations kvm_device_fops = { | ||
| 2250 | .unlocked_ioctl = kvm_device_ioctl, | ||
| 2251 | #ifdef CONFIG_COMPAT | ||
| 2252 | .compat_ioctl = kvm_device_ioctl, | ||
| 2253 | #endif | ||
| 2254 | .release = kvm_device_release, | ||
| 2255 | }; | ||
| 2256 | |||
| 2257 | struct kvm_device *kvm_device_from_filp(struct file *filp) | ||
| 2258 | { | ||
| 2259 | if (filp->f_op != &kvm_device_fops) | ||
| 2260 | return NULL; | ||
| 2261 | |||
| 2262 | return filp->private_data; | ||
| 2263 | } | ||
| 2264 | |||
| 2265 | static int kvm_ioctl_create_device(struct kvm *kvm, | ||
| 2266 | struct kvm_create_device *cd) | ||
| 2267 | { | ||
| 2268 | struct kvm_device_ops *ops = NULL; | ||
| 2269 | struct kvm_device *dev; | ||
| 2270 | bool test = cd->flags & KVM_CREATE_DEVICE_TEST; | ||
| 2271 | int ret; | ||
| 2272 | |||
| 2273 | switch (cd->type) { | ||
| 2274 | #ifdef CONFIG_KVM_MPIC | ||
| 2275 | case KVM_DEV_TYPE_FSL_MPIC_20: | ||
| 2276 | case KVM_DEV_TYPE_FSL_MPIC_42: | ||
| 2277 | ops = &kvm_mpic_ops; | ||
| 2278 | break; | ||
| 2279 | #endif | ||
| 2280 | #ifdef CONFIG_KVM_XICS | ||
| 2281 | case KVM_DEV_TYPE_XICS: | ||
| 2282 | ops = &kvm_xics_ops; | ||
| 2283 | break; | ||
| 2284 | #endif | ||
| 2285 | default: | ||
| 2286 | return -ENODEV; | ||
| 2287 | } | ||
| 2288 | |||
| 2289 | if (test) | ||
| 2290 | return 0; | ||
| 2291 | |||
| 2292 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | ||
| 2293 | if (!dev) | ||
| 2294 | return -ENOMEM; | ||
| 2295 | |||
| 2296 | dev->ops = ops; | ||
| 2297 | dev->kvm = kvm; | ||
| 2298 | |||
| 2299 | ret = ops->create(dev, cd->type); | ||
| 2300 | if (ret < 0) { | ||
| 2301 | kfree(dev); | ||
| 2302 | return ret; | ||
| 2303 | } | ||
| 2304 | |||
| 2305 | ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); | ||
| 2306 | if (ret < 0) { | ||
| 2307 | ops->destroy(dev); | ||
| 2308 | return ret; | ||
| 2309 | } | ||
| 2310 | |||
| 2311 | list_add(&dev->vm_node, &kvm->devices); | ||
| 2312 | kvm_get_kvm(kvm); | ||
| 2313 | cd->fd = ret; | ||
| 2314 | return 0; | ||
| 2315 | } | ||
| 2316 | |||
| 2207 | static long kvm_vm_ioctl(struct file *filp, | 2317 | static long kvm_vm_ioctl(struct file *filp, |
| 2208 | unsigned int ioctl, unsigned long arg) | 2318 | unsigned int ioctl, unsigned long arg) |
| 2209 | { | 2319 | { |
| @@ -2225,7 +2335,7 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 2225 | sizeof kvm_userspace_mem)) | 2335 | sizeof kvm_userspace_mem)) |
| 2226 | goto out; | 2336 | goto out; |
| 2227 | 2337 | ||
| 2228 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); | 2338 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); |
| 2229 | break; | 2339 | break; |
| 2230 | } | 2340 | } |
| 2231 | case KVM_GET_DIRTY_LOG: { | 2341 | case KVM_GET_DIRTY_LOG: { |
| @@ -2304,7 +2414,8 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 2304 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 2414 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
| 2305 | goto out; | 2415 | goto out; |
| 2306 | 2416 | ||
| 2307 | r = kvm_vm_ioctl_irq_line(kvm, &irq_event); | 2417 | r = kvm_vm_ioctl_irq_line(kvm, &irq_event, |
| 2418 | ioctl == KVM_IRQ_LINE_STATUS); | ||
| 2308 | if (r) | 2419 | if (r) |
| 2309 | goto out; | 2420 | goto out; |
| 2310 | 2421 | ||
| @@ -2318,6 +2429,54 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 2318 | break; | 2429 | break; |
| 2319 | } | 2430 | } |
| 2320 | #endif | 2431 | #endif |
| 2432 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING | ||
| 2433 | case KVM_SET_GSI_ROUTING: { | ||
| 2434 | struct kvm_irq_routing routing; | ||
| 2435 | struct kvm_irq_routing __user *urouting; | ||
| 2436 | struct kvm_irq_routing_entry *entries; | ||
| 2437 | |||
| 2438 | r = -EFAULT; | ||
| 2439 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
| 2440 | goto out; | ||
| 2441 | r = -EINVAL; | ||
| 2442 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
| 2443 | goto out; | ||
| 2444 | if (routing.flags) | ||
| 2445 | goto out; | ||
| 2446 | r = -ENOMEM; | ||
| 2447 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
| 2448 | if (!entries) | ||
| 2449 | goto out; | ||
| 2450 | r = -EFAULT; | ||
| 2451 | urouting = argp; | ||
| 2452 | if (copy_from_user(entries, urouting->entries, | ||
| 2453 | routing.nr * sizeof(*entries))) | ||
| 2454 | goto out_free_irq_routing; | ||
| 2455 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
| 2456 | routing.flags); | ||
| 2457 | out_free_irq_routing: | ||
| 2458 | vfree(entries); | ||
| 2459 | break; | ||
| 2460 | } | ||
| 2461 | #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ | ||
| 2462 | case KVM_CREATE_DEVICE: { | ||
| 2463 | struct kvm_create_device cd; | ||
| 2464 | |||
| 2465 | r = -EFAULT; | ||
| 2466 | if (copy_from_user(&cd, argp, sizeof(cd))) | ||
| 2467 | goto out; | ||
| 2468 | |||
| 2469 | r = kvm_ioctl_create_device(kvm, &cd); | ||
| 2470 | if (r) | ||
| 2471 | goto out; | ||
| 2472 | |||
| 2473 | r = -EFAULT; | ||
| 2474 | if (copy_to_user(argp, &cd, sizeof(cd))) | ||
| 2475 | goto out; | ||
| 2476 | |||
| 2477 | r = 0; | ||
| 2478 | break; | ||
| 2479 | } | ||
| 2321 | default: | 2480 | default: |
| 2322 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 2481 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
| 2323 | if (r == -ENOTTY) | 2482 | if (r == -ENOTTY) |
| @@ -2447,8 +2606,11 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) | |||
| 2447 | #ifdef CONFIG_HAVE_KVM_MSI | 2606 | #ifdef CONFIG_HAVE_KVM_MSI |
| 2448 | case KVM_CAP_SIGNAL_MSI: | 2607 | case KVM_CAP_SIGNAL_MSI: |
| 2449 | #endif | 2608 | #endif |
| 2609 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING | ||
| 2610 | case KVM_CAP_IRQFD_RESAMPLE: | ||
| 2611 | #endif | ||
| 2450 | return 1; | 2612 | return 1; |
| 2451 | #ifdef KVM_CAP_IRQ_ROUTING | 2613 | #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING |
| 2452 | case KVM_CAP_IRQ_ROUTING: | 2614 | case KVM_CAP_IRQ_ROUTING: |
| 2453 | return KVM_MAX_IRQ_ROUTES; | 2615 | return KVM_MAX_IRQ_ROUTES; |
| 2454 | #endif | 2616 | #endif |
| @@ -2618,14 +2780,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
| 2618 | return NOTIFY_OK; | 2780 | return NOTIFY_OK; |
| 2619 | } | 2781 | } |
| 2620 | 2782 | ||
| 2621 | |||
| 2622 | asmlinkage void kvm_spurious_fault(void) | ||
| 2623 | { | ||
| 2624 | /* Fault while not rebooting. We want the trace. */ | ||
| 2625 | BUG(); | ||
| 2626 | } | ||
| 2627 | EXPORT_SYMBOL_GPL(kvm_spurious_fault); | ||
| 2628 | |||
| 2629 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | 2783 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, |
| 2630 | void *v) | 2784 | void *v) |
| 2631 | { | 2785 | { |
| @@ -2658,7 +2812,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
| 2658 | kfree(bus); | 2812 | kfree(bus); |
| 2659 | } | 2813 | } |
| 2660 | 2814 | ||
| 2661 | int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | 2815 | static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) |
| 2662 | { | 2816 | { |
| 2663 | const struct kvm_io_range *r1 = p1; | 2817 | const struct kvm_io_range *r1 = p1; |
| 2664 | const struct kvm_io_range *r2 = p2; | 2818 | const struct kvm_io_range *r2 = p2; |
| @@ -2670,7 +2824,7 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | |||
| 2670 | return 0; | 2824 | return 0; |
| 2671 | } | 2825 | } |
| 2672 | 2826 | ||
| 2673 | int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | 2827 | static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, |
| 2674 | gpa_t addr, int len) | 2828 | gpa_t addr, int len) |
| 2675 | { | 2829 | { |
| 2676 | bus->range[bus->dev_count++] = (struct kvm_io_range) { | 2830 | bus->range[bus->dev_count++] = (struct kvm_io_range) { |
| @@ -2685,7 +2839,7 @@ int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | |||
| 2685 | return 0; | 2839 | return 0; |
| 2686 | } | 2840 | } |
| 2687 | 2841 | ||
| 2688 | int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, | 2842 | static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, |
| 2689 | gpa_t addr, int len) | 2843 | gpa_t addr, int len) |
| 2690 | { | 2844 | { |
| 2691 | struct kvm_io_range *range, key; | 2845 | struct kvm_io_range *range, key; |
| @@ -2929,6 +3083,8 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | |||
| 2929 | static void kvm_sched_in(struct preempt_notifier *pn, int cpu) | 3083 | static void kvm_sched_in(struct preempt_notifier *pn, int cpu) |
| 2930 | { | 3084 | { |
| 2931 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); | 3085 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); |
| 3086 | if (vcpu->preempted) | ||
| 3087 | vcpu->preempted = false; | ||
| 2932 | 3088 | ||
| 2933 | kvm_arch_vcpu_load(vcpu, cpu); | 3089 | kvm_arch_vcpu_load(vcpu, cpu); |
| 2934 | } | 3090 | } |
| @@ -2938,6 +3094,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, | |||
| 2938 | { | 3094 | { |
| 2939 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); | 3095 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); |
| 2940 | 3096 | ||
| 3097 | if (current->state == TASK_RUNNING) | ||
| 3098 | vcpu->preempted = true; | ||
| 2941 | kvm_arch_vcpu_put(vcpu); | 3099 | kvm_arch_vcpu_put(vcpu); |
| 2942 | } | 3100 | } |
| 2943 | 3101 | ||
| @@ -2947,6 +3105,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
| 2947 | int r; | 3105 | int r; |
| 2948 | int cpu; | 3106 | int cpu; |
| 2949 | 3107 | ||
| 3108 | r = kvm_irqfd_init(); | ||
| 3109 | if (r) | ||
| 3110 | goto out_irqfd; | ||
| 2950 | r = kvm_arch_init(opaque); | 3111 | r = kvm_arch_init(opaque); |
| 2951 | if (r) | 3112 | if (r) |
| 2952 | goto out_fail; | 3113 | goto out_fail; |
| @@ -3027,6 +3188,8 @@ out_free_0a: | |||
| 3027 | out_free_0: | 3188 | out_free_0: |
| 3028 | kvm_arch_exit(); | 3189 | kvm_arch_exit(); |
| 3029 | out_fail: | 3190 | out_fail: |
| 3191 | kvm_irqfd_exit(); | ||
| 3192 | out_irqfd: | ||
| 3030 | return r; | 3193 | return r; |
| 3031 | } | 3194 | } |
| 3032 | EXPORT_SYMBOL_GPL(kvm_init); | 3195 | EXPORT_SYMBOL_GPL(kvm_init); |
| @@ -3043,6 +3206,7 @@ void kvm_exit(void) | |||
| 3043 | on_each_cpu(hardware_disable_nolock, NULL, 1); | 3206 | on_each_cpu(hardware_disable_nolock, NULL, 1); |
| 3044 | kvm_arch_hardware_unsetup(); | 3207 | kvm_arch_hardware_unsetup(); |
| 3045 | kvm_arch_exit(); | 3208 | kvm_arch_exit(); |
| 3209 | kvm_irqfd_exit(); | ||
| 3046 | free_cpumask_var(cpus_hardware_enabled); | 3210 | free_cpumask_var(cpus_hardware_enabled); |
| 3047 | } | 3211 | } |
| 3048 | EXPORT_SYMBOL_GPL(kvm_exit); | 3212 | EXPORT_SYMBOL_GPL(kvm_exit); |
