| author | Paolo Bonzini <pbonzini@redhat.com> | 2019-05-15 17:38:42 -0400 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2019-05-15 17:39:38 -0400 |
| commit | 59c5c58c5b93285753d5c1de34d2e00039c27bc0 (patch) | |
| tree | 19ac0493a5eb3bef477cb04f8117dad12b6bddb9 | |
| parent | f93f7ede087f2edcc18e4b02310df5749a6b5a61 (diff) | |
| parent | 4894fbcce856635c9ab79f44e50826e86bb92110 (diff) | |
Merge tag 'kvm-ppc-next-5.2-2' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD
PPC KVM update for 5.2
* Support for guests to access the new POWER9 XIVE interrupt controller
hardware directly, reducing interrupt latency and overhead for guests.
* In-kernel implementation of the H_PAGE_INIT hypercall.
* Reduce memory usage of sparsely-populated IOMMU tables.
* Several bug fixes.
Second PPC KVM update for 5.2
* Fix a bug, fix a spelling mistake, remove some useless code.
31 files changed, 2670 insertions, 295 deletions
diff --git a/Documentation/powerpc/DAWR-POWER9.txt b/Documentation/powerpc/DAWR-POWER9.txt index 2feaa6619658..bdec03650941 100644 --- a/Documentation/powerpc/DAWR-POWER9.txt +++ b/Documentation/powerpc/DAWR-POWER9.txt | |||
| @@ -56,3 +56,35 @@ POWER9. Loads and stores to the watchpoint locations will not be | |||
| 56 | trapped in GDB. The watchpoint is remembered, so if the guest is | 56 | trapped in GDB. The watchpoint is remembered, so if the guest is |
| 57 | migrated back to the POWER8 host, it will start working again. | 57 | migrated back to the POWER8 host, it will start working again. |
| 58 | 58 | ||
| 59 | Force enabling the DAWR | ||
| 60 | ============================= | ||
| 61 | Kernels (since ~v5.2) have an option to force enable the DAWR via: | ||
| 62 | |||
| 63 | echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous | ||
| 64 | |||
| 65 | This enables the DAWR even on POWER9. | ||
| 66 | |||
| 67 | This is a dangerous setting, USE AT YOUR OWN RISK. | ||
| 68 | |||
| 69 | Some users may not care about a bad user crashing their box | ||
| 70 | (i.e. single user/desktop systems) and really want the DAWR. This | ||
| 71 | allows them to force enable the DAWR. | ||
| 72 | |||
| 73 | This flag can also be used to disable DAWR access. Once this is | ||
| 74 | cleared, all DAWR access should be cleared immediately and your | ||
| 75 | machine is once again safe from crashing. | ||
| 76 | |||
| 77 | Userspace may get confused by toggling this. If DAWR is force | ||
| 78 | enabled/disabled between getting the number of breakpoints (via | ||
| 79 | PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an | ||
| 80 | inconsistent view of what's available. Similarly for guests. | ||
| 81 | |||
| 82 | For the DAWR to be enabled in a KVM guest, the DAWR needs to be force | ||
| 83 | enabled in the host AND the guest. For this reason, this won't work on | ||
| 84 | POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the | ||
| 85 | dawr_enable_dangerous file will fail if the hypervisor doesn't support | ||
| 86 | writing the DAWR. | ||
| 87 | |||
| 88 | To double check the DAWR is working, run this kernel selftest: | ||
| 89 | tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c | ||
| 90 | Any errors/failures/skips mean something is wrong. | ||
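Following up on the PTRACE_GETHWDBGINFO note and the selftest pointer in the hunk above, here is a minimal userspace sketch (not part of this commit) of how a tracer could check whether the kernel currently advertises the DAWR. It assumes a powerpc host whose UAPI provides PPC_PTRACE_GETHWDBGINFO, struct ppc_debug_info and PPC_DEBUG_FEATURE_DATA_BP_DAWR; the fork scaffolding exists only to have a tracee to query.

```c
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <asm/ptrace.h>		/* powerpc UAPI: PPC_PTRACE_GETHWDBGINFO */

int main(void)
{
	struct ppc_debug_info dbginfo;
	pid_t child = fork();

	if (child == 0) {
		/* Child: let the parent trace us, then stop. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(child, NULL, 0);

	if (ptrace(PPC_PTRACE_GETHWDBGINFO, child, NULL, &dbginfo) < 0) {
		perror("PPC_PTRACE_GETHWDBGINFO");
		kill(child, SIGKILL);
		return 1;
	}

	/*
	 * If dawr_enable_dangerous is toggled between this query and a
	 * later breakpoint request, this view is already stale, which is
	 * the inconsistency the documentation above warns about.
	 */
	printf("data breakpoints: %u, DAWR support: %s\n",
	       dbginfo.num_data_bps,
	       (dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR) ?
	       "yes" : "no");

	kill(child, SIGKILL);
	return 0;
}
```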
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 47a5eb00bc53..8ffd9beb931b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
| @@ -1967,6 +1967,7 @@ registers, find a list below: | |||
| 1967 | PPC | KVM_REG_PPC_TLB3PS | 32 | 1967 | PPC | KVM_REG_PPC_TLB3PS | 32 |
| 1968 | PPC | KVM_REG_PPC_EPTCFG | 32 | 1968 | PPC | KVM_REG_PPC_EPTCFG | 32 |
| 1969 | PPC | KVM_REG_PPC_ICP_STATE | 64 | 1969 | PPC | KVM_REG_PPC_ICP_STATE | 64 |
| 1970 | PPC | KVM_REG_PPC_VP_STATE | 128 | ||
| 1970 | PPC | KVM_REG_PPC_TB_OFFSET | 64 | 1971 | PPC | KVM_REG_PPC_TB_OFFSET | 64 |
| 1971 | PPC | KVM_REG_PPC_SPMC1 | 32 | 1972 | PPC | KVM_REG_PPC_SPMC1 | 32 |
| 1972 | PPC | KVM_REG_PPC_SPMC2 | 32 | 1973 | PPC | KVM_REG_PPC_SPMC2 | 32 |
| @@ -4487,6 +4488,15 @@ struct kvm_sync_regs { | |||
| 4487 | struct kvm_vcpu_events events; | 4488 | struct kvm_vcpu_events events; |
| 4488 | }; | 4489 | }; |
| 4489 | 4490 | ||
| 4491 | 6.75 KVM_CAP_PPC_IRQ_XIVE | ||
| 4492 | |||
| 4493 | Architectures: ppc | ||
| 4494 | Target: vcpu | ||
| 4495 | Parameters: args[0] is the XIVE device fd | ||
| 4496 | args[1] is the XIVE CPU number (server ID) for this vcpu | ||
| 4497 | |||
| 4498 | This capability connects the vcpu to an in-kernel XIVE device. | ||
| 4499 | |||
| 4490 | 7. Capabilities that can be enabled on VMs | 4500 | 7. Capabilities that can be enabled on VMs |
| 4491 | ------------------------------------------ | 4501 | ------------------------------------------ |
| 4492 | 4502 | ||
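As a quick illustration of the capability described in section 6.75 above, the sketch below (not part of this commit) shows how a VMM might create the XIVE native device and connect a vcpu to it. It assumes the KVM_DEV_TYPE_XIVE and KVM_CAP_PPC_IRQ_XIVE definitions from this series are present in the installed headers, that vm_fd and vcpu_fd are already-opened KVM file descriptors, and that server id 0 is an arbitrary choice for a single-vcpu guest.

```c
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int connect_vcpu_to_xive(int vm_fd, int vcpu_fd)
{
	struct kvm_create_device cd = {
		.type = KVM_DEV_TYPE_XIVE,	/* POWER9 XIVE native device */
	};
	struct kvm_enable_cap cap;

	/* Create the in-kernel XIVE device; cd.fd is filled in on success. */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_PPC_IRQ_XIVE;
	cap.args[0] = cd.fd;	/* args[0]: XIVE device fd         */
	cap.args[1] = 0;	/* args[1]: server id for this vcpu */

	/* The capability is enabled per vcpu, as documented above. */
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
```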
diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt new file mode 100644 index 000000000000..9a24a4525253 --- /dev/null +++ b/Documentation/virtual/kvm/devices/xive.txt | |||
| @@ -0,0 +1,197 @@ | |||
| 1 | POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1) | ||
| 2 | ========================================================== | ||
| 3 | |||
| 4 | Device types supported: | ||
| 5 | KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1 | ||
| 6 | |||
| 7 | This device acts as a VM interrupt controller. It provides the KVM | ||
| 8 | interface to configure the interrupt sources of a VM in the underlying | ||
| 9 | POWER9 XIVE interrupt controller. | ||
| 10 | |||
| 11 | Only one XIVE instance may be instantiated. A guest XIVE device | ||
| 12 | requires a POWER9 host and the guest OS should have support for the | ||
| 13 | XIVE native exploitation interrupt mode. If not, it should run using | ||
| 14 | the legacy interrupt mode, referred to as XICS (POWER7/8). | ||
| 15 | |||
| 16 | * Device Mappings | ||
| 17 | |||
| 18 | The KVM device exposes different MMIO ranges of the XIVE HW which | ||
| 19 | are required for interrupt management. These are exposed to the | ||
| 20 | guest in VMAs populated with a custom VM fault handler. | ||
| 21 | |||
| 22 | 1. Thread Interrupt Management Area (TIMA) | ||
| 23 | |||
| 24 | Each thread has an associated Thread Interrupt Management context | ||
| 25 | composed of a set of registers. These registers let the thread | ||
| 26 | handle priority management and interrupt acknowledgment. The most | ||
| 27 | important are: | ||
| 28 | |||
| 29 | - Interrupt Pending Buffer (IPB) | ||
| 30 | - Current Processor Priority (CPPR) | ||
| 31 | - Notification Source Register (NSR) | ||
| 32 | |||
| 33 | They are exposed to software in four different pages, each providing | ||
| 34 | a view at a different privilege level. The first page is for the | ||
| 35 | physical thread context and the second for the hypervisor. Only the | ||
| 36 | third (operating system) and the fourth (user level) are exposed to | ||
| 37 | the guest. | ||
| 38 | |||
| 39 | 2. Event State Buffer (ESB) | ||
| 40 | |||
| 41 | Each source is associated with an Event State Buffer (ESB) made of | ||
| 42 | a pair of even/odd pages which provide commands to | ||
| 43 | manage the source: to trigger, to EOI, or to turn off the source, for | ||
| 44 | instance. | ||
| 45 | |||
| 46 | 3. Device pass-through | ||
| 47 | |||
| 48 | When a device is passed-through into the guest, the source | ||
| 49 | interrupts are from a different HW controller (PHB4) and the ESB | ||
| 50 | pages exposed to the guest should accommodate this change. | ||
| 51 | |||
| 52 | The passthru_irq helpers, kvmppc_xive_set_mapped() and | ||
| 53 | kvmppc_xive_clr_mapped() are called when the device HW irqs are | ||
| 54 | mapped into or unmapped from the guest IRQ number space. The KVM | ||
| 55 | device extends these helpers to clear the ESB pages of the guest IRQ | ||
| 56 | number being mapped and then lets the VM fault handler repopulate. | ||
| 57 | The handler will insert the ESB page corresponding to the HW | ||
| 58 | interrupt of the device being passed-through or the initial IPI ESB | ||
| 59 | page if the device has been removed. | ||
| 60 | |||
| 61 | The ESB remapping is fully transparent to the guest and the OS | ||
| 62 | device driver. All handling is done within VFIO and the above | ||
| 63 | helpers in KVM-PPC. | ||
| 64 | |||
| 65 | * Groups: | ||
| 66 | |||
| 67 | 1. KVM_DEV_XIVE_GRP_CTRL | ||
| 68 | Provides global controls on the device | ||
| 69 | Attributes: | ||
| 70 | 1.1 KVM_DEV_XIVE_RESET (write only) | ||
| 71 | Resets the interrupt controller configuration for sources and event | ||
| 72 | queues. To be used by kexec and kdump. | ||
| 73 | Errors: none | ||
| 74 | |||
| 75 | 1.2 KVM_DEV_XIVE_EQ_SYNC (write only) | ||
| 76 | Sync all the sources and queues and mark the EQ pages dirty. This is | ||
| 77 | to make sure that a consistent memory state is captured when | ||
| 78 | migrating the VM. | ||
| 79 | Errors: none | ||
| 80 | |||
| 81 | 2. KVM_DEV_XIVE_GRP_SOURCE (write only) | ||
| 82 | Initializes a new source in the XIVE device and masks it. | ||
| 83 | Attributes: | ||
| 84 | Interrupt source number (64-bit) | ||
| 85 | The kvm_device_attr.addr points to a __u64 value: | ||
| 86 | bits: | 63 .... 2 | 1 | 0 | ||
| 87 | values: | unused | level | type | ||
| 88 | - type: 0:MSI 1:LSI | ||
| 89 | - level: assertion level in case of an LSI. | ||
| 90 | Errors: | ||
| 91 | -E2BIG: Interrupt source number is out of range | ||
| 92 | -ENOMEM: Could not create a new source block | ||
| 93 | -EFAULT: Invalid user pointer for attr->addr. | ||
| 94 | -ENXIO: Could not allocate underlying HW interrupt | ||
| 95 | |||
| 96 | 3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only) | ||
| 97 | Configures source targeting | ||
| 98 | Attributes: | ||
| 99 | Interrupt source number (64-bit) | ||
| 100 | The kvm_device_attr.addr points to a __u64 value: | ||
| 101 | bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0 | ||
| 102 | values: | eisn | mask | server | priority | ||
| 103 | - priority: 0-7 interrupt priority level | ||
| 104 | - server: CPU number chosen to handle the interrupt | ||
| 105 | - mask: mask flag (unused) | ||
| 106 | - eisn: Effective Interrupt Source Number | ||
| 107 | Errors: | ||
| 108 | -ENOENT: Unknown source number | ||
| 109 | -EINVAL: Not initialized source number | ||
| 110 | -EINVAL: Invalid priority | ||
| 111 | -EINVAL: Invalid CPU number. | ||
| 112 | -EFAULT: Invalid user pointer for attr->addr. | ||
| 113 | -ENXIO: CPU event queues not configured or configuration of the | ||
| 114 | underlying HW interrupt failed | ||
| 115 | -EBUSY: No CPU available to serve interrupt | ||
| 116 | |||
| 117 | 4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write) | ||
| 118 | Configures an event queue of a CPU | ||
| 119 | Attributes: | ||
| 120 | EQ descriptor identifier (64-bit) | ||
| 121 | The EQ descriptor identifier is a tuple (server, priority): | ||
| 122 | bits: | 63 .... 32 | 31 .. 3 | 2 .. 0 | ||
| 123 | values: | unused | server | priority | ||
| 124 | The kvm_device_attr.addr points to : | ||
| 125 | struct kvm_ppc_xive_eq { | ||
| 126 | __u32 flags; | ||
| 127 | __u32 qshift; | ||
| 128 | __u64 qaddr; | ||
| 129 | __u32 qtoggle; | ||
| 130 | __u32 qindex; | ||
| 131 | __u8 pad[40]; | ||
| 132 | }; | ||
| 133 | - flags: queue flags | ||
| 134 | KVM_XIVE_EQ_ALWAYS_NOTIFY (required) | ||
| 135 | forces notification without using the coalescing mechanism | ||
| 136 | provided by the XIVE END ESBs. | ||
| 137 | - qshift: queue size (power of 2) | ||
| 138 | - qaddr: real address of queue | ||
| 139 | - qtoggle: current queue toggle bit | ||
| 140 | - qindex: current queue index | ||
| 141 | - pad: reserved for future use | ||
| 142 | Errors: | ||
| 143 | -ENOENT: Invalid CPU number | ||
| 144 | -EINVAL: Invalid priority | ||
| 145 | -EINVAL: Invalid flags | ||
| 146 | -EINVAL: Invalid queue size | ||
| 147 | -EINVAL: Invalid queue address | ||
| 148 | -EFAULT: Invalid user pointer for attr->addr. | ||
| 149 | -EIO: Configuration of the underlying HW failed | ||
| 150 | |||
| 151 | 5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only) | ||
| 152 | Synchronize the source to flush event notifications | ||
| 153 | Attributes: | ||
| 154 | Interrupt source number (64-bit) | ||
| 155 | Errors: | ||
| 156 | -ENOENT: Unknown source number | ||
| 157 | -EINVAL: Not initialized source number | ||
| 158 | |||
| 159 | * VCPU state | ||
| 160 | |||
| 161 | The XIVE IC maintains VP interrupt state in an internal structure | ||
| 162 | called the NVT. When a VP is not dispatched on a HW processor | ||
| 163 | thread, this structure can be updated by HW if the VP is the target | ||
| 164 | of an event notification. | ||
| 165 | |||
| 166 | It is important for migration to capture the cached IPB from the NVT | ||
| 167 | as it synthesizes the priorities of the pending interrupts. We | ||
| 168 | capture a bit more to report debug information. | ||
| 169 | |||
| 170 | KVM_REG_PPC_VP_STATE (2 * 64bits) | ||
| 171 | bits: | 63 .... 32 | 31 .... 0 | | ||
| 172 | values: | TIMA word0 | TIMA word1 | | ||
| 173 | bits: | 127 .......... 64 | | ||
| 174 | values: | unused | | ||
| 175 | |||
| 176 | * Migration: | ||
| 177 | |||
| 178 | Saving the state of a VM using the XIVE native exploitation mode | ||
| 179 | should follow a specific sequence. When the VM is stopped: | ||
| 180 | |||
| 181 | 1. Mask all sources (PQ=01) to stop the flow of events. | ||
| 182 | |||
| 183 | 2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to | ||
| 184 | flush any in-flight event notification and to stabilize the EQs. At | ||
| 185 | this stage, the EQ pages are marked dirty to make sure they are | ||
| 186 | transferred in the migration sequence. | ||
| 187 | |||
| 188 | 3. Capture the state of the source targeting, the EQs configuration | ||
| 189 | and the state of thread interrupt context registers. | ||
| 190 | |||
| 191 | Restore is similar: | ||
| 192 | |||
| 193 | 1. Restore the EQ configuration, as targeting depends on it. | ||
| 194 | 2. Restore targeting | ||
| 195 | 3. Restore the thread interrupt contexts | ||
| 196 | 4. Restore the source states | ||
| 197 | 5. Let the vCPU run | ||
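To make the group/attribute plumbing described above concrete, here is a hedged sketch (not part of this commit) that initializes one interrupt source as an MSI through KVM_DEV_XIVE_GRP_SOURCE. It assumes a powerpc build where the KVM_DEV_XIVE_* UAPI from this series is installed, and that xive_fd is the fd returned by KVM_CREATE_DEVICE for a KVM_DEV_TYPE_XIVE device; error values map to the list given for that group.

```c
#include <sys/ioctl.h>
#include <linux/kvm.h>	/* pulls in the powerpc asm/kvm.h UAPI */

static int xive_init_msi_source(int xive_fd, unsigned long irq)
{
	__u64 state = 0;	/* bit 0: type = 0 (MSI), bit 1: level unused */
	struct kvm_device_attr attr = {
		.group = KVM_DEV_XIVE_GRP_SOURCE,
		.attr  = irq,				/* 64-bit source number */
		.addr  = (__u64)(unsigned long)&state,	/* value read by KVM    */
	};

	/* Fails with errno E2BIG/ENOMEM/EFAULT/ENXIO as listed in the docs. */
	return ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
}
```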
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index ece4dc89c90b..0fe8c1e46bbc 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h | |||
| @@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void) | |||
| 90 | extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); | 90 | extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); |
| 91 | int hw_breakpoint_handler(struct die_args *args); | 91 | int hw_breakpoint_handler(struct die_args *args); |
| 92 | 92 | ||
| 93 | extern int set_dawr(struct arch_hw_breakpoint *brk); | ||
| 94 | extern bool dawr_force_enable; | ||
| 95 | static inline bool dawr_enabled(void) | ||
| 96 | { | ||
| 97 | return dawr_force_enable; | ||
| 98 | } | ||
| 99 | |||
| 93 | #else /* CONFIG_HAVE_HW_BREAKPOINT */ | 100 | #else /* CONFIG_HAVE_HW_BREAKPOINT */ |
| 94 | static inline void hw_breakpoint_disable(void) { } | 101 | static inline void hw_breakpoint_disable(void) { } |
| 95 | static inline void thread_change_pc(struct task_struct *tsk, | 102 | static inline void thread_change_pc(struct task_struct *tsk, |
| 96 | struct pt_regs *regs) { } | 103 | struct pt_regs *regs) { } |
| 104 | static inline bool dawr_enabled(void) { return false; } | ||
| 97 | #endif /* CONFIG_HAVE_HW_BREAKPOINT */ | 105 | #endif /* CONFIG_HAVE_HW_BREAKPOINT */ |
| 98 | #endif /* __KERNEL__ */ | 106 | #endif /* __KERNEL__ */ |
| 99 | #endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */ | 107 | #endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */ |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e6b5bb012ccb..013c76a0a03e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
| @@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table { | |||
| 201 | struct kref kref; | 201 | struct kref kref; |
| 202 | }; | 202 | }; |
| 203 | 203 | ||
| 204 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) | ||
| 205 | |||
| 204 | struct kvmppc_spapr_tce_table { | 206 | struct kvmppc_spapr_tce_table { |
| 205 | struct list_head list; | 207 | struct list_head list; |
| 206 | struct kvm *kvm; | 208 | struct kvm *kvm; |
| @@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table { | |||
| 210 | u64 offset; /* in pages */ | 212 | u64 offset; /* in pages */ |
| 211 | u64 size; /* window size in pages */ | 213 | u64 size; /* window size in pages */ |
| 212 | struct list_head iommu_tables; | 214 | struct list_head iommu_tables; |
| 215 | struct mutex alloc_lock; | ||
| 213 | struct page *pages[0]; | 216 | struct page *pages[0]; |
| 214 | }; | 217 | }; |
| 215 | 218 | ||
| @@ -222,6 +225,7 @@ extern struct kvm_device_ops kvm_xics_ops; | |||
| 222 | struct kvmppc_xive; | 225 | struct kvmppc_xive; |
| 223 | struct kvmppc_xive_vcpu; | 226 | struct kvmppc_xive_vcpu; |
| 224 | extern struct kvm_device_ops kvm_xive_ops; | 227 | extern struct kvm_device_ops kvm_xive_ops; |
| 228 | extern struct kvm_device_ops kvm_xive_native_ops; | ||
| 225 | 229 | ||
| 226 | struct kvmppc_passthru_irqmap; | 230 | struct kvmppc_passthru_irqmap; |
| 227 | 231 | ||
| @@ -312,7 +316,11 @@ struct kvm_arch { | |||
| 312 | #endif | 316 | #endif |
| 313 | #ifdef CONFIG_KVM_XICS | 317 | #ifdef CONFIG_KVM_XICS |
| 314 | struct kvmppc_xics *xics; | 318 | struct kvmppc_xics *xics; |
| 315 | struct kvmppc_xive *xive; | 319 | struct kvmppc_xive *xive; /* Current XIVE device in use */ |
| 320 | struct { | ||
| 321 | struct kvmppc_xive *native; | ||
| 322 | struct kvmppc_xive *xics_on_xive; | ||
| 323 | } xive_devices; | ||
| 316 | struct kvmppc_passthru_irqmap *pimap; | 324 | struct kvmppc_passthru_irqmap *pimap; |
| 317 | #endif | 325 | #endif |
| 318 | struct kvmppc_ops *kvm_ops; | 326 | struct kvmppc_ops *kvm_ops; |
| @@ -449,6 +457,7 @@ struct kvmppc_passthru_irqmap { | |||
| 449 | #define KVMPPC_IRQ_DEFAULT 0 | 457 | #define KVMPPC_IRQ_DEFAULT 0 |
| 450 | #define KVMPPC_IRQ_MPIC 1 | 458 | #define KVMPPC_IRQ_MPIC 1 |
| 451 | #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */ | 459 | #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */ |
| 460 | #define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */ | ||
| 452 | 461 | ||
| 453 | #define MMIO_HPTE_CACHE_SIZE 4 | 462 | #define MMIO_HPTE_CACHE_SIZE 4 |
| 454 | 463 | ||
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ac22b28ae78d..bc892380e6cd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
| @@ -197,10 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table( | |||
| 197 | (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ | 197 | (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ |
| 198 | (stt)->size, (ioba), (npages)) ? \ | 198 | (stt)->size, (ioba), (npages)) ? \ |
| 199 | H_PARAMETER : H_SUCCESS) | 199 | H_PARAMETER : H_SUCCESS) |
| 200 | extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, | ||
| 201 | unsigned long *ua, unsigned long **prmap); | ||
| 202 | extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt, | ||
| 203 | unsigned long idx, unsigned long tce); | ||
| 204 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 200 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
| 205 | unsigned long ioba, unsigned long tce); | 201 | unsigned long ioba, unsigned long tce); |
| 206 | extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | 202 | extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, |
| @@ -273,6 +269,7 @@ union kvmppc_one_reg { | |||
| 273 | u64 addr; | 269 | u64 addr; |
| 274 | u64 length; | 270 | u64 length; |
| 275 | } vpaval; | 271 | } vpaval; |
| 272 | u64 xive_timaval[2]; | ||
| 276 | }; | 273 | }; |
| 277 | 274 | ||
| 278 | struct kvmppc_ops { | 275 | struct kvmppc_ops { |
| @@ -480,6 +477,9 @@ extern void kvm_hv_vm_activated(void); | |||
| 480 | extern void kvm_hv_vm_deactivated(void); | 477 | extern void kvm_hv_vm_deactivated(void); |
| 481 | extern bool kvm_hv_mode_active(void); | 478 | extern bool kvm_hv_mode_active(void); |
| 482 | 479 | ||
| 480 | extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, | ||
| 481 | struct kvm_nested_guest *nested); | ||
| 482 | |||
| 483 | #else | 483 | #else |
| 484 | static inline void __init kvm_cma_reserve(void) | 484 | static inline void __init kvm_cma_reserve(void) |
| 485 | {} | 485 | {} |
| @@ -594,6 +594,22 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); | |||
| 594 | extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, | 594 | extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, |
| 595 | int level, bool line_status); | 595 | int level, bool line_status); |
| 596 | extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); | 596 | extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); |
| 597 | |||
| 598 | static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) | ||
| 599 | { | ||
| 600 | return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE; | ||
| 601 | } | ||
| 602 | |||
| 603 | extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, | ||
| 604 | struct kvm_vcpu *vcpu, u32 cpu); | ||
| 605 | extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); | ||
| 606 | extern void kvmppc_xive_native_init_module(void); | ||
| 607 | extern void kvmppc_xive_native_exit_module(void); | ||
| 608 | extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, | ||
| 609 | union kvmppc_one_reg *val); | ||
| 610 | extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, | ||
| 611 | union kvmppc_one_reg *val); | ||
| 612 | |||
| 597 | #else | 613 | #else |
| 598 | static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, | 614 | static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, |
| 599 | u32 priority) { return -1; } | 615 | u32 priority) { return -1; } |
| @@ -617,6 +633,21 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur | |||
| 617 | static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, | 633 | static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, |
| 618 | int level, bool line_status) { return -ENODEV; } | 634 | int level, bool line_status) { return -ENODEV; } |
| 619 | static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } | 635 | static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } |
| 636 | |||
| 637 | static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) | ||
| 638 | { return 0; } | ||
| 639 | static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, | ||
| 640 | struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; } | ||
| 641 | static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { } | ||
| 642 | static inline void kvmppc_xive_native_init_module(void) { } | ||
| 643 | static inline void kvmppc_xive_native_exit_module(void) { } | ||
| 644 | static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, | ||
| 645 | union kvmppc_one_reg *val) | ||
| 646 | { return 0; } | ||
| 647 | static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, | ||
| 648 | union kvmppc_one_reg *val) | ||
| 649 | { return -ENOENT; } | ||
| 650 | |||
| 620 | #endif /* CONFIG_KVM_XIVE */ | 651 | #endif /* CONFIG_KVM_XIVE */ |
| 621 | 652 | ||
| 622 | #if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER) | 653 | #if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER) |
| @@ -665,6 +696,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, | |||
| 665 | unsigned long pte_index); | 696 | unsigned long pte_index); |
| 666 | long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, | 697 | long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, |
| 667 | unsigned long pte_index); | 698 | unsigned long pte_index); |
| 699 | long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, | ||
| 700 | unsigned long dest, unsigned long src); | ||
| 668 | long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, | 701 | long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, |
| 669 | unsigned long slb_v, unsigned int status, bool data); | 702 | unsigned long slb_v, unsigned int status, bool data); |
| 670 | unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); | 703 | unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); |
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 870fb7b239ea..e1d118ac61dc 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h | |||
| @@ -186,8 +186,8 @@ | |||
| 186 | #define OPAL_XIVE_FREE_IRQ 140 | 186 | #define OPAL_XIVE_FREE_IRQ 140 |
| 187 | #define OPAL_XIVE_SYNC 141 | 187 | #define OPAL_XIVE_SYNC 141 |
| 188 | #define OPAL_XIVE_DUMP 142 | 188 | #define OPAL_XIVE_DUMP 142 |
| 189 | #define OPAL_XIVE_RESERVED3 143 | 189 | #define OPAL_XIVE_GET_QUEUE_STATE 143 |
| 190 | #define OPAL_XIVE_RESERVED4 144 | 190 | #define OPAL_XIVE_SET_QUEUE_STATE 144 |
| 191 | #define OPAL_SIGNAL_SYSTEM_RESET 145 | 191 | #define OPAL_SIGNAL_SYSTEM_RESET 145 |
| 192 | #define OPAL_NPU_INIT_CONTEXT 146 | 192 | #define OPAL_NPU_INIT_CONTEXT 146 |
| 193 | #define OPAL_NPU_DESTROY_CONTEXT 147 | 193 | #define OPAL_NPU_DESTROY_CONTEXT 147 |
| @@ -210,7 +210,8 @@ | |||
| 210 | #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 | 210 | #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 |
| 211 | #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 | 211 | #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 |
| 212 | #define OPAL_NX_COPROC_INIT 167 | 212 | #define OPAL_NX_COPROC_INIT 167 |
| 213 | #define OPAL_LAST 167 | 213 | #define OPAL_XIVE_GET_VP_STATE 170 |
| 214 | #define OPAL_LAST 170 | ||
| 214 | 215 | ||
| 215 | #define QUIESCE_HOLD 1 /* Spin all calls at entry */ | 216 | #define QUIESCE_HOLD 1 /* Spin all calls at entry */ |
| 216 | #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ | 217 | #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ |
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a55b01c90bb1..4e978d4dea5c 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h | |||
| @@ -279,6 +279,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id); | |||
| 279 | int64_t opal_xive_free_irq(uint32_t girq); | 279 | int64_t opal_xive_free_irq(uint32_t girq); |
| 280 | int64_t opal_xive_sync(uint32_t type, uint32_t id); | 280 | int64_t opal_xive_sync(uint32_t type, uint32_t id); |
| 281 | int64_t opal_xive_dump(uint32_t type, uint32_t id); | 281 | int64_t opal_xive_dump(uint32_t type, uint32_t id); |
| 282 | int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio, | ||
| 283 | __be32 *out_qtoggle, | ||
| 284 | __be32 *out_qindex); | ||
| 285 | int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio, | ||
| 286 | uint32_t qtoggle, | ||
| 287 | uint32_t qindex); | ||
| 288 | int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01); | ||
| 282 | int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, | 289 | int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, |
| 283 | uint64_t desc, uint16_t pe_number); | 290 | uint64_t desc, uint16_t pe_number); |
| 284 | 291 | ||
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 3c704f5dd3ae..eaf76f57023a 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | * same offset regardless of where the code is executing | 23 | * same offset regardless of where the code is executing |
| 24 | */ | 24 | */ |
| 25 | extern void __iomem *xive_tima; | 25 | extern void __iomem *xive_tima; |
| 26 | extern unsigned long xive_tima_os; | ||
| 26 | 27 | ||
| 27 | /* | 28 | /* |
| 28 | * Offset in the TM area of our current execution level (provided by | 29 | * Offset in the TM area of our current execution level (provided by |
| @@ -73,6 +74,8 @@ struct xive_q { | |||
| 73 | u32 esc_irq; | 74 | u32 esc_irq; |
| 74 | atomic_t count; | 75 | atomic_t count; |
| 75 | atomic_t pending_count; | 76 | atomic_t pending_count; |
| 77 | u64 guest_qaddr; | ||
| 78 | u32 guest_qshift; | ||
| 76 | }; | 79 | }; |
| 77 | 80 | ||
| 78 | /* Global enable flags for the XIVE support */ | 81 | /* Global enable flags for the XIVE support */ |
| @@ -109,12 +112,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, | |||
| 109 | extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); | 112 | extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); |
| 110 | 113 | ||
| 111 | extern void xive_native_sync_source(u32 hw_irq); | 114 | extern void xive_native_sync_source(u32 hw_irq); |
| 115 | extern void xive_native_sync_queue(u32 hw_irq); | ||
| 112 | extern bool is_xive_irq(struct irq_chip *chip); | 116 | extern bool is_xive_irq(struct irq_chip *chip); |
| 113 | extern int xive_native_enable_vp(u32 vp_id, bool single_escalation); | 117 | extern int xive_native_enable_vp(u32 vp_id, bool single_escalation); |
| 114 | extern int xive_native_disable_vp(u32 vp_id); | 118 | extern int xive_native_disable_vp(u32 vp_id); |
| 115 | extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); | 119 | extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); |
| 116 | extern bool xive_native_has_single_escalation(void); | 120 | extern bool xive_native_has_single_escalation(void); |
| 117 | 121 | ||
| 122 | extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio, | ||
| 123 | u64 *out_qpage, | ||
| 124 | u64 *out_qsize, | ||
| 125 | u64 *out_qeoi_page, | ||
| 126 | u32 *out_escalate_irq, | ||
| 127 | u64 *out_qflags); | ||
| 128 | |||
| 129 | extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle, | ||
| 130 | u32 *qindex); | ||
| 131 | extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, | ||
| 132 | u32 qindex); | ||
| 133 | extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state); | ||
| 134 | |||
| 118 | #else | 135 | #else |
| 119 | 136 | ||
| 120 | static inline bool xive_enabled(void) { return false; } | 137 | static inline bool xive_enabled(void) { return false; } |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 26ca425f4c2c..b0f72dea8b11 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
| @@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char { | |||
| 482 | #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ | 482 | #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ |
| 483 | #define KVM_REG_PPC_ICP_PPRI_MASK 0xff | 483 | #define KVM_REG_PPC_ICP_PPRI_MASK 0xff |
| 484 | 484 | ||
| 485 | #define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d) | ||
| 486 | |||
| 485 | /* Device control API: PPC-specific devices */ | 487 | /* Device control API: PPC-specific devices */ |
| 486 | #define KVM_DEV_MPIC_GRP_MISC 1 | 488 | #define KVM_DEV_MPIC_GRP_MISC 1 |
| 487 | #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ | 489 | #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ |
| @@ -677,4 +679,48 @@ struct kvm_ppc_cpu_char { | |||
| 677 | #define KVM_XICS_PRESENTED (1ULL << 43) | 679 | #define KVM_XICS_PRESENTED (1ULL << 43) |
| 678 | #define KVM_XICS_QUEUED (1ULL << 44) | 680 | #define KVM_XICS_QUEUED (1ULL << 44) |
| 679 | 681 | ||
| 682 | /* POWER9 XIVE Native Interrupt Controller */ | ||
| 683 | #define KVM_DEV_XIVE_GRP_CTRL 1 | ||
| 684 | #define KVM_DEV_XIVE_RESET 1 | ||
| 685 | #define KVM_DEV_XIVE_EQ_SYNC 2 | ||
| 686 | #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ | ||
| 687 | #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ | ||
| 688 | #define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ | ||
| 689 | #define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */ | ||
| 690 | |||
| 691 | /* Layout of 64-bit XIVE source attribute values */ | ||
| 692 | #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) | ||
| 693 | #define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1) | ||
| 694 | |||
| 695 | /* Layout of 64-bit XIVE source configuration attribute values */ | ||
| 696 | #define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0 | ||
| 697 | #define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7 | ||
| 698 | #define KVM_XIVE_SOURCE_SERVER_SHIFT 3 | ||
| 699 | #define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL | ||
| 700 | #define KVM_XIVE_SOURCE_MASKED_SHIFT 32 | ||
| 701 | #define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL | ||
| 702 | #define KVM_XIVE_SOURCE_EISN_SHIFT 33 | ||
| 703 | #define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL | ||
| 704 | |||
| 705 | /* Layout of 64-bit EQ identifier */ | ||
| 706 | #define KVM_XIVE_EQ_PRIORITY_SHIFT 0 | ||
| 707 | #define KVM_XIVE_EQ_PRIORITY_MASK 0x7 | ||
| 708 | #define KVM_XIVE_EQ_SERVER_SHIFT 3 | ||
| 709 | #define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL | ||
| 710 | |||
| 711 | /* Layout of EQ configuration values (64 bytes) */ | ||
| 712 | struct kvm_ppc_xive_eq { | ||
| 713 | __u32 flags; | ||
| 714 | __u32 qshift; | ||
| 715 | __u64 qaddr; | ||
| 716 | __u32 qtoggle; | ||
| 717 | __u32 qindex; | ||
| 718 | __u8 pad[40]; | ||
| 719 | }; | ||
| 720 | |||
| 721 | #define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001 | ||
| 722 | |||
| 723 | #define KVM_XIVE_TIMA_PAGE_OFFSET 0 | ||
| 724 | #define KVM_XIVE_ESB_PAGE_OFFSET 4 | ||
| 725 | |||
| 680 | #endif /* __LINUX_KVM_POWERPC_H */ | 726 | #endif /* __LINUX_KVM_POWERPC_H */ |
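The shift/mask macros just added suggest how userspace would pack the 64-bit source-configuration value and the 64-bit EQ identifier. The helpers below are an illustrative sketch, not part of the commit, and assume the powerpc asm/kvm.h UAPI from this series is available through linux/kvm.h.

```c
#include <linux/kvm.h>	/* KVM_XIVE_SOURCE_* and KVM_XIVE_EQ_* macros */

/* Pack eisn/server/priority into a KVM_DEV_XIVE_GRP_SOURCE_CONFIG value. */
static __u64 xive_source_config(__u64 eisn, __u32 server, __u8 prio)
{
	return ((eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
		KVM_XIVE_SOURCE_EISN_MASK) |
	       (((__u64)server << KVM_XIVE_SOURCE_SERVER_SHIFT) &
		KVM_XIVE_SOURCE_SERVER_MASK) |
	       (((__u64)prio << KVM_XIVE_SOURCE_PRIORITY_SHIFT) &
		KVM_XIVE_SOURCE_PRIORITY_MASK);
}

/* Pack server/priority into a KVM_DEV_XIVE_GRP_EQ_CONFIG identifier. */
static __u64 xive_eq_id(__u32 server, __u8 prio)
{
	return (((__u64)server << KVM_XIVE_EQ_SERVER_SHIFT) &
		KVM_XIVE_EQ_SERVER_MASK) |
	       (((__u64)prio << KVM_XIVE_EQ_PRIORITY_SHIFT) &
		KVM_XIVE_EQ_PRIORITY_MASK);
}
```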
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index fec8a6773119..da307dd93ee3 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c | |||
| @@ -29,11 +29,15 @@ | |||
| 29 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
| 30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
| 31 | #include <linux/smp.h> | 31 | #include <linux/smp.h> |
| 32 | #include <linux/debugfs.h> | ||
| 33 | #include <linux/init.h> | ||
| 32 | 34 | ||
| 33 | #include <asm/hw_breakpoint.h> | 35 | #include <asm/hw_breakpoint.h> |
| 34 | #include <asm/processor.h> | 36 | #include <asm/processor.h> |
| 35 | #include <asm/sstep.h> | 37 | #include <asm/sstep.h> |
| 36 | #include <asm/debug.h> | 38 | #include <asm/debug.h> |
| 39 | #include <asm/debugfs.h> | ||
| 40 | #include <asm/hvcall.h> | ||
| 37 | #include <linux/uaccess.h> | 41 | #include <linux/uaccess.h> |
| 38 | 42 | ||
| 39 | /* | 43 | /* |
| @@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, | |||
| 174 | if (!ppc_breakpoint_available()) | 178 | if (!ppc_breakpoint_available()) |
| 175 | return -ENODEV; | 179 | return -ENODEV; |
| 176 | length_max = 8; /* DABR */ | 180 | length_max = 8; /* DABR */ |
| 177 | if (cpu_has_feature(CPU_FTR_DAWR)) { | 181 | if (dawr_enabled()) { |
| 178 | length_max = 512 ; /* 64 doublewords */ | 182 | length_max = 512 ; /* 64 doublewords */ |
| 179 | /* DAWR region can't cross 512 boundary */ | 183 | /* DAWR region can't cross 512 boundary */ |
| 180 | if ((attr->bp_addr >> 9) != | 184 | if ((attr->bp_addr >> 9) != |
| @@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) | |||
| 376 | { | 380 | { |
| 377 | /* TODO */ | 381 | /* TODO */ |
| 378 | } | 382 | } |
| 383 | |||
| 384 | bool dawr_force_enable; | ||
| 385 | EXPORT_SYMBOL_GPL(dawr_force_enable); | ||
| 386 | |||
| 387 | static ssize_t dawr_write_file_bool(struct file *file, | ||
| 388 | const char __user *user_buf, | ||
| 389 | size_t count, loff_t *ppos) | ||
| 390 | { | ||
| 391 | struct arch_hw_breakpoint null_brk = {0, 0, 0}; | ||
| 392 | size_t rc; | ||
| 393 | |||
| 394 | /* Send error to user if the hypervisor won't allow us to write DAWR */ | ||
| 395 | if ((!dawr_force_enable) && | ||
| 396 | (firmware_has_feature(FW_FEATURE_LPAR)) && | ||
| 397 | (set_dawr(&null_brk) != H_SUCCESS)) | ||
| 398 | return -1; | ||
| 399 | |||
| 400 | rc = debugfs_write_file_bool(file, user_buf, count, ppos); | ||
| 401 | if (rc) | ||
| 402 | return rc; | ||
| 403 | |||
| 404 | /* If we are clearing, make sure all CPUs have the DAWR cleared */ | ||
| 405 | if (!dawr_force_enable) | ||
| 406 | smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0); | ||
| 407 | |||
| 408 | return rc; | ||
| 409 | } | ||
| 410 | |||
| 411 | static const struct file_operations dawr_enable_fops = { | ||
| 412 | .read = debugfs_read_file_bool, | ||
| 413 | .write = dawr_write_file_bool, | ||
| 414 | .open = simple_open, | ||
| 415 | .llseek = default_llseek, | ||
| 416 | }; | ||
| 417 | |||
| 418 | static int __init dawr_force_setup(void) | ||
| 419 | { | ||
| 420 | dawr_force_enable = false; | ||
| 421 | |||
| 422 | if (cpu_has_feature(CPU_FTR_DAWR)) { | ||
| 423 | /* Don't setup sysfs file for user control on P8 */ | ||
| 424 | dawr_force_enable = true; | ||
| 425 | return 0; | ||
| 426 | } | ||
| 427 | |||
| 428 | if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { | ||
| 429 | /* Turn DAWR off by default, but allow admin to turn it on */ | ||
| 430 | dawr_force_enable = false; | ||
| 431 | debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, | ||
| 432 | powerpc_debugfs_root, | ||
| 433 | &dawr_force_enable, | ||
| 434 | &dawr_enable_fops); | ||
| 435 | } | ||
| 436 | return 0; | ||
| 437 | } | ||
| 438 | arch_initcall(dawr_force_setup); | ||
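For completeness, a small userspace sketch (not part of this commit) that flips the dawr_enable_dangerous switch created above. It assumes debugfs is mounted at /sys/kernel/debug and root privileges; a rejected write of 'Y' corresponds to the hypervisor refusing the DAWR, as described in the documentation hunk earlier in this commit.

```c
#include <stdio.h>

static int set_dawr_dangerous(int enable)
{
	const char *path = "/sys/kernel/debug/powerpc/dawr_enable_dangerous";
	FILE *f = fopen(path, "w");
	int ret;

	if (!f)
		return -1;

	/* debugfs bool files accept Y/N (or 1/0). */
	ret = (fputs(enable ? "Y" : "N", f) == EOF) ? -1 : 0;
	if (fclose(f) == EOF)
		ret = -1;	/* the write may be flushed and rejected here */
	return ret;
}
```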
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dd9e0d5386ee..225705aac814 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c | |||
| @@ -67,6 +67,7 @@ | |||
| 67 | #include <asm/cpu_has_feature.h> | 67 | #include <asm/cpu_has_feature.h> |
| 68 | #include <asm/asm-prototypes.h> | 68 | #include <asm/asm-prototypes.h> |
| 69 | #include <asm/stacktrace.h> | 69 | #include <asm/stacktrace.h> |
| 70 | #include <asm/hw_breakpoint.h> | ||
| 70 | 71 | ||
| 71 | #include <linux/kprobes.h> | 72 | #include <linux/kprobes.h> |
| 72 | #include <linux/kdebug.h> | 73 | #include <linux/kdebug.h> |
| @@ -784,7 +785,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) | |||
| 784 | return __set_dabr(dabr, dabrx); | 785 | return __set_dabr(dabr, dabrx); |
| 785 | } | 786 | } |
| 786 | 787 | ||
| 787 | static inline int set_dawr(struct arch_hw_breakpoint *brk) | 788 | int set_dawr(struct arch_hw_breakpoint *brk) |
| 788 | { | 789 | { |
| 789 | unsigned long dawr, dawrx, mrd; | 790 | unsigned long dawr, dawrx, mrd; |
| 790 | 791 | ||
| @@ -816,7 +817,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) | |||
| 816 | { | 817 | { |
| 817 | memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); | 818 | memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); |
| 818 | 819 | ||
| 819 | if (cpu_has_feature(CPU_FTR_DAWR)) | 820 | if (dawr_enabled()) |
| 820 | // Power8 or later | 821 | // Power8 or later |
| 821 | set_dawr(brk); | 822 | set_dawr(brk); |
| 822 | else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) | 823 | else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) |
| @@ -830,8 +831,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) | |||
| 830 | /* Check if we have DAWR or DABR hardware */ | 831 | /* Check if we have DAWR or DABR hardware */ |
| 831 | bool ppc_breakpoint_available(void) | 832 | bool ppc_breakpoint_available(void) |
| 832 | { | 833 | { |
| 833 | if (cpu_has_feature(CPU_FTR_DAWR)) | 834 | if (dawr_enabled()) |
| 834 | return true; /* POWER8 DAWR */ | 835 | return true; /* POWER8 DAWR or POWER9 forced DAWR */ |
| 835 | if (cpu_has_feature(CPU_FTR_ARCH_207S)) | 836 | if (cpu_has_feature(CPU_FTR_ARCH_207S)) |
| 836 | return false; /* POWER9 with DAWR disabled */ | 837 | return false; /* POWER9 with DAWR disabled */ |
| 837 | /* DABR: Everything but POWER8 and POWER9 */ | 838 | /* DABR: Everything but POWER8 and POWER9 */ |
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d9ac7d94656e..684b0b315c32 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c | |||
| @@ -43,6 +43,7 @@ | |||
| 43 | #include <asm/tm.h> | 43 | #include <asm/tm.h> |
| 44 | #include <asm/asm-prototypes.h> | 44 | #include <asm/asm-prototypes.h> |
| 45 | #include <asm/debug.h> | 45 | #include <asm/debug.h> |
| 46 | #include <asm/hw_breakpoint.h> | ||
| 46 | 47 | ||
| 47 | #define CREATE_TRACE_POINTS | 48 | #define CREATE_TRACE_POINTS |
| 48 | #include <trace/events/syscalls.h> | 49 | #include <trace/events/syscalls.h> |
| @@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request, | |||
| 3088 | dbginfo.sizeof_condition = 0; | 3089 | dbginfo.sizeof_condition = 0; |
| 3089 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 3090 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
| 3090 | dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; | 3091 | dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; |
| 3091 | if (cpu_has_feature(CPU_FTR_DAWR)) | 3092 | if (dawr_enabled()) |
| 3092 | dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; | 3093 | dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; |
| 3093 | #else | 3094 | #else |
| 3094 | dbginfo.features = 0; | 3095 | dbginfo.features = 0; |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3223aec88b2c..4c67cc79de7c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
| @@ -94,7 +94,7 @@ endif | |||
| 94 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ | 94 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ |
| 95 | book3s_xics.o | 95 | book3s_xics.o |
| 96 | 96 | ||
| 97 | kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o | 97 | kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o |
| 98 | kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o | 98 | kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o |
| 99 | 99 | ||
| 100 | kvm-book3s_64-module-objs := \ | 100 | kvm-book3s_64-module-objs := \ |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 10c5579d20ce..61a212d0daf0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
| @@ -651,6 +651,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | |||
| 651 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); | 651 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); |
| 652 | break; | 652 | break; |
| 653 | #endif /* CONFIG_KVM_XICS */ | 653 | #endif /* CONFIG_KVM_XICS */ |
| 654 | #ifdef CONFIG_KVM_XIVE | ||
| 655 | case KVM_REG_PPC_VP_STATE: | ||
| 656 | if (!vcpu->arch.xive_vcpu) { | ||
| 657 | r = -ENXIO; | ||
| 658 | break; | ||
| 659 | } | ||
| 660 | if (xive_enabled()) | ||
| 661 | r = kvmppc_xive_native_get_vp(vcpu, val); | ||
| 662 | else | ||
| 663 | r = -ENXIO; | ||
| 664 | break; | ||
| 665 | #endif /* CONFIG_KVM_XIVE */ | ||
| 654 | case KVM_REG_PPC_FSCR: | 666 | case KVM_REG_PPC_FSCR: |
| 655 | *val = get_reg_val(id, vcpu->arch.fscr); | 667 | *val = get_reg_val(id, vcpu->arch.fscr); |
| 656 | break; | 668 | break; |
| @@ -724,6 +736,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | |||
| 724 | r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); | 736 | r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); |
| 725 | break; | 737 | break; |
| 726 | #endif /* CONFIG_KVM_XICS */ | 738 | #endif /* CONFIG_KVM_XICS */ |
| 739 | #ifdef CONFIG_KVM_XIVE | ||
| 740 | case KVM_REG_PPC_VP_STATE: | ||
| 741 | if (!vcpu->arch.xive_vcpu) { | ||
| 742 | r = -ENXIO; | ||
| 743 | break; | ||
| 744 | } | ||
| 745 | if (xive_enabled()) | ||
| 746 | r = kvmppc_xive_native_set_vp(vcpu, val); | ||
| 747 | else | ||
| 748 | r = -ENXIO; | ||
| 749 | break; | ||
| 750 | #endif /* CONFIG_KVM_XIVE */ | ||
| 727 | case KVM_REG_PPC_FSCR: | 751 | case KVM_REG_PPC_FSCR: |
| 728 | vcpu->arch.fscr = set_reg_val(id, *val); | 752 | vcpu->arch.fscr = set_reg_val(id, *val); |
| 729 | break; | 753 | break; |
| @@ -891,6 +915,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) | |||
| 891 | kvmppc_rtas_tokens_free(kvm); | 915 | kvmppc_rtas_tokens_free(kvm); |
| 892 | WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); | 916 | WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); |
| 893 | #endif | 917 | #endif |
| 918 | |||
| 919 | #ifdef CONFIG_KVM_XICS | ||
| 920 | /* | ||
| 921 | * Free the XIVE devices which are not directly freed by the | ||
| 922 | * device 'release' method | ||
| 923 | */ | ||
| 924 | kfree(kvm->arch.xive_devices.native); | ||
| 925 | kvm->arch.xive_devices.native = NULL; | ||
| 926 | kfree(kvm->arch.xive_devices.xics_on_xive); | ||
| 927 | kvm->arch.xive_devices.xics_on_xive = NULL; | ||
| 928 | #endif /* CONFIG_KVM_XICS */ | ||
| 894 | } | 929 | } |
| 895 | 930 | ||
| 896 | int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) | 931 | int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) |
| @@ -1050,6 +1085,9 @@ static int kvmppc_book3s_init(void) | |||
| 1050 | if (xics_on_xive()) { | 1085 | if (xics_on_xive()) { |
| 1051 | kvmppc_xive_init_module(); | 1086 | kvmppc_xive_init_module(); |
| 1052 | kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); | 1087 | kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); |
| 1088 | kvmppc_xive_native_init_module(); | ||
| 1089 | kvm_register_device_ops(&kvm_xive_native_ops, | ||
| 1090 | KVM_DEV_TYPE_XIVE); | ||
| 1053 | } else | 1091 | } else |
| 1054 | #endif | 1092 | #endif |
| 1055 | kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); | 1093 | kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); |
| @@ -1060,8 +1098,10 @@ static int kvmppc_book3s_init(void) | |||
| 1060 | static void kvmppc_book3s_exit(void) | 1098 | static void kvmppc_book3s_exit(void) |
| 1061 | { | 1099 | { |
| 1062 | #ifdef CONFIG_KVM_XICS | 1100 | #ifdef CONFIG_KVM_XICS |
| 1063 | if (xics_on_xive()) | 1101 | if (xics_on_xive()) { |
| 1064 | kvmppc_xive_exit_module(); | 1102 | kvmppc_xive_exit_module(); |
| 1103 | kvmppc_xive_native_exit_module(); | ||
| 1104 | } | ||
| 1065 | #endif | 1105 | #endif |
| 1066 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | 1106 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER |
| 1067 | kvmppc_book3s_exit_pr(); | 1107 | kvmppc_book3s_exit_pr(); |
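The new KVM_REG_PPC_VP_STATE handling above is exercised from userspace through the one-reg API. The sketch below (not part of this commit) reads the 128-bit value into a two-element array; it assumes vcpu_fd belongs to a vcpu already connected to the XIVE native device and that KVM_REG_PPC_VP_STATE from this series is in the installed headers.

```c
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_vp_state(int vcpu_fd, __u64 timaval[2])
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_VP_STATE,		/* 128-bit register */
		.addr = (__u64)(unsigned long)timaval,	/* user buffer      */
	};

	/* Fails with errno ENXIO when the vcpu has no XIVE context attached. */
	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
```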
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index f02b04973710..66270e07449a 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
| @@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head) | |||
| 228 | unsigned long i, npages = kvmppc_tce_pages(stt->size); | 228 | unsigned long i, npages = kvmppc_tce_pages(stt->size); |
| 229 | 229 | ||
| 230 | for (i = 0; i < npages; i++) | 230 | for (i = 0; i < npages; i++) |
| 231 | __free_page(stt->pages[i]); | 231 | if (stt->pages[i]) |
| 232 | __free_page(stt->pages[i]); | ||
| 232 | 233 | ||
| 233 | kfree(stt); | 234 | kfree(stt); |
| 234 | } | 235 | } |
| 235 | 236 | ||
| 237 | static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt, | ||
| 238 | unsigned long sttpage) | ||
| 239 | { | ||
| 240 | struct page *page = stt->pages[sttpage]; | ||
| 241 | |||
| 242 | if (page) | ||
| 243 | return page; | ||
| 244 | |||
| 245 | mutex_lock(&stt->alloc_lock); | ||
| 246 | page = stt->pages[sttpage]; | ||
| 247 | if (!page) { | ||
| 248 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 249 | WARN_ON_ONCE(!page); | ||
| 250 | if (page) | ||
| 251 | stt->pages[sttpage] = page; | ||
| 252 | } | ||
| 253 | mutex_unlock(&stt->alloc_lock); | ||
| 254 | |||
| 255 | return page; | ||
| 256 | } | ||
| 257 | |||
| 236 | static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) | 258 | static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) |
| 237 | { | 259 | { |
| 238 | struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; | 260 | struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; |
| @@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) | |||
| 241 | if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) | 263 | if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) |
| 242 | return VM_FAULT_SIGBUS; | 264 | return VM_FAULT_SIGBUS; |
| 243 | 265 | ||
| 244 | page = stt->pages[vmf->pgoff]; | 266 | page = kvm_spapr_get_tce_page(stt, vmf->pgoff); |
| 267 | if (!page) | ||
| 268 | return VM_FAULT_OOM; | ||
| 269 | |||
| 245 | get_page(page); | 270 | get_page(page); |
| 246 | vmf->page = page; | 271 | vmf->page = page; |
| 247 | return 0; | 272 | return 0; |
| @@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
| 296 | struct kvmppc_spapr_tce_table *siter; | 321 | struct kvmppc_spapr_tce_table *siter; |
| 297 | unsigned long npages, size = args->size; | 322 | unsigned long npages, size = args->size; |
| 298 | int ret = -ENOMEM; | 323 | int ret = -ENOMEM; |
| 299 | int i; | ||
| 300 | 324 | ||
| 301 | if (!args->size || args->page_shift < 12 || args->page_shift > 34 || | 325 | if (!args->size || args->page_shift < 12 || args->page_shift > 34 || |
| 302 | (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) | 326 | (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) |
| @@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
| 318 | stt->offset = args->offset; | 342 | stt->offset = args->offset; |
| 319 | stt->size = size; | 343 | stt->size = size; |
| 320 | stt->kvm = kvm; | 344 | stt->kvm = kvm; |
| 345 | mutex_init(&stt->alloc_lock); | ||
| 321 | INIT_LIST_HEAD_RCU(&stt->iommu_tables); | 346 | INIT_LIST_HEAD_RCU(&stt->iommu_tables); |
| 322 | 347 | ||
| 323 | for (i = 0; i < npages; i++) { | ||
| 324 | stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 325 | if (!stt->pages[i]) | ||
| 326 | goto fail; | ||
| 327 | } | ||
| 328 | |||
| 329 | mutex_lock(&kvm->lock); | 348 | mutex_lock(&kvm->lock); |
| 330 | 349 | ||
| 331 | /* Check this LIOBN hasn't been previously allocated */ | 350 | /* Check this LIOBN hasn't been previously allocated */ |
| @@ -352,17 +371,28 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
| 352 | if (ret >= 0) | 371 | if (ret >= 0) |
| 353 | return ret; | 372 | return ret; |
| 354 | 373 | ||
| 355 | fail: | ||
| 356 | for (i = 0; i < npages; i++) | ||
| 357 | if (stt->pages[i]) | ||
| 358 | __free_page(stt->pages[i]); | ||
| 359 | |||
| 360 | kfree(stt); | 374 | kfree(stt); |
| 361 | fail_acct: | 375 | fail_acct: |
| 362 | kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); | 376 | kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); |
| 363 | return ret; | 377 | return ret; |
| 364 | } | 378 | } |
| 365 | 379 | ||
| 380 | static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, | ||
| 381 | unsigned long *ua) | ||
| 382 | { | ||
| 383 | unsigned long gfn = tce >> PAGE_SHIFT; | ||
| 384 | struct kvm_memory_slot *memslot; | ||
| 385 | |||
| 386 | memslot = search_memslots(kvm_memslots(kvm), gfn); | ||
| 387 | if (!memslot) | ||
| 388 | return -EINVAL; | ||
| 389 | |||
| 390 | *ua = __gfn_to_hva_memslot(memslot, gfn) | | ||
| 391 | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); | ||
| 392 | |||
| 393 | return 0; | ||
| 394 | } | ||
| 395 | |||
| 366 | static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, | 396 | static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, |
| 367 | unsigned long tce) | 397 | unsigned long tce) |
| 368 | { | 398 | { |
| @@ -378,7 +408,7 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, | |||
| 378 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) | 408 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) |
| 379 | return H_TOO_HARD; | 409 | return H_TOO_HARD; |
| 380 | 410 | ||
| 381 | if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL)) | 411 | if (kvmppc_tce_to_ua(stt->kvm, tce, &ua)) |
| 382 | return H_TOO_HARD; | 412 | return H_TOO_HARD; |
| 383 | 413 | ||
| 384 | list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { | 414 | list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { |
| @@ -397,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, | |||
| 397 | return H_SUCCESS; | 427 | return H_SUCCESS; |
| 398 | } | 428 | } |
| 399 | 429 | ||
| 430 | /* | ||
| 431 | * Handles TCE requests for emulated devices. | ||
| 432 | * Puts guest TCE values to the table and expects user space to convert them. | ||
| 433 | * Cannot fail so kvmppc_tce_validate must be called before it. | ||
| 434 | */ | ||
| 435 | static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, | ||
| 436 | unsigned long idx, unsigned long tce) | ||
| 437 | { | ||
| 438 | struct page *page; | ||
| 439 | u64 *tbl; | ||
| 440 | unsigned long sttpage; | ||
| 441 | |||
| 442 | idx -= stt->offset; | ||
| 443 | sttpage = idx / TCES_PER_PAGE; | ||
| 444 | page = stt->pages[sttpage]; | ||
| 445 | |||
| 446 | if (!page) { | ||
| 447 | /* We allow any TCE, not just with read|write permissions */ | ||
| 448 | if (!tce) | ||
| 449 | return; | ||
| 450 | |||
| 451 | page = kvm_spapr_get_tce_page(stt, sttpage); | ||
| 452 | if (!page) | ||
| 453 | return; | ||
| 454 | } | ||
| 455 | tbl = page_to_virt(page); | ||
| 456 | |||
| 457 | tbl[idx % TCES_PER_PAGE] = tce; | ||
| 458 | } | ||
| 459 | |||
| 400 | static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, | 460 | static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, |
| 401 | unsigned long entry) | 461 | unsigned long entry) |
| 402 | { | 462 | { |
| @@ -543,15 +603,15 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
| 543 | if (ret != H_SUCCESS) | 603 | if (ret != H_SUCCESS) |
| 544 | return ret; | 604 | return ret; |
| 545 | 605 | ||
| 606 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
| 607 | |||
| 546 | ret = kvmppc_tce_validate(stt, tce); | 608 | ret = kvmppc_tce_validate(stt, tce); |
| 547 | if (ret != H_SUCCESS) | 609 | if (ret != H_SUCCESS) |
| 548 | return ret; | 610 | goto unlock_exit; |
| 549 | 611 | ||
| 550 | dir = iommu_tce_direction(tce); | 612 | dir = iommu_tce_direction(tce); |
| 551 | 613 | ||
| 552 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 614 | if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { |
| 553 | |||
| 554 | if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { | ||
| 555 | ret = H_PARAMETER; | 615 | ret = H_PARAMETER; |
| 556 | goto unlock_exit; | 616 | goto unlock_exit; |
| 557 | } | 617 | } |
| @@ -612,7 +672,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
| 612 | return ret; | 672 | return ret; |
| 613 | 673 | ||
| 614 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 674 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 615 | if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { | 675 | if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) { |
| 616 | ret = H_TOO_HARD; | 676 | ret = H_TOO_HARD; |
| 617 | goto unlock_exit; | 677 | goto unlock_exit; |
| 618 | } | 678 | } |
| @@ -647,7 +707,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
| 647 | } | 707 | } |
| 648 | tce = be64_to_cpu(tce); | 708 | tce = be64_to_cpu(tce); |
| 649 | 709 | ||
| 650 | if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) | 710 | if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) |
| 651 | return H_PARAMETER; | 711 | return H_PARAMETER; |
| 652 | 712 | ||
| 653 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | 713 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { |
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 2206bc729b9a..484b47fa3960 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c | |||
| @@ -66,8 +66,6 @@ | |||
| 66 | 66 | ||
| 67 | #endif | 67 | #endif |
| 68 | 68 | ||
| 69 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) | ||
| 70 | |||
| 71 | /* | 69 | /* |
| 72 | * Finds a TCE table descriptor by LIOBN. | 70 | * Finds a TCE table descriptor by LIOBN. |
| 73 | * | 71 | * |
| @@ -88,6 +86,25 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, | |||
| 88 | EXPORT_SYMBOL_GPL(kvmppc_find_table); | 86 | EXPORT_SYMBOL_GPL(kvmppc_find_table); |
| 89 | 87 | ||
| 90 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 88 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
| 89 | static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, | ||
| 90 | unsigned long *ua, unsigned long **prmap) | ||
| 91 | { | ||
| 92 | unsigned long gfn = tce >> PAGE_SHIFT; | ||
| 93 | struct kvm_memory_slot *memslot; | ||
| 94 | |||
| 95 | memslot = search_memslots(kvm_memslots_raw(kvm), gfn); | ||
| 96 | if (!memslot) | ||
| 97 | return -EINVAL; | ||
| 98 | |||
| 99 | *ua = __gfn_to_hva_memslot(memslot, gfn) | | ||
| 100 | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); | ||
| 101 | |||
| 102 | if (prmap) | ||
| 103 | *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; | ||
| 104 | |||
| 105 | return 0; | ||
| 106 | } | ||
| 107 | |||
| 91 | /* | 108 | /* |
| 92 | * Validates TCE address. | 109 | * Validates TCE address. |
| 93 | * At the moment flags and page mask are validated. | 110 | * At the moment flags and page mask are validated. |
| @@ -111,7 +128,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, | |||
| 111 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) | 128 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) |
| 112 | return H_PARAMETER; | 129 | return H_PARAMETER; |
| 113 | 130 | ||
| 114 | if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL)) | 131 | if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL)) |
| 115 | return H_TOO_HARD; | 132 | return H_TOO_HARD; |
| 116 | 133 | ||
| 117 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | 134 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { |
| @@ -129,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, | |||
| 129 | 146 | ||
| 130 | return H_SUCCESS; | 147 | return H_SUCCESS; |
| 131 | } | 148 | } |
| 132 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ | ||
| 133 | 149 | ||
| 134 | /* Note on the use of page_address() in real mode, | 150 | /* Note on the use of page_address() in real mode, |
| 135 | * | 151 | * |
| @@ -161,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page) | |||
| 161 | /* | 177 | /* |
| 162 | * Handles TCE requests for emulated devices. | 178 | * Handles TCE requests for emulated devices. |
| 163 | * Puts guest TCE values to the table and expects user space to convert them. | 179 | * Puts guest TCE values to the table and expects user space to convert them. |
| 164 | * Called in both real and virtual modes. | 180 | * Cannot fail so kvmppc_rm_tce_validate must be called before it. |
| 165 | * Cannot fail so kvmppc_tce_validate must be called before it. | ||
| 166 | * | ||
| 167 | * WARNING: This will be called in real-mode on HV KVM and virtual | ||
| 168 | * mode on PR KVM | ||
| 169 | */ | 181 | */ |
| 170 | void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, | 182 | static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, |
| 171 | unsigned long idx, unsigned long tce) | 183 | unsigned long idx, unsigned long tce) |
| 172 | { | 184 | { |
| 173 | struct page *page; | 185 | struct page *page; |
| @@ -175,35 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, | |||
| 175 | 187 | ||
| 176 | idx -= stt->offset; | 188 | idx -= stt->offset; |
| 177 | page = stt->pages[idx / TCES_PER_PAGE]; | 189 | page = stt->pages[idx / TCES_PER_PAGE]; |
| 190 | /* | ||
| 191 | * page must not be NULL in real mode, | ||
| 192 | * kvmppc_rm_ioba_validate() must have taken care of this. | ||
| 193 | */ | ||
| 194 | WARN_ON_ONCE_RM(!page); | ||
| 178 | tbl = kvmppc_page_address(page); | 195 | tbl = kvmppc_page_address(page); |
| 179 | 196 | ||
| 180 | tbl[idx % TCES_PER_PAGE] = tce; | 197 | tbl[idx % TCES_PER_PAGE] = tce; |
| 181 | } | 198 | } |
| 182 | EXPORT_SYMBOL_GPL(kvmppc_tce_put); | ||
| 183 | 199 | ||
| 184 | long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, | 200 | /* |
| 185 | unsigned long *ua, unsigned long **prmap) | 201 | * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so |
| 202 | * in real mode. | ||
| 203 | * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is | ||
| 204 | * allocated or not required (when clearing a tce entry). | ||
| 205 | */ | ||
| 206 | static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, | ||
| 207 | unsigned long ioba, unsigned long npages, bool clearing) | ||
| 186 | { | 208 | { |
| 187 | unsigned long gfn = tce >> PAGE_SHIFT; | 209 | unsigned long i, idx, sttpage, sttpages; |
| 188 | struct kvm_memory_slot *memslot; | 210 | unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages); |
| 189 | 211 | ||
| 190 | memslot = search_memslots(kvm_memslots(kvm), gfn); | 212 | if (ret) |
| 191 | if (!memslot) | 213 | return ret; |
| 192 | return -EINVAL; | 214 | /* |
| 193 | 215 | * clearing==true says kvmppc_rm_tce_put won't be allocating pages | |
| 194 | *ua = __gfn_to_hva_memslot(memslot, gfn) | | 216 | * for empty tces. |
| 195 | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); | 217 | */ |
| 218 | if (clearing) | ||
| 219 | return H_SUCCESS; | ||
| 196 | 220 | ||
| 197 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 221 | idx = (ioba >> stt->page_shift) - stt->offset; |
| 198 | if (prmap) | 222 | sttpage = idx / TCES_PER_PAGE; |
| 199 | *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; | 223 | sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / |
| 200 | #endif | 224 | TCES_PER_PAGE; |
| 225 | for (i = sttpage; i < sttpage + sttpages; ++i) | ||
| 226 | if (!stt->pages[i]) | ||
| 227 | return H_TOO_HARD; | ||
| 201 | 228 | ||
| 202 | return 0; | 229 | return H_SUCCESS; |
| 203 | } | 230 | } |
| 204 | EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua); | ||
| 205 | 231 | ||
| 206 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | ||
| 207 | static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, | 232 | static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, |
| 208 | unsigned long entry, unsigned long *hpa, | 233 | unsigned long entry, unsigned long *hpa, |
| 209 | enum dma_data_direction *direction) | 234 | enum dma_data_direction *direction) |
| @@ -381,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
| 381 | if (!stt) | 406 | if (!stt) |
| 382 | return H_TOO_HARD; | 407 | return H_TOO_HARD; |
| 383 | 408 | ||
| 384 | ret = kvmppc_ioba_validate(stt, ioba, 1); | 409 | ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0); |
| 385 | if (ret != H_SUCCESS) | 410 | if (ret != H_SUCCESS) |
| 386 | return ret; | 411 | return ret; |
| 387 | 412 | ||
| @@ -390,7 +415,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
| 390 | return ret; | 415 | return ret; |
| 391 | 416 | ||
| 392 | dir = iommu_tce_direction(tce); | 417 | dir = iommu_tce_direction(tce); |
| 393 | if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) | 418 | if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) |
| 394 | return H_PARAMETER; | 419 | return H_PARAMETER; |
| 395 | 420 | ||
| 396 | entry = ioba >> stt->page_shift; | 421 | entry = ioba >> stt->page_shift; |
| @@ -409,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
| 409 | } | 434 | } |
| 410 | } | 435 | } |
| 411 | 436 | ||
| 412 | kvmppc_tce_put(stt, entry, tce); | 437 | kvmppc_rm_tce_put(stt, entry, tce); |
| 413 | 438 | ||
| 414 | return H_SUCCESS; | 439 | return H_SUCCESS; |
| 415 | } | 440 | } |
| @@ -480,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
| 480 | if (tce_list & (SZ_4K - 1)) | 505 | if (tce_list & (SZ_4K - 1)) |
| 481 | return H_PARAMETER; | 506 | return H_PARAMETER; |
| 482 | 507 | ||
| 483 | ret = kvmppc_ioba_validate(stt, ioba, npages); | 508 | ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false); |
| 484 | if (ret != H_SUCCESS) | 509 | if (ret != H_SUCCESS) |
| 485 | return ret; | 510 | return ret; |
| 486 | 511 | ||
| @@ -492,7 +517,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
| 492 | */ | 517 | */ |
| 493 | struct mm_iommu_table_group_mem_t *mem; | 518 | struct mm_iommu_table_group_mem_t *mem; |
| 494 | 519 | ||
| 495 | if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) | 520 | if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) |
| 496 | return H_TOO_HARD; | 521 | return H_TOO_HARD; |
| 497 | 522 | ||
| 498 | mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); | 523 | mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); |
| @@ -508,7 +533,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
| 508 | * We do not require memory to be preregistered in this case | 533 | * We do not require memory to be preregistered in this case |
| 509 | * so lock rmap and do __find_linux_pte_or_hugepte(). | 534 | * so lock rmap and do __find_linux_pte_or_hugepte(). |
| 510 | */ | 535 | */ |
| 511 | if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) | 536 | if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) |
| 512 | return H_TOO_HARD; | 537 | return H_TOO_HARD; |
| 513 | 538 | ||
| 514 | rmap = (void *) vmalloc_to_phys(rmap); | 539 | rmap = (void *) vmalloc_to_phys(rmap); |
| @@ -542,7 +567,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
| 542 | unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); | 567 | unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); |
| 543 | 568 | ||
| 544 | ua = 0; | 569 | ua = 0; |
| 545 | if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) | 570 | if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) |
| 546 | return H_PARAMETER; | 571 | return H_PARAMETER; |
| 547 | 572 | ||
| 548 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | 573 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { |
| @@ -557,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
| 557 | } | 582 | } |
| 558 | } | 583 | } |
| 559 | 584 | ||
| 560 | kvmppc_tce_put(stt, entry + i, tce); | 585 | kvmppc_rm_tce_put(stt, entry + i, tce); |
| 561 | } | 586 | } |
| 562 | 587 | ||
| 563 | unlock_exit: | 588 | unlock_exit: |
| @@ -583,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
| 583 | if (!stt) | 608 | if (!stt) |
| 584 | return H_TOO_HARD; | 609 | return H_TOO_HARD; |
| 585 | 610 | ||
| 586 | ret = kvmppc_ioba_validate(stt, ioba, npages); | 611 | ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0); |
| 587 | if (ret != H_SUCCESS) | 612 | if (ret != H_SUCCESS) |
| 588 | return ret; | 613 | return ret; |
| 589 | 614 | ||
| @@ -610,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
| 610 | } | 635 | } |
| 611 | 636 | ||
| 612 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) | 637 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) |
| 613 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); | 638 | kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); |
| 614 | 639 | ||
| 615 | return H_SUCCESS; | 640 | return H_SUCCESS; |
| 616 | } | 641 | } |
| @@ -635,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
| 635 | 660 | ||
| 636 | idx = (ioba >> stt->page_shift) - stt->offset; | 661 | idx = (ioba >> stt->page_shift) - stt->offset; |
| 637 | page = stt->pages[idx / TCES_PER_PAGE]; | 662 | page = stt->pages[idx / TCES_PER_PAGE]; |
| 663 | if (!page) { | ||
| 664 | vcpu->arch.regs.gpr[4] = 0; | ||
| 665 | return H_SUCCESS; | ||
| 666 | } | ||
| 638 | tbl = (u64 *)page_address(page); | 667 | tbl = (u64 *)page_address(page); |
| 639 | 668 | ||
| 640 | vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; | 669 | vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; |
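
Worth unpacking from the new kvmppc_rm_ioba_validate() above: since TCE-table backing pages are now allocated on demand, and allocation cannot happen in real mode, the real-mode path first works out which backing pages a request spans and returns H_TOO_HARD if any are missing (unless the request only clears entries). A standalone sketch of that index arithmetic, assuming 64-bit TCEs and a 4K host page size purely for illustration:

```c
/* Standalone sketch (not part of the patch): which backing pages of the
 * TCE table a request touches. 4K pages assumed for illustration. */
#include <stdio.h>

#define PAGE_SIZE      4096UL
#define TCES_PER_PAGE  (PAGE_SIZE / sizeof(unsigned long long))  /* 512 */

static unsigned long align_up(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);	/* a must be a power of two */
}

int main(void)
{
	unsigned long page_shift = 12;	/* 4K IOMMU pages */
	unsigned long offset = 0;	/* stt->offset */
	unsigned long ioba = 0x1ff000;	/* example I/O bus address */
	unsigned long npages = 4;	/* TCEs in the request */

	unsigned long idx = (ioba >> page_shift) - offset;
	unsigned long sttpage = idx / TCES_PER_PAGE;
	unsigned long sttpages = align_up(idx % TCES_PER_PAGE + npages,
					  TCES_PER_PAGE) / TCES_PER_PAGE;

	/* Real mode can only proceed if stt->pages[sttpage .. sttpage+sttpages-1]
	 * already exist; otherwise the hcall is retried in virtual mode, where
	 * pages can be allocated. */
	printf("first backing page %lu, spans %lu page(s)\n", sttpage, sttpages);
	return 0;
}
```

With idx = 0x1ff and npages = 4 the request straddles two backing pages, so both stt->pages[0] and stt->pages[1] must already be allocated for the real-mode handler to proceed.
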
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 06964350b97a..d5fc624e0655 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
| @@ -74,6 +74,7 @@ | |||
| 74 | #include <asm/opal.h> | 74 | #include <asm/opal.h> |
| 75 | #include <asm/xics.h> | 75 | #include <asm/xics.h> |
| 76 | #include <asm/xive.h> | 76 | #include <asm/xive.h> |
| 77 | #include <asm/hw_breakpoint.h> | ||
| 77 | 78 | ||
| 78 | #include "book3s.h" | 79 | #include "book3s.h" |
| 79 | 80 | ||
| @@ -749,7 +750,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) | |||
| 749 | /* | 750 | /* |
| 750 | * Ensure that the read of vcore->dpdes comes after the read | 751 | * Ensure that the read of vcore->dpdes comes after the read |
| 751 | * of vcpu->doorbell_request. This barrier matches the | 752 | * of vcpu->doorbell_request. This barrier matches the |
| 752 | * smb_wmb() in kvmppc_guest_entry_inject(). | 753 | * smp_wmb() in kvmppc_guest_entry_inject(). |
| 753 | */ | 754 | */ |
| 754 | smp_rmb(); | 755 | smp_rmb(); |
| 755 | vc = vcpu->arch.vcore; | 756 | vc = vcpu->arch.vcore; |
| @@ -801,6 +802,80 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, | |||
| 801 | } | 802 | } |
| 802 | } | 803 | } |
| 803 | 804 | ||
| 805 | /* Copy guest memory in place - must reside within a single memslot */ | ||
| 806 | static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from, | ||
| 807 | unsigned long len) | ||
| 808 | { | ||
| 809 | struct kvm_memory_slot *to_memslot = NULL; | ||
| 810 | struct kvm_memory_slot *from_memslot = NULL; | ||
| 811 | unsigned long to_addr, from_addr; | ||
| 812 | int r; | ||
| 813 | |||
| 814 | /* Get HPA for from address */ | ||
| 815 | from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT); | ||
| 816 | if (!from_memslot) | ||
| 817 | return -EFAULT; | ||
| 818 | if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages) | ||
| 819 | << PAGE_SHIFT)) | ||
| 820 | return -EINVAL; | ||
| 821 | from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT); | ||
| 822 | if (kvm_is_error_hva(from_addr)) | ||
| 823 | return -EFAULT; | ||
| 824 | from_addr |= (from & (PAGE_SIZE - 1)); | ||
| 825 | |||
| 826 | /* Get HPA for to address */ | ||
| 827 | to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT); | ||
| 828 | if (!to_memslot) | ||
| 829 | return -EFAULT; | ||
| 830 | if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages) | ||
| 831 | << PAGE_SHIFT)) | ||
| 832 | return -EINVAL; | ||
| 833 | to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT); | ||
| 834 | if (kvm_is_error_hva(to_addr)) | ||
| 835 | return -EFAULT; | ||
| 836 | to_addr |= (to & (PAGE_SIZE - 1)); | ||
| 837 | |||
| 838 | /* Perform copy */ | ||
| 839 | r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr, | ||
| 840 | len); | ||
| 841 | if (r) | ||
| 842 | return -EFAULT; | ||
| 843 | mark_page_dirty(kvm, to >> PAGE_SHIFT); | ||
| 844 | return 0; | ||
| 845 | } | ||
| 846 | |||
| 847 | static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, | ||
| 848 | unsigned long dest, unsigned long src) | ||
| 849 | { | ||
| 850 | u64 pg_sz = SZ_4K; /* 4K page size */ | ||
| 851 | u64 pg_mask = SZ_4K - 1; | ||
| 852 | int ret; | ||
| 853 | |||
| 854 | /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ | ||
| 855 | if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | | ||
| 856 | H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) | ||
| 857 | return H_PARAMETER; | ||
| 858 | |||
| 859 | /* dest (and src if copy_page flag set) must be page aligned */ | ||
| 860 | if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) | ||
| 861 | return H_PARAMETER; | ||
| 862 | |||
| 863 | /* zero and/or copy the page as determined by the flags */ | ||
| 864 | if (flags & H_COPY_PAGE) { | ||
| 865 | ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz); | ||
| 866 | if (ret < 0) | ||
| 867 | return H_PARAMETER; | ||
| 868 | } else if (flags & H_ZERO_PAGE) { | ||
| 869 | ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz); | ||
| 870 | if (ret < 0) | ||
| 871 | return H_PARAMETER; | ||
| 872 | } | ||
| 873 | |||
| 874 | /* We can ignore the remaining flags */ | ||
| 875 | |||
| 876 | return H_SUCCESS; | ||
| 877 | } | ||
| 878 | |||
| 804 | static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) | 879 | static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) |
| 805 | { | 880 | { |
| 806 | struct kvmppc_vcore *vcore = target->arch.vcore; | 881 | struct kvmppc_vcore *vcore = target->arch.vcore; |
| @@ -1003,6 +1078,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
| 1003 | if (nesting_enabled(vcpu->kvm)) | 1078 | if (nesting_enabled(vcpu->kvm)) |
| 1004 | ret = kvmhv_copy_tofrom_guest_nested(vcpu); | 1079 | ret = kvmhv_copy_tofrom_guest_nested(vcpu); |
| 1005 | break; | 1080 | break; |
| 1081 | case H_PAGE_INIT: | ||
| 1082 | ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
| 1083 | kvmppc_get_gpr(vcpu, 5), | ||
| 1084 | kvmppc_get_gpr(vcpu, 6)); | ||
| 1085 | break; | ||
| 1006 | default: | 1086 | default: |
| 1007 | return RESUME_HOST; | 1087 | return RESUME_HOST; |
| 1008 | } | 1088 | } |
| @@ -1047,6 +1127,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) | |||
| 1047 | case H_IPOLL: | 1127 | case H_IPOLL: |
| 1048 | case H_XIRR_X: | 1128 | case H_XIRR_X: |
| 1049 | #endif | 1129 | #endif |
| 1130 | case H_PAGE_INIT: | ||
| 1050 | return 1; | 1131 | return 1; |
| 1051 | } | 1132 | } |
| 1052 | 1133 | ||
| @@ -2504,37 +2585,6 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) | |||
| 2504 | } | 2585 | } |
| 2505 | } | 2586 | } |
| 2506 | 2587 | ||
| 2507 | static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu, | ||
| 2508 | struct kvm_nested_guest *nested) | ||
| 2509 | { | ||
| 2510 | cpumask_t *need_tlb_flush; | ||
| 2511 | int lpid; | ||
| 2512 | |||
| 2513 | if (!cpu_has_feature(CPU_FTR_HVMODE)) | ||
| 2514 | return; | ||
| 2515 | |||
| 2516 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
| 2517 | pcpu &= ~0x3UL; | ||
| 2518 | |||
| 2519 | if (nested) { | ||
| 2520 | lpid = nested->shadow_lpid; | ||
| 2521 | need_tlb_flush = &nested->need_tlb_flush; | ||
| 2522 | } else { | ||
| 2523 | lpid = kvm->arch.lpid; | ||
| 2524 | need_tlb_flush = &kvm->arch.need_tlb_flush; | ||
| 2525 | } | ||
| 2526 | |||
| 2527 | mtspr(SPRN_LPID, lpid); | ||
| 2528 | isync(); | ||
| 2529 | smp_mb(); | ||
| 2530 | |||
| 2531 | if (cpumask_test_cpu(pcpu, need_tlb_flush)) { | ||
| 2532 | radix__local_flush_tlb_lpid_guest(lpid); | ||
| 2533 | /* Clear the bit after the TLB flush */ | ||
| 2534 | cpumask_clear_cpu(pcpu, need_tlb_flush); | ||
| 2535 | } | ||
| 2536 | } | ||
| 2537 | |||
| 2538 | static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) | 2588 | static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) |
| 2539 | { | 2589 | { |
| 2540 | int cpu; | 2590 | int cpu; |
| @@ -3228,19 +3278,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
| 3228 | for (sub = 0; sub < core_info.n_subcores; ++sub) | 3278 | for (sub = 0; sub < core_info.n_subcores; ++sub) |
| 3229 | spin_unlock(&core_info.vc[sub]->lock); | 3279 | spin_unlock(&core_info.vc[sub]->lock); |
| 3230 | 3280 | ||
| 3231 | if (kvm_is_radix(vc->kvm)) { | 3281 | guest_enter_irqoff(); |
| 3232 | /* | 3282 | |
| 3233 | * Do we need to flush the process scoped TLB for the LPAR? | 3283 | srcu_idx = srcu_read_lock(&vc->kvm->srcu); |
| 3234 | * | 3284 | |
| 3235 | * On POWER9, individual threads can come in here, but the | 3285 | this_cpu_disable_ftrace(); |
| 3236 | * TLB is shared between the 4 threads in a core, hence | ||
| 3237 | * invalidating on one thread invalidates for all. | ||
| 3238 | * Thus we make all 4 threads use the same bit here. | ||
| 3239 | * | ||
| 3240 | * Hash must be flushed in realmode in order to use tlbiel. | ||
| 3241 | */ | ||
| 3242 | kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL); | ||
| 3243 | } | ||
| 3244 | 3286 | ||
| 3245 | /* | 3287 | /* |
| 3246 | * Interrupts will be enabled once we get into the guest, | 3288 | * Interrupts will be enabled once we get into the guest, |
| @@ -3248,19 +3290,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
| 3248 | */ | 3290 | */ |
| 3249 | trace_hardirqs_on(); | 3291 | trace_hardirqs_on(); |
| 3250 | 3292 | ||
| 3251 | guest_enter_irqoff(); | ||
| 3252 | |||
| 3253 | srcu_idx = srcu_read_lock(&vc->kvm->srcu); | ||
| 3254 | |||
| 3255 | this_cpu_disable_ftrace(); | ||
| 3256 | |||
| 3257 | trap = __kvmppc_vcore_entry(); | 3293 | trap = __kvmppc_vcore_entry(); |
| 3258 | 3294 | ||
| 3295 | trace_hardirqs_off(); | ||
| 3296 | |||
| 3259 | this_cpu_enable_ftrace(); | 3297 | this_cpu_enable_ftrace(); |
| 3260 | 3298 | ||
| 3261 | srcu_read_unlock(&vc->kvm->srcu, srcu_idx); | 3299 | srcu_read_unlock(&vc->kvm->srcu, srcu_idx); |
| 3262 | 3300 | ||
| 3263 | trace_hardirqs_off(); | ||
| 3264 | set_irq_happened(trap); | 3301 | set_irq_happened(trap); |
| 3265 | 3302 | ||
| 3266 | spin_lock(&vc->lock); | 3303 | spin_lock(&vc->lock); |
| @@ -3374,7 +3411,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, | |||
| 3374 | mtspr(SPRN_PURR, vcpu->arch.purr); | 3411 | mtspr(SPRN_PURR, vcpu->arch.purr); |
| 3375 | mtspr(SPRN_SPURR, vcpu->arch.spurr); | 3412 | mtspr(SPRN_SPURR, vcpu->arch.spurr); |
| 3376 | 3413 | ||
| 3377 | if (cpu_has_feature(CPU_FTR_DAWR)) { | 3414 | if (dawr_enabled()) { |
| 3378 | mtspr(SPRN_DAWR, vcpu->arch.dawr); | 3415 | mtspr(SPRN_DAWR, vcpu->arch.dawr); |
| 3379 | mtspr(SPRN_DAWRX, vcpu->arch.dawrx); | 3416 | mtspr(SPRN_DAWRX, vcpu->arch.dawrx); |
| 3380 | } | 3417 | } |
| @@ -3423,7 +3460,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, | |||
| 3423 | vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); | 3460 | vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); |
| 3424 | vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); | 3461 | vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); |
| 3425 | 3462 | ||
| 3426 | mtspr(SPRN_PSSCR, host_psscr); | 3463 | /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ |
| 3464 | mtspr(SPRN_PSSCR, host_psscr | | ||
| 3465 | (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); | ||
| 3427 | mtspr(SPRN_HFSCR, host_hfscr); | 3466 | mtspr(SPRN_HFSCR, host_hfscr); |
| 3428 | mtspr(SPRN_CIABR, host_ciabr); | 3467 | mtspr(SPRN_CIABR, host_ciabr); |
| 3429 | mtspr(SPRN_DAWR, host_dawr); | 3468 | mtspr(SPRN_DAWR, host_dawr); |
| @@ -3511,6 +3550,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, | |||
| 3511 | #ifdef CONFIG_ALTIVEC | 3550 | #ifdef CONFIG_ALTIVEC |
| 3512 | load_vr_state(&vcpu->arch.vr); | 3551 | load_vr_state(&vcpu->arch.vr); |
| 3513 | #endif | 3552 | #endif |
| 3553 | mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); | ||
| 3514 | 3554 | ||
| 3515 | mtspr(SPRN_DSCR, vcpu->arch.dscr); | 3555 | mtspr(SPRN_DSCR, vcpu->arch.dscr); |
| 3516 | mtspr(SPRN_IAMR, vcpu->arch.iamr); | 3556 | mtspr(SPRN_IAMR, vcpu->arch.iamr); |
| @@ -3602,6 +3642,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, | |||
| 3602 | #ifdef CONFIG_ALTIVEC | 3642 | #ifdef CONFIG_ALTIVEC |
| 3603 | store_vr_state(&vcpu->arch.vr); | 3643 | store_vr_state(&vcpu->arch.vr); |
| 3604 | #endif | 3644 | #endif |
| 3645 | vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); | ||
| 3605 | 3646 | ||
| 3606 | if (cpu_has_feature(CPU_FTR_TM) || | 3647 | if (cpu_has_feature(CPU_FTR_TM) || |
| 3607 | cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) | 3648 | cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) |
| @@ -3967,7 +4008,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, | |||
| 3967 | unsigned long lpcr) | 4008 | unsigned long lpcr) |
| 3968 | { | 4009 | { |
| 3969 | int trap, r, pcpu; | 4010 | int trap, r, pcpu; |
| 3970 | int srcu_idx; | 4011 | int srcu_idx, lpid; |
| 3971 | struct kvmppc_vcore *vc; | 4012 | struct kvmppc_vcore *vc; |
| 3972 | struct kvm *kvm = vcpu->kvm; | 4013 | struct kvm *kvm = vcpu->kvm; |
| 3973 | struct kvm_nested_guest *nested = vcpu->arch.nested; | 4014 | struct kvm_nested_guest *nested = vcpu->arch.nested; |
| @@ -4043,8 +4084,12 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, | |||
| 4043 | vc->vcore_state = VCORE_RUNNING; | 4084 | vc->vcore_state = VCORE_RUNNING; |
| 4044 | trace_kvmppc_run_core(vc, 0); | 4085 | trace_kvmppc_run_core(vc, 0); |
| 4045 | 4086 | ||
| 4046 | if (cpu_has_feature(CPU_FTR_HVMODE)) | 4087 | if (cpu_has_feature(CPU_FTR_HVMODE)) { |
| 4047 | kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested); | 4088 | lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; |
| 4089 | mtspr(SPRN_LPID, lpid); | ||
| 4090 | isync(); | ||
| 4091 | kvmppc_check_need_tlb_flush(kvm, pcpu, nested); | ||
| 4092 | } | ||
| 4048 | 4093 | ||
| 4049 | trace_hardirqs_on(); | 4094 | trace_hardirqs_on(); |
| 4050 | guest_enter_irqoff(); | 4095 | guest_enter_irqoff(); |
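
The virtual-mode H_PAGE_INIT handler added above boils down to flag validation, alignment checks, then either a copy or a zero of one 4K page. A compact user-space sketch of that control flow follows; the flag and return values are illustrative stand-ins and a plain byte array stands in for guest memory, whereas the real handler goes through memslots via kvmppc_copy_guest() and kvm_clear_guest().

```c
#include <stdint.h>
#include <string.h>

#define H_ZERO_PAGE		(1UL << 0)	/* illustrative values only */
#define H_COPY_PAGE		(1UL << 1)
#define H_ICACHE_INVALIDATE	(1UL << 2)
#define H_ICACHE_SYNCHRONIZE	(1UL << 3)
#define H_PAGE_SET_LOANED	(1UL << 4)
#define H_SUCCESS	0
#define H_PARAMETER	(-4L)

long page_init(unsigned long flags, unsigned long dest, unsigned long src,
	       uint8_t *guest_ram)
{
	const unsigned long pg_sz = 4096, pg_mask = pg_sz - 1;

	/* Reject unknown flags */
	if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
		      H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
		return H_PARAMETER;
	/* dest (and src if copying) must be page aligned */
	if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
		return H_PARAMETER;

	if (flags & H_COPY_PAGE)
		memcpy(guest_ram + dest, guest_ram + src, pg_sz);
	else if (flags & H_ZERO_PAGE)
		memset(guest_ram + dest, 0, pg_sz);

	/* icache and loaned-page flags are accepted but ignored */
	return H_SUCCESS;
}

int main(void)
{
	static uint8_t ram[2 * 4096];
	memset(ram + 4096, 0xab, 4096);
	return page_init(H_COPY_PAGE, 0, 4096, ram) == H_SUCCESS ? 0 : 1;
}
```
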
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b0cf22477e87..6035d24f1d1d 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
| @@ -805,3 +805,60 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) | |||
| 805 | vcpu->arch.doorbell_request = 0; | 805 | vcpu->arch.doorbell_request = 0; |
| 806 | } | 806 | } |
| 807 | } | 807 | } |
| 808 | |||
| 809 | static void flush_guest_tlb(struct kvm *kvm) | ||
| 810 | { | ||
| 811 | unsigned long rb, set; | ||
| 812 | |||
| 813 | rb = PPC_BIT(52); /* IS = 2 */ | ||
| 814 | if (kvm_is_radix(kvm)) { | ||
| 815 | /* R=1 PRS=1 RIC=2 */ | ||
| 816 | asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) | ||
| 817 | : : "r" (rb), "i" (1), "i" (1), "i" (2), | ||
| 818 | "r" (0) : "memory"); | ||
| 819 | for (set = 1; set < kvm->arch.tlb_sets; ++set) { | ||
| 820 | rb += PPC_BIT(51); /* increment set number */ | ||
| 821 | /* R=1 PRS=1 RIC=0 */ | ||
| 822 | asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) | ||
| 823 | : : "r" (rb), "i" (1), "i" (1), "i" (0), | ||
| 824 | "r" (0) : "memory"); | ||
| 825 | } | ||
| 826 | } else { | ||
| 827 | for (set = 0; set < kvm->arch.tlb_sets; ++set) { | ||
| 828 | /* R=0 PRS=0 RIC=0 */ | ||
| 829 | asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) | ||
| 830 | : : "r" (rb), "i" (0), "i" (0), "i" (0), | ||
| 831 | "r" (0) : "memory"); | ||
| 832 | rb += PPC_BIT(51); /* increment set number */ | ||
| 833 | } | ||
| 834 | } | ||
| 835 | asm volatile("ptesync": : :"memory"); | ||
| 836 | } | ||
| 837 | |||
| 838 | void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, | ||
| 839 | struct kvm_nested_guest *nested) | ||
| 840 | { | ||
| 841 | cpumask_t *need_tlb_flush; | ||
| 842 | |||
| 843 | /* | ||
| 844 | * On POWER9, individual threads can come in here, but the | ||
| 845 | * TLB is shared between the 4 threads in a core, hence | ||
| 846 | * invalidating on one thread invalidates for all. | ||
| 847 | * Thus we make all 4 threads use the same bit. | ||
| 848 | */ | ||
| 849 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
| 850 | pcpu = cpu_first_thread_sibling(pcpu); | ||
| 851 | |||
| 852 | if (nested) | ||
| 853 | need_tlb_flush = &nested->need_tlb_flush; | ||
| 854 | else | ||
| 855 | need_tlb_flush = &kvm->arch.need_tlb_flush; | ||
| 856 | |||
| 857 | if (cpumask_test_cpu(pcpu, need_tlb_flush)) { | ||
| 858 | flush_guest_tlb(kvm); | ||
| 859 | |||
| 860 | /* Clear the bit after the TLB flush */ | ||
| 861 | cpumask_clear_cpu(pcpu, need_tlb_flush); | ||
| 862 | } | ||
| 863 | } | ||
| 864 | EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush); | ||
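
kvmppc_check_need_tlb_flush() folds every hardware thread of a POWER9 core onto one bit of the need_tlb_flush mask, since the TLB is shared per core and a single tlbiel sequence covers all threads. A small sketch of that mapping, assuming the usual 4-threads-per-core geometry for illustration:

```c
#include <stdio.h>

#define THREADS_PER_CORE 4	/* POWER9 SMT4, assumed for the example */

static int cpu_first_thread_sibling(int cpu)
{
	return cpu & ~(THREADS_PER_CORE - 1);
}

int main(void)
{
	/* All four threads of core 2 (cpus 8..11) resolve to the same bit,
	 * so a flush on any one of them clears the pending flag for the core. */
	for (int cpu = 8; cpu < 12; cpu++)
		printf("cpu %d -> flush bit %d\n", cpu, cpu_first_thread_sibling(cpu));
	return 0;
}
```
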
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 3b3791ed74a6..8431ad1e8391 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/hugetlb.h> | 13 | #include <linux/hugetlb.h> |
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/log2.h> | 15 | #include <linux/log2.h> |
| 16 | #include <linux/sizes.h> | ||
| 16 | 17 | ||
| 17 | #include <asm/trace.h> | 18 | #include <asm/trace.h> |
| 18 | #include <asm/kvm_ppc.h> | 19 | #include <asm/kvm_ppc.h> |
| @@ -867,6 +868,149 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, | |||
| 867 | return ret; | 868 | return ret; |
| 868 | } | 869 | } |
| 869 | 870 | ||
| 871 | static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, | ||
| 872 | int writing, unsigned long *hpa, | ||
| 873 | struct kvm_memory_slot **memslot_p) | ||
| 874 | { | ||
| 875 | struct kvm *kvm = vcpu->kvm; | ||
| 876 | struct kvm_memory_slot *memslot; | ||
| 877 | unsigned long gfn, hva, pa, psize = PAGE_SHIFT; | ||
| 878 | unsigned int shift; | ||
| 879 | pte_t *ptep, pte; | ||
| 880 | |||
| 881 | /* Find the memslot for this address */ | ||
| 882 | gfn = gpa >> PAGE_SHIFT; | ||
| 883 | memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); | ||
| 884 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) | ||
| 885 | return H_PARAMETER; | ||
| 886 | |||
| 887 | /* Translate to host virtual address */ | ||
| 888 | hva = __gfn_to_hva_memslot(memslot, gfn); | ||
| 889 | |||
| 890 | /* Try to find the host pte for that virtual address */ | ||
| 891 | ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); | ||
| 892 | if (!ptep) | ||
| 893 | return H_TOO_HARD; | ||
| 894 | pte = kvmppc_read_update_linux_pte(ptep, writing); | ||
| 895 | if (!pte_present(pte)) | ||
| 896 | return H_TOO_HARD; | ||
| 897 | |||
| 898 | /* Convert to a physical address */ | ||
| 899 | if (shift) | ||
| 900 | psize = 1UL << shift; | ||
| 901 | pa = pte_pfn(pte) << PAGE_SHIFT; | ||
| 902 | pa |= hva & (psize - 1); | ||
| 903 | pa |= gpa & ~PAGE_MASK; | ||
| 904 | |||
| 905 | if (hpa) | ||
| 906 | *hpa = pa; | ||
| 907 | if (memslot_p) | ||
| 908 | *memslot_p = memslot; | ||
| 909 | |||
| 910 | return H_SUCCESS; | ||
| 911 | } | ||
| 912 | |||
| 913 | static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, | ||
| 914 | unsigned long dest) | ||
| 915 | { | ||
| 916 | struct kvm_memory_slot *memslot; | ||
| 917 | struct kvm *kvm = vcpu->kvm; | ||
| 918 | unsigned long pa, mmu_seq; | ||
| 919 | long ret = H_SUCCESS; | ||
| 920 | int i; | ||
| 921 | |||
| 922 | /* Used later to detect if we might have been invalidated */ | ||
| 923 | mmu_seq = kvm->mmu_notifier_seq; | ||
| 924 | smp_rmb(); | ||
| 925 | |||
| 926 | ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); | ||
| 927 | if (ret != H_SUCCESS) | ||
| 928 | return ret; | ||
| 929 | |||
| 930 | /* Check if we've been invalidated */ | ||
| 931 | raw_spin_lock(&kvm->mmu_lock.rlock); | ||
| 932 | if (mmu_notifier_retry(kvm, mmu_seq)) { | ||
| 933 | ret = H_TOO_HARD; | ||
| 934 | goto out_unlock; | ||
| 935 | } | ||
| 936 | |||
| 937 | /* Zero the page */ | ||
| 938 | for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) | ||
| 939 | dcbz((void *)pa); | ||
| 940 | kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); | ||
| 941 | |||
| 942 | out_unlock: | ||
| 943 | raw_spin_unlock(&kvm->mmu_lock.rlock); | ||
| 944 | return ret; | ||
| 945 | } | ||
| 946 | |||
| 947 | static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, | ||
| 948 | unsigned long dest, unsigned long src) | ||
| 949 | { | ||
| 950 | unsigned long dest_pa, src_pa, mmu_seq; | ||
| 951 | struct kvm_memory_slot *dest_memslot; | ||
| 952 | struct kvm *kvm = vcpu->kvm; | ||
| 953 | long ret = H_SUCCESS; | ||
| 954 | |||
| 955 | /* Used later to detect if we might have been invalidated */ | ||
| 956 | mmu_seq = kvm->mmu_notifier_seq; | ||
| 957 | smp_rmb(); | ||
| 958 | |||
| 959 | ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); | ||
| 960 | if (ret != H_SUCCESS) | ||
| 961 | return ret; | ||
| 962 | ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); | ||
| 963 | if (ret != H_SUCCESS) | ||
| 964 | return ret; | ||
| 965 | |||
| 966 | /* Check if we've been invalidated */ | ||
| 967 | raw_spin_lock(&kvm->mmu_lock.rlock); | ||
| 968 | if (mmu_notifier_retry(kvm, mmu_seq)) { | ||
| 969 | ret = H_TOO_HARD; | ||
| 970 | goto out_unlock; | ||
| 971 | } | ||
| 972 | |||
| 973 | /* Copy the page */ | ||
| 974 | memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); | ||
| 975 | |||
| 976 | kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); | ||
| 977 | |||
| 978 | out_unlock: | ||
| 979 | raw_spin_unlock(&kvm->mmu_lock.rlock); | ||
| 980 | return ret; | ||
| 981 | } | ||
| 982 | |||
| 983 | long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, | ||
| 984 | unsigned long dest, unsigned long src) | ||
| 985 | { | ||
| 986 | struct kvm *kvm = vcpu->kvm; | ||
| 987 | u64 pg_mask = SZ_4K - 1; /* 4K page size */ | ||
| 988 | long ret = H_SUCCESS; | ||
| 989 | |||
| 990 | /* Don't handle radix mode here, go up to the virtual mode handler */ | ||
| 991 | if (kvm_is_radix(kvm)) | ||
| 992 | return H_TOO_HARD; | ||
| 993 | |||
| 994 | /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ | ||
| 995 | if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | | ||
| 996 | H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) | ||
| 997 | return H_PARAMETER; | ||
| 998 | |||
| 999 | /* dest (and src if copy_page flag set) must be page aligned */ | ||
| 1000 | if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) | ||
| 1001 | return H_PARAMETER; | ||
| 1002 | |||
| 1003 | /* zero and/or copy the page as determined by the flags */ | ||
| 1004 | if (flags & H_COPY_PAGE) | ||
| 1005 | ret = kvmppc_do_h_page_init_copy(vcpu, dest, src); | ||
| 1006 | else if (flags & H_ZERO_PAGE) | ||
| 1007 | ret = kvmppc_do_h_page_init_zero(vcpu, dest); | ||
| 1008 | |||
| 1009 | /* We can ignore the other flags */ | ||
| 1010 | |||
| 1011 | return ret; | ||
| 1012 | } | ||
| 1013 | |||
| 870 | void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, | 1014 | void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, |
| 871 | unsigned long pte_index) | 1015 | unsigned long pte_index) |
| 872 | { | 1016 | { |
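
Both real-mode H_PAGE_INIT helpers above follow the usual KVM pattern for racing against MMU-notifier invalidations: sample kvm->mmu_notifier_seq, do the translation, then re-check the sequence under mmu_lock and return H_TOO_HARD if it moved. A simplified analog of that pattern in plain C11, with generic names and a mutex standing in for mmu_lock:

```c
#include <stdatomic.h>
#include <pthread.h>
#include <stdbool.h>

struct mmu {
	atomic_ulong seq;	/* bumped by invalidation notifiers */
	pthread_mutex_t lock;
};

/* Returns false if an invalidation may have raced with the translation,
 * in which case the caller retries (the hcall returns H_TOO_HARD). */
bool translate_and_use(struct mmu *mmu, void (*use_page)(void))
{
	unsigned long seq = atomic_load(&mmu->seq);	/* sample first */

	/* ... translate the guest address to a host physical address ... */

	pthread_mutex_lock(&mmu->lock);
	if (atomic_load(&mmu->seq) != seq) {		/* retry check */
		pthread_mutex_unlock(&mmu->lock);
		return false;
	}
	use_page();					/* safe: no invalidation raced */
	pthread_mutex_unlock(&mmu->lock);
	return true;
}

static void touch(void) { }

int main(void)
{
	static struct mmu m = { .lock = PTHREAD_MUTEX_INITIALIZER };
	return translate_and_use(&m, touch) ? 0 : 1;
}
```
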
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 3a5e719ef032..ad7bee97de77 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
| @@ -581,11 +581,8 @@ kvmppc_hv_entry: | |||
| 581 | 1: | 581 | 1: |
| 582 | #endif | 582 | #endif |
| 583 | 583 | ||
| 584 | /* Use cr7 as an indication of radix mode */ | ||
| 585 | ld r5, HSTATE_KVM_VCORE(r13) | 584 | ld r5, HSTATE_KVM_VCORE(r13) |
| 586 | ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ | 585 | ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ |
| 587 | lbz r0, KVM_RADIX(r9) | ||
| 588 | cmpwi cr7, r0, 0 | ||
| 589 | 586 | ||
| 590 | /* | 587 | /* |
| 591 | * POWER7/POWER8 host -> guest partition switch code. | 588 | * POWER7/POWER8 host -> guest partition switch code. |
| @@ -608,9 +605,6 @@ kvmppc_hv_entry: | |||
| 608 | cmpwi r6,0 | 605 | cmpwi r6,0 |
| 609 | bne 10f | 606 | bne 10f |
| 610 | 607 | ||
| 611 | /* Radix has already switched LPID and flushed core TLB */ | ||
| 612 | bne cr7, 22f | ||
| 613 | |||
| 614 | lwz r7,KVM_LPID(r9) | 608 | lwz r7,KVM_LPID(r9) |
| 615 | BEGIN_FTR_SECTION | 609 | BEGIN_FTR_SECTION |
| 616 | ld r6,KVM_SDR1(r9) | 610 | ld r6,KVM_SDR1(r9) |
| @@ -622,41 +616,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) | |||
| 622 | mtspr SPRN_LPID,r7 | 616 | mtspr SPRN_LPID,r7 |
| 623 | isync | 617 | isync |
| 624 | 618 | ||
| 625 | /* See if we need to flush the TLB. Hash has to be done in RM */ | 619 | /* See if we need to flush the TLB. */ |
| 626 | lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ | 620 | mr r3, r9 /* kvm pointer */ |
| 627 | BEGIN_FTR_SECTION | 621 | lhz r4, PACAPACAINDEX(r13) /* physical cpu number */ |
| 628 | /* | 622 | li r5, 0 /* nested vcpu pointer */ |
| 629 | * On POWER9, individual threads can come in here, but the | 623 | bl kvmppc_check_need_tlb_flush |
| 630 | * TLB is shared between the 4 threads in a core, hence | 624 | nop |
| 631 | * invalidating on one thread invalidates for all. | 625 | ld r5, HSTATE_KVM_VCORE(r13) |
| 632 | * Thus we make all 4 threads use the same bit here. | ||
| 633 | */ | ||
| 634 | clrrdi r6,r6,2 | ||
| 635 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | ||
| 636 | clrldi r7,r6,64-6 /* extract bit number (6 bits) */ | ||
| 637 | srdi r6,r6,6 /* doubleword number */ | ||
| 638 | sldi r6,r6,3 /* address offset */ | ||
| 639 | add r6,r6,r9 | ||
| 640 | addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ | ||
| 641 | li r8,1 | ||
| 642 | sld r8,r8,r7 | ||
| 643 | ld r7,0(r6) | ||
| 644 | and. r7,r7,r8 | ||
| 645 | beq 22f | ||
| 646 | /* Flush the TLB of any entries for this LPID */ | ||
| 647 | lwz r0,KVM_TLB_SETS(r9) | ||
| 648 | mtctr r0 | ||
| 649 | li r7,0x800 /* IS field = 0b10 */ | ||
| 650 | ptesync | ||
| 651 | li r0,0 /* RS for P9 version of tlbiel */ | ||
| 652 | 28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ | ||
| 653 | addi r7,r7,0x1000 | ||
| 654 | bdnz 28b | ||
| 655 | ptesync | ||
| 656 | 23: ldarx r7,0,r6 /* clear the bit after TLB flushed */ | ||
| 657 | andc r7,r7,r8 | ||
| 658 | stdcx. r7,0,r6 | ||
| 659 | bne 23b | ||
| 660 | 626 | ||
| 661 | /* Add timebase offset onto timebase */ | 627 | /* Add timebase offset onto timebase */ |
| 662 | 22: ld r8,VCORE_TB_OFFSET(r5) | 628 | 22: ld r8,VCORE_TB_OFFSET(r5) |
| @@ -822,18 +788,21 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) | |||
| 822 | mtspr SPRN_IAMR, r5 | 788 | mtspr SPRN_IAMR, r5 |
| 823 | mtspr SPRN_PSPB, r6 | 789 | mtspr SPRN_PSPB, r6 |
| 824 | mtspr SPRN_FSCR, r7 | 790 | mtspr SPRN_FSCR, r7 |
| 825 | ld r5, VCPU_DAWR(r4) | ||
| 826 | ld r6, VCPU_DAWRX(r4) | ||
| 827 | ld r7, VCPU_CIABR(r4) | ||
| 828 | ld r8, VCPU_TAR(r4) | ||
| 829 | /* | 791 | /* |
| 830 | * Handle broken DAWR case by not writing it. This means we | 792 | * Handle broken DAWR case by not writing it. This means we |
| 831 | * can still store the DAWR register for migration. | 793 | * can still store the DAWR register for migration. |
| 832 | */ | 794 | */ |
| 833 | BEGIN_FTR_SECTION | 795 | LOAD_REG_ADDR(r5, dawr_force_enable) |
| 796 | lbz r5, 0(r5) | ||
| 797 | cmpdi r5, 0 | ||
| 798 | beq 1f | ||
| 799 | ld r5, VCPU_DAWR(r4) | ||
| 800 | ld r6, VCPU_DAWRX(r4) | ||
| 834 | mtspr SPRN_DAWR, r5 | 801 | mtspr SPRN_DAWR, r5 |
| 835 | mtspr SPRN_DAWRX, r6 | 802 | mtspr SPRN_DAWRX, r6 |
| 836 | END_FTR_SECTION_IFSET(CPU_FTR_DAWR) | 803 | 1: |
| 804 | ld r7, VCPU_CIABR(r4) | ||
| 805 | ld r8, VCPU_TAR(r4) | ||
| 837 | mtspr SPRN_CIABR, r7 | 806 | mtspr SPRN_CIABR, r7 |
| 838 | mtspr SPRN_TAR, r8 | 807 | mtspr SPRN_TAR, r8 |
| 839 | ld r5, VCPU_IC(r4) | 808 | ld r5, VCPU_IC(r4) |
| @@ -969,17 +938,27 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) | |||
| 969 | 938 | ||
| 970 | #ifdef CONFIG_KVM_XICS | 939 | #ifdef CONFIG_KVM_XICS |
| 971 | /* We are entering the guest on that thread, push VCPU to XIVE */ | 940 | /* We are entering the guest on that thread, push VCPU to XIVE */ |
| 972 | ld r10, HSTATE_XIVE_TIMA_PHYS(r13) | ||
| 973 | cmpldi cr0, r10, 0 | ||
| 974 | beq no_xive | ||
| 975 | ld r11, VCPU_XIVE_SAVED_STATE(r4) | 941 | ld r11, VCPU_XIVE_SAVED_STATE(r4) |
| 976 | li r9, TM_QW1_OS | 942 | li r9, TM_QW1_OS |
| 943 | lwz r8, VCPU_XIVE_CAM_WORD(r4) | ||
| 944 | li r7, TM_QW1_OS + TM_WORD2 | ||
| 945 | mfmsr r0 | ||
| 946 | andi. r0, r0, MSR_DR /* in real mode? */ | ||
| 947 | beq 2f | ||
| 948 | ld r10, HSTATE_XIVE_TIMA_VIRT(r13) | ||
| 949 | cmpldi cr1, r10, 0 | ||
| 950 | beq cr1, no_xive | ||
| 951 | eieio | ||
| 952 | stdx r11,r9,r10 | ||
| 953 | stwx r8,r7,r10 | ||
| 954 | b 3f | ||
| 955 | 2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13) | ||
| 956 | cmpldi cr1, r10, 0 | ||
| 957 | beq cr1, no_xive | ||
| 977 | eieio | 958 | eieio |
| 978 | stdcix r11,r9,r10 | 959 | stdcix r11,r9,r10 |
| 979 | lwz r11, VCPU_XIVE_CAM_WORD(r4) | 960 | stwcix r8,r7,r10 |
| 980 | li r9, TM_QW1_OS + TM_WORD2 | 961 | 3: li r9, 1 |
| 981 | stwcix r11,r9,r10 | ||
| 982 | li r9, 1 | ||
| 983 | stb r9, VCPU_XIVE_PUSHED(r4) | 962 | stb r9, VCPU_XIVE_PUSHED(r4) |
| 984 | eieio | 963 | eieio |
| 985 | 964 | ||
| @@ -998,12 +977,16 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) | |||
| 998 | * on, we mask it. | 977 | * on, we mask it. |
| 999 | */ | 978 | */ |
| 1000 | lbz r0, VCPU_XIVE_ESC_ON(r4) | 979 | lbz r0, VCPU_XIVE_ESC_ON(r4) |
| 1001 | cmpwi r0,0 | 980 | cmpwi cr1, r0,0 |
| 1002 | beq 1f | 981 | beq cr1, 1f |
| 1003 | ld r10, VCPU_XIVE_ESC_RADDR(r4) | ||
| 1004 | li r9, XIVE_ESB_SET_PQ_01 | 982 | li r9, XIVE_ESB_SET_PQ_01 |
| 983 | beq 4f /* in real mode? */ | ||
| 984 | ld r10, VCPU_XIVE_ESC_VADDR(r4) | ||
| 985 | ldx r0, r10, r9 | ||
| 986 | b 5f | ||
| 987 | 4: ld r10, VCPU_XIVE_ESC_RADDR(r4) | ||
| 1005 | ldcix r0, r10, r9 | 988 | ldcix r0, r10, r9 |
| 1006 | sync | 989 | 5: sync |
| 1007 | 990 | ||
| 1008 | /* We have a possible subtle race here: The escalation interrupt might | 991 | /* We have a possible subtle race here: The escalation interrupt might |
| 1009 | * have fired and be on its way to the host queue while we mask it, | 992 | * have fired and be on its way to the host queue while we mask it, |
| @@ -2281,7 +2264,7 @@ hcall_real_table: | |||
| 2281 | #endif | 2264 | #endif |
| 2282 | .long 0 /* 0x24 - H_SET_SPRG0 */ | 2265 | .long 0 /* 0x24 - H_SET_SPRG0 */ |
| 2283 | .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table | 2266 | .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table |
| 2284 | .long 0 /* 0x2c */ | 2267 | .long DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table |
| 2285 | .long 0 /* 0x30 */ | 2268 | .long 0 /* 0x30 */ |
| 2286 | .long 0 /* 0x34 */ | 2269 | .long 0 /* 0x34 */ |
| 2287 | .long 0 /* 0x38 */ | 2270 | .long 0 /* 0x38 */ |
| @@ -2513,11 +2496,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) | |||
| 2513 | blr | 2496 | blr |
| 2514 | 2497 | ||
| 2515 | 2: | 2498 | 2: |
| 2516 | BEGIN_FTR_SECTION | 2499 | LOAD_REG_ADDR(r11, dawr_force_enable) |
| 2517 | /* POWER9 with disabled DAWR */ | 2500 | lbz r11, 0(r11) |
| 2501 | cmpdi r11, 0 | ||
| 2518 | li r3, H_HARDWARE | 2502 | li r3, H_HARDWARE |
| 2519 | blr | 2503 | beqlr |
| 2520 | END_FTR_SECTION_IFCLR(CPU_FTR_DAWR) | ||
| 2521 | /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ | 2504 | /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ |
| 2522 | rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW | 2505 | rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW |
| 2523 | rlwimi r5, r4, 2, DAWRX_WT | 2506 | rlwimi r5, r4, 2, DAWRX_WT |
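
The rmhandlers.S change above replaces the CPU_FTR_DAWR feature section with a runtime check of dawr_force_enable, so the guest's DAWR/DAWRX are written only when the host has force-enabled the watchpoint. Roughly the same logic in C, with a mock mtspr() and placeholder register indices rather than real SPR numbers:

```c
#include <stdio.h>

static unsigned char dawr_force_enable;	/* toggled via debugfs in the kernel */
static unsigned long sprs[2];		/* [0] = DAWR, [1] = DAWRX (mock) */

static void mtspr(int spr, unsigned long val) { sprs[spr] = val; }

static void load_guest_dawr(unsigned long dawr, unsigned long dawrx)
{
	if (!dawr_force_enable)
		return;			/* DAWR unusable: skip the writes */
	mtspr(0, dawr);
	mtspr(1, dawrx);
}

int main(void)
{
	load_guest_dawr(0x1000, 0x3);	/* ignored: not force-enabled */
	dawr_force_enable = 1;
	load_guest_dawr(0x1000, 0x3);	/* now takes effect */
	printf("DAWR=%#lx DAWRX=%#lx\n", sprs[0], sprs[1]);
	return 0;
}
```
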
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f78d002f0fe0..4953957333b7 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c | |||
| @@ -166,7 +166,8 @@ static irqreturn_t xive_esc_irq(int irq, void *data) | |||
| 166 | return IRQ_HANDLED; | 166 | return IRQ_HANDLED; |
| 167 | } | 167 | } |
| 168 | 168 | ||
| 169 | static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) | 169 | int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, |
| 170 | bool single_escalation) | ||
| 170 | { | 171 | { |
| 171 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 172 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
| 172 | struct xive_q *q = &xc->queues[prio]; | 173 | struct xive_q *q = &xc->queues[prio]; |
| @@ -185,7 +186,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) | |||
| 185 | return -EIO; | 186 | return -EIO; |
| 186 | } | 187 | } |
| 187 | 188 | ||
| 188 | if (xc->xive->single_escalation) | 189 | if (single_escalation) |
| 189 | name = kasprintf(GFP_KERNEL, "kvm-%d-%d", | 190 | name = kasprintf(GFP_KERNEL, "kvm-%d-%d", |
| 190 | vcpu->kvm->arch.lpid, xc->server_num); | 191 | vcpu->kvm->arch.lpid, xc->server_num); |
| 191 | else | 192 | else |
| @@ -217,7 +218,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) | |||
| 217 | * interrupt, thus leaving it effectively masked after | 218 | * interrupt, thus leaving it effectively masked after |
| 218 | * it fires once. | 219 | * it fires once. |
| 219 | */ | 220 | */ |
| 220 | if (xc->xive->single_escalation) { | 221 | if (single_escalation) { |
| 221 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); | 222 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); |
| 222 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); | 223 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); |
| 223 | 224 | ||
| @@ -291,7 +292,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) | |||
| 291 | continue; | 292 | continue; |
| 292 | rc = xive_provision_queue(vcpu, prio); | 293 | rc = xive_provision_queue(vcpu, prio); |
| 293 | if (rc == 0 && !xive->single_escalation) | 294 | if (rc == 0 && !xive->single_escalation) |
| 294 | xive_attach_escalation(vcpu, prio); | 295 | kvmppc_xive_attach_escalation(vcpu, prio, |
| 296 | xive->single_escalation); | ||
| 295 | if (rc) | 297 | if (rc) |
| 296 | return rc; | 298 | return rc; |
| 297 | } | 299 | } |
| @@ -342,7 +344,7 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio) | |||
| 342 | return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY; | 344 | return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY; |
| 343 | } | 345 | } |
| 344 | 346 | ||
| 345 | static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) | 347 | int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio) |
| 346 | { | 348 | { |
| 347 | struct kvm_vcpu *vcpu; | 349 | struct kvm_vcpu *vcpu; |
| 348 | int i, rc; | 350 | int i, rc; |
| @@ -380,11 +382,6 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) | |||
| 380 | return -EBUSY; | 382 | return -EBUSY; |
| 381 | } | 383 | } |
| 382 | 384 | ||
| 383 | static u32 xive_vp(struct kvmppc_xive *xive, u32 server) | ||
| 384 | { | ||
| 385 | return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); | ||
| 386 | } | ||
| 387 | |||
| 388 | static u8 xive_lock_and_mask(struct kvmppc_xive *xive, | 385 | static u8 xive_lock_and_mask(struct kvmppc_xive *xive, |
| 389 | struct kvmppc_xive_src_block *sb, | 386 | struct kvmppc_xive_src_block *sb, |
| 390 | struct kvmppc_xive_irq_state *state) | 387 | struct kvmppc_xive_irq_state *state) |
| @@ -430,8 +427,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, | |||
| 430 | */ | 427 | */ |
| 431 | if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { | 428 | if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { |
| 432 | xive_native_configure_irq(hw_num, | 429 | xive_native_configure_irq(hw_num, |
| 433 | xive_vp(xive, state->act_server), | 430 | kvmppc_xive_vp(xive, state->act_server), |
| 434 | MASKED, state->number); | 431 | MASKED, state->number); |
| 435 | /* set old_p so we can track if an H_EOI was done */ | 432 | /* set old_p so we can track if an H_EOI was done */ |
| 436 | state->old_p = true; | 433 | state->old_p = true; |
| 437 | state->old_q = false; | 434 | state->old_q = false; |
| @@ -486,8 +483,8 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, | |||
| 486 | */ | 483 | */ |
| 487 | if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { | 484 | if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { |
| 488 | xive_native_configure_irq(hw_num, | 485 | xive_native_configure_irq(hw_num, |
| 489 | xive_vp(xive, state->act_server), | 486 | kvmppc_xive_vp(xive, state->act_server), |
| 490 | state->act_priority, state->number); | 487 | state->act_priority, state->number); |
| 491 | /* If an EOI is needed, do it here */ | 488 | /* If an EOI is needed, do it here */ |
| 492 | if (!state->old_p) | 489 | if (!state->old_p) |
| 493 | xive_vm_source_eoi(hw_num, xd); | 490 | xive_vm_source_eoi(hw_num, xd); |
| @@ -535,7 +532,7 @@ static int xive_target_interrupt(struct kvm *kvm, | |||
| 535 | * priority. The count for that new target will have | 532 | * priority. The count for that new target will have |
| 536 | * already been incremented. | 533 | * already been incremented. |
| 537 | */ | 534 | */ |
| 538 | rc = xive_select_target(kvm, &server, prio); | 535 | rc = kvmppc_xive_select_target(kvm, &server, prio); |
| 539 | 536 | ||
| 540 | /* | 537 | /* |
| 541 | * We failed to find a target ? Not much we can do | 538 | * We failed to find a target ? Not much we can do |
| @@ -563,7 +560,7 @@ static int xive_target_interrupt(struct kvm *kvm, | |||
| 563 | kvmppc_xive_select_irq(state, &hw_num, NULL); | 560 | kvmppc_xive_select_irq(state, &hw_num, NULL); |
| 564 | 561 | ||
| 565 | return xive_native_configure_irq(hw_num, | 562 | return xive_native_configure_irq(hw_num, |
| 566 | xive_vp(xive, server), | 563 | kvmppc_xive_vp(xive, server), |
| 567 | prio, state->number); | 564 | prio, state->number); |
| 568 | } | 565 | } |
| 569 | 566 | ||
| @@ -849,7 +846,8 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) | |||
| 849 | 846 | ||
| 850 | /* | 847 | /* |
| 851 | * We can't update the state of a "pushed" VCPU, but that | 848 | * We can't update the state of a "pushed" VCPU, but that |
| 852 | * shouldn't happen. | 849 | * shouldn't happen because the vcpu->mutex makes running a |
| 850 | * vcpu mutually exclusive with doing one_reg get/set on it. | ||
| 853 | */ | 851 | */ |
| 854 | if (WARN_ON(vcpu->arch.xive_pushed)) | 852 | if (WARN_ON(vcpu->arch.xive_pushed)) |
| 855 | return -EIO; | 853 | return -EIO; |
| @@ -940,6 +938,13 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, | |||
| 940 | /* Turn the IPI hard off */ | 938 | /* Turn the IPI hard off */ |
| 941 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); | 939 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); |
| 942 | 940 | ||
| 941 | /* | ||
| 942 | * Reset ESB guest mapping. Needed when ESB pages are exposed | ||
| 943 | * to the guest in XIVE native mode | ||
| 944 | */ | ||
| 945 | if (xive->ops && xive->ops->reset_mapped) | ||
| 946 | xive->ops->reset_mapped(kvm, guest_irq); | ||
| 947 | |||
| 943 | /* Grab info about irq */ | 948 | /* Grab info about irq */ |
| 944 | state->pt_number = hw_irq; | 949 | state->pt_number = hw_irq; |
| 945 | state->pt_data = irq_data_get_irq_handler_data(host_data); | 950 | state->pt_data = irq_data_get_irq_handler_data(host_data); |
| @@ -951,7 +956,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, | |||
| 951 | * which is fine for a never started interrupt. | 956 | * which is fine for a never started interrupt. |
| 952 | */ | 957 | */ |
| 953 | xive_native_configure_irq(hw_irq, | 958 | xive_native_configure_irq(hw_irq, |
| 954 | xive_vp(xive, state->act_server), | 959 | kvmppc_xive_vp(xive, state->act_server), |
| 955 | state->act_priority, state->number); | 960 | state->act_priority, state->number); |
| 956 | 961 | ||
| 957 | /* | 962 | /* |
| @@ -1025,9 +1030,17 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, | |||
| 1025 | state->pt_number = 0; | 1030 | state->pt_number = 0; |
| 1026 | state->pt_data = NULL; | 1031 | state->pt_data = NULL; |
| 1027 | 1032 | ||
| 1033 | /* | ||
| 1034 | * Reset ESB guest mapping. Needed when ESB pages are exposed | ||
| 1035 | * to the guest in XIVE native mode | ||
| 1036 | */ | ||
| 1037 | if (xive->ops && xive->ops->reset_mapped) { | ||
| 1038 | xive->ops->reset_mapped(kvm, guest_irq); | ||
| 1039 | } | ||
| 1040 | |||
| 1028 | /* Reconfigure the IPI */ | 1041 | /* Reconfigure the IPI */ |
| 1029 | xive_native_configure_irq(state->ipi_number, | 1042 | xive_native_configure_irq(state->ipi_number, |
| 1030 | xive_vp(xive, state->act_server), | 1043 | kvmppc_xive_vp(xive, state->act_server), |
| 1031 | state->act_priority, state->number); | 1044 | state->act_priority, state->number); |
| 1032 | 1045 | ||
| 1033 | /* | 1046 | /* |
| @@ -1049,7 +1062,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, | |||
| 1049 | } | 1062 | } |
| 1050 | EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); | 1063 | EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); |
| 1051 | 1064 | ||
| 1052 | static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) | 1065 | void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) |
| 1053 | { | 1066 | { |
| 1054 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 1067 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
| 1055 | struct kvm *kvm = vcpu->kvm; | 1068 | struct kvm *kvm = vcpu->kvm; |
| @@ -1083,14 +1096,35 @@ static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) | |||
| 1083 | arch_spin_unlock(&sb->lock); | 1096 | arch_spin_unlock(&sb->lock); |
| 1084 | } | 1097 | } |
| 1085 | } | 1098 | } |
| 1099 | |||
| 1100 | /* Disable vcpu's escalation interrupt */ | ||
| 1101 | if (vcpu->arch.xive_esc_on) { | ||
| 1102 | __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr + | ||
| 1103 | XIVE_ESB_SET_PQ_01)); | ||
| 1104 | vcpu->arch.xive_esc_on = false; | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | /* | ||
| 1108 | * Clear pointers to escalation interrupt ESB. | ||
| 1109 | * This is safe because the vcpu->mutex is held, preventing | ||
| 1110 | * any other CPU from concurrently executing a KVM_RUN ioctl. | ||
| 1111 | */ | ||
| 1112 | vcpu->arch.xive_esc_vaddr = 0; | ||
| 1113 | vcpu->arch.xive_esc_raddr = 0; | ||
| 1086 | } | 1114 | } |
| 1087 | 1115 | ||
| 1088 | void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) | 1116 | void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) |
| 1089 | { | 1117 | { |
| 1090 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 1118 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
| 1091 | struct kvmppc_xive *xive = xc->xive; | 1119 | struct kvmppc_xive *xive = vcpu->kvm->arch.xive; |
| 1092 | int i; | 1120 | int i; |
| 1093 | 1121 | ||
| 1122 | if (!kvmppc_xics_enabled(vcpu)) | ||
| 1123 | return; | ||
| 1124 | |||
| 1125 | if (!xc) | ||
| 1126 | return; | ||
| 1127 | |||
| 1094 | pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num); | 1128 | pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num); |
| 1095 | 1129 | ||
| 1096 | /* Ensure no interrupt is still routed to that VP */ | 1130 | /* Ensure no interrupt is still routed to that VP */ |
| @@ -1129,6 +1163,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) | |||
| 1129 | } | 1163 | } |
| 1130 | /* Free the VP */ | 1164 | /* Free the VP */ |
| 1131 | kfree(xc); | 1165 | kfree(xc); |
| 1166 | |||
| 1167 | /* Cleanup the vcpu */ | ||
| 1168 | vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; | ||
| 1169 | vcpu->arch.xive_vcpu = NULL; | ||
| 1132 | } | 1170 | } |
| 1133 | 1171 | ||
| 1134 | int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | 1172 | int kvmppc_xive_connect_vcpu(struct kvm_device *dev, |
| @@ -1146,7 +1184,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
| 1146 | } | 1184 | } |
| 1147 | if (xive->kvm != vcpu->kvm) | 1185 | if (xive->kvm != vcpu->kvm) |
| 1148 | return -EPERM; | 1186 | return -EPERM; |
| 1149 | if (vcpu->arch.irq_type) | 1187 | if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) |
| 1150 | return -EBUSY; | 1188 | return -EBUSY; |
| 1151 | if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { | 1189 | if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { |
| 1152 | pr_devel("Duplicate !\n"); | 1190 | pr_devel("Duplicate !\n"); |
| @@ -1166,7 +1204,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
| 1166 | xc->xive = xive; | 1204 | xc->xive = xive; |
| 1167 | xc->vcpu = vcpu; | 1205 | xc->vcpu = vcpu; |
| 1168 | xc->server_num = cpu; | 1206 | xc->server_num = cpu; |
| 1169 | xc->vp_id = xive_vp(xive, cpu); | 1207 | xc->vp_id = kvmppc_xive_vp(xive, cpu); |
| 1170 | xc->mfrr = 0xff; | 1208 | xc->mfrr = 0xff; |
| 1171 | xc->valid = true; | 1209 | xc->valid = true; |
| 1172 | 1210 | ||
| @@ -1219,7 +1257,8 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
| 1219 | if (xive->qmap & (1 << i)) { | 1257 | if (xive->qmap & (1 << i)) { |
| 1220 | r = xive_provision_queue(vcpu, i); | 1258 | r = xive_provision_queue(vcpu, i); |
| 1221 | if (r == 0 && !xive->single_escalation) | 1259 | if (r == 0 && !xive->single_escalation) |
| 1222 | xive_attach_escalation(vcpu, i); | 1260 | kvmppc_xive_attach_escalation( |
| 1261 | vcpu, i, xive->single_escalation); | ||
| 1223 | if (r) | 1262 | if (r) |
| 1224 | goto bail; | 1263 | goto bail; |
| 1225 | } else { | 1264 | } else { |
| @@ -1234,7 +1273,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
| 1234 | } | 1273 | } |
| 1235 | 1274 | ||
| 1236 | /* If not done above, attach priority 0 escalation */ | 1275 | /* If not done above, attach priority 0 escalation */ |
| 1237 | r = xive_attach_escalation(vcpu, 0); | 1276 | r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation); |
| 1238 | if (r) | 1277 | if (r) |
| 1239 | goto bail; | 1278 | goto bail; |
| 1240 | 1279 | ||
| @@ -1485,8 +1524,8 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr) | |||
| 1485 | return 0; | 1524 | return 0; |
| 1486 | } | 1525 | } |
| 1487 | 1526 | ||
| 1488 | static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive, | 1527 | struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( |
| 1489 | int irq) | 1528 | struct kvmppc_xive *xive, int irq) |
| 1490 | { | 1529 | { |
| 1491 | struct kvm *kvm = xive->kvm; | 1530 | struct kvm *kvm = xive->kvm; |
| 1492 | struct kvmppc_xive_src_block *sb; | 1531 | struct kvmppc_xive_src_block *sb; |
| @@ -1509,6 +1548,7 @@ static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *x | |||
| 1509 | 1548 | ||
| 1510 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | 1549 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { |
| 1511 | sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i; | 1550 | sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i; |
| 1551 | sb->irq_state[i].eisn = 0; | ||
| 1512 | sb->irq_state[i].guest_priority = MASKED; | 1552 | sb->irq_state[i].guest_priority = MASKED; |
| 1513 | sb->irq_state[i].saved_priority = MASKED; | 1553 | sb->irq_state[i].saved_priority = MASKED; |
| 1514 | sb->irq_state[i].act_priority = MASKED; | 1554 | sb->irq_state[i].act_priority = MASKED; |
| @@ -1565,7 +1605,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) | |||
| 1565 | sb = kvmppc_xive_find_source(xive, irq, &idx); | 1605 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
| 1566 | if (!sb) { | 1606 | if (!sb) { |
| 1567 | pr_devel("No source, creating source block...\n"); | 1607 | pr_devel("No source, creating source block...\n"); |
| 1568 | sb = xive_create_src_block(xive, irq); | 1608 | sb = kvmppc_xive_create_src_block(xive, irq); |
| 1569 | if (!sb) { | 1609 | if (!sb) { |
| 1570 | pr_devel("Failed to create block...\n"); | 1610 | pr_devel("Failed to create block...\n"); |
| 1571 | return -ENOMEM; | 1611 | return -ENOMEM; |
| @@ -1789,7 +1829,7 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd) | |||
| 1789 | xive_cleanup_irq_data(xd); | 1829 | xive_cleanup_irq_data(xd); |
| 1790 | } | 1830 | } |
| 1791 | 1831 | ||
| 1792 | static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) | 1832 | void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) |
| 1793 | { | 1833 | { |
| 1794 | int i; | 1834 | int i; |
| 1795 | 1835 | ||
| @@ -1810,16 +1850,55 @@ static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) | |||
| 1810 | } | 1850 | } |
| 1811 | } | 1851 | } |
| 1812 | 1852 | ||
| 1813 | static void kvmppc_xive_free(struct kvm_device *dev) | 1853 | /* |
| 1854 | * Called when device fd is closed. kvm->lock is held. | ||
| 1855 | */ | ||
| 1856 | static void kvmppc_xive_release(struct kvm_device *dev) | ||
| 1814 | { | 1857 | { |
| 1815 | struct kvmppc_xive *xive = dev->private; | 1858 | struct kvmppc_xive *xive = dev->private; |
| 1816 | struct kvm *kvm = xive->kvm; | 1859 | struct kvm *kvm = xive->kvm; |
| 1860 | struct kvm_vcpu *vcpu; | ||
| 1817 | int i; | 1861 | int i; |
| 1862 | int was_ready; | ||
| 1863 | |||
| 1864 | pr_devel("Releasing xive device\n"); | ||
| 1818 | 1865 | ||
| 1819 | debugfs_remove(xive->dentry); | 1866 | debugfs_remove(xive->dentry); |
| 1820 | 1867 | ||
| 1821 | if (kvm) | 1868 | /* |
| 1822 | kvm->arch.xive = NULL; | 1869 | * Clearing mmu_ready temporarily while holding kvm->lock |
| 1870 | * is a way of ensuring that no vcpus can enter the guest | ||
| 1871 | * until we drop kvm->lock. Doing kick_all_cpus_sync() | ||
| 1872 | * ensures that any vcpu executing inside the guest has | ||
| 1873 | * exited the guest. Once kick_all_cpus_sync() has finished, | ||
| 1874 | * we know that no vcpu can be executing the XIVE push or | ||
| 1875 | * pull code, or executing a XICS hcall. | ||
| 1876 | * | ||
| 1877 | * Since this is the device release function, we know that | ||
| 1878 | * userspace does not have any open fd referring to the | ||
| 1879 | * device. Therefore there can not be any of the device | ||
| 1880 | * attribute set/get functions being executed concurrently, | ||
| 1881 | * and similarly, the connect_vcpu and set/clr_mapped | ||
| 1882 | * functions also cannot be executing. | ||
| 1883 | */ | ||
| 1884 | was_ready = kvm->arch.mmu_ready; | ||
| 1885 | kvm->arch.mmu_ready = 0; | ||
| 1886 | kick_all_cpus_sync(); | ||
| 1887 | |||
| 1888 | /* | ||
| 1889 | * We should clean up the vCPU interrupt presenters first. | ||
| 1890 | */ | ||
| 1891 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
| 1892 | /* | ||
| 1893 | * Take vcpu->mutex to ensure that no one_reg get/set ioctl | ||
| 1894 | * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently. | ||
| 1895 | */ | ||
| 1896 | mutex_lock(&vcpu->mutex); | ||
| 1897 | kvmppc_xive_cleanup_vcpu(vcpu); | ||
| 1898 | mutex_unlock(&vcpu->mutex); | ||
| 1899 | } | ||
| 1900 | |||
| 1901 | kvm->arch.xive = NULL; | ||
| 1823 | 1902 | ||
| 1824 | /* Mask and free interrupts */ | 1903 | /* Mask and free interrupts */ |
| 1825 | for (i = 0; i <= xive->max_sbid; i++) { | 1904 | for (i = 0; i <= xive->max_sbid; i++) { |
| @@ -1832,11 +1911,47 @@ static void kvmppc_xive_free(struct kvm_device *dev) | |||
| 1832 | if (xive->vp_base != XIVE_INVALID_VP) | 1911 | if (xive->vp_base != XIVE_INVALID_VP) |
| 1833 | xive_native_free_vp_block(xive->vp_base); | 1912 | xive_native_free_vp_block(xive->vp_base); |
| 1834 | 1913 | ||
| 1914 | kvm->arch.mmu_ready = was_ready; | ||
| 1915 | |||
| 1916 | /* | ||
| 1917 | * A reference to the kvmppc_xive pointer is now kept under | ||
| 1918 | * the xive_devices struct of the machine for reuse. For now, it | ||
| 1919 | * is freed only when the VM is destroyed, until we fix all the | ||
| 1920 | * execution paths. | ||
| 1921 | */ | ||
| 1835 | 1922 | ||
| 1836 | kfree(xive); | ||
| 1837 | kfree(dev); | 1923 | kfree(dev); |
| 1838 | } | 1924 | } |
| 1839 | 1925 | ||
| 1926 | /* | ||
| 1927 | * When the guest chooses the interrupt mode (XICS legacy or XIVE | ||
| 1928 | * native), the VM will switch KVM devices. The previous device will | ||
| 1929 | * be "released" before the new one is created. | ||
| 1930 | * | ||
| 1931 | * Until we are sure all execution paths are well protected, provide a | ||
| 1932 | * fail safe (transitional) method for device destruction, in which | ||
| 1933 | * the XIVE device pointer is recycled and not directly freed. | ||
| 1934 | */ | ||
| 1935 | struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type) | ||
| 1936 | { | ||
| 1937 | struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ? | ||
| 1938 | &kvm->arch.xive_devices.native : | ||
| 1939 | &kvm->arch.xive_devices.xics_on_xive; | ||
| 1940 | struct kvmppc_xive *xive = *kvm_xive_device; | ||
| 1941 | |||
| 1942 | if (!xive) { | ||
| 1943 | xive = kzalloc(sizeof(*xive), GFP_KERNEL); | ||
| 1944 | *kvm_xive_device = xive; | ||
| 1945 | } else { | ||
| 1946 | memset(xive, 0, sizeof(*xive)); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | return xive; | ||
| 1950 | } | ||
| 1951 | |||
| 1952 | /* | ||
| 1953 | * Create a XICS device with XIVE backend. kvm->lock is held. | ||
| 1954 | */ | ||
| 1840 | static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | 1955 | static int kvmppc_xive_create(struct kvm_device *dev, u32 type) |
| 1841 | { | 1956 | { |
| 1842 | struct kvmppc_xive *xive; | 1957 | struct kvmppc_xive *xive; |
| @@ -1845,7 +1960,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | |||
| 1845 | 1960 | ||
| 1846 | pr_devel("Creating xive for partition\n"); | 1961 | pr_devel("Creating xive for partition\n"); |
| 1847 | 1962 | ||
| 1848 | xive = kzalloc(sizeof(*xive), GFP_KERNEL); | 1963 | xive = kvmppc_xive_get_device(kvm, type); |
| 1849 | if (!xive) | 1964 | if (!xive) |
| 1850 | return -ENOMEM; | 1965 | return -ENOMEM; |
| 1851 | 1966 | ||
| @@ -1883,6 +1998,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | |||
| 1883 | return 0; | 1998 | return 0; |
| 1884 | } | 1999 | } |
| 1885 | 2000 | ||
| 2001 | int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) | ||
| 2002 | { | ||
| 2003 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
| 2004 | unsigned int i; | ||
| 2005 | |||
| 2006 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { | ||
| 2007 | struct xive_q *q = &xc->queues[i]; | ||
| 2008 | u32 i0, i1, idx; | ||
| 2009 | |||
| 2010 | if (!q->qpage && !xc->esc_virq[i]) | ||
| 2011 | continue; | ||
| 2012 | |||
| 2013 | seq_printf(m, " [q%d]: ", i); | ||
| 2014 | |||
| 2015 | if (q->qpage) { | ||
| 2016 | idx = q->idx; | ||
| 2017 | i0 = be32_to_cpup(q->qpage + idx); | ||
| 2018 | idx = (idx + 1) & q->msk; | ||
| 2019 | i1 = be32_to_cpup(q->qpage + idx); | ||
| 2020 | seq_printf(m, "T=%d %08x %08x...\n", q->toggle, | ||
| 2021 | i0, i1); | ||
| 2022 | } | ||
| 2023 | if (xc->esc_virq[i]) { | ||
| 2024 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); | ||
| 2025 | struct xive_irq_data *xd = | ||
| 2026 | irq_data_get_irq_handler_data(d); | ||
| 2027 | u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); | ||
| 2028 | |||
| 2029 | seq_printf(m, "E:%c%c I(%d:%llx:%llx)", | ||
| 2030 | (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', | ||
| 2031 | (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', | ||
| 2032 | xc->esc_virq[i], pq, xd->eoi_page); | ||
| 2033 | seq_puts(m, "\n"); | ||
| 2034 | } | ||
| 2035 | } | ||
| 2036 | return 0; | ||
| 2037 | } | ||
| 1886 | 2038 | ||
| 1887 | static int xive_debug_show(struct seq_file *m, void *private) | 2039 | static int xive_debug_show(struct seq_file *m, void *private) |
| 1888 | { | 2040 | { |
| @@ -1908,7 +2060,6 @@ static int xive_debug_show(struct seq_file *m, void *private) | |||
| 1908 | 2060 | ||
| 1909 | kvm_for_each_vcpu(i, vcpu, kvm) { | 2061 | kvm_for_each_vcpu(i, vcpu, kvm) { |
| 1910 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 2062 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
| 1911 | unsigned int i; | ||
| 1912 | 2063 | ||
| 1913 | if (!xc) | 2064 | if (!xc) |
| 1914 | continue; | 2065 | continue; |
| @@ -1918,33 +2069,8 @@ static int xive_debug_show(struct seq_file *m, void *private) | |||
| 1918 | xc->server_num, xc->cppr, xc->hw_cppr, | 2069 | xc->server_num, xc->cppr, xc->hw_cppr, |
| 1919 | xc->mfrr, xc->pending, | 2070 | xc->mfrr, xc->pending, |
| 1920 | xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); | 2071 | xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); |
| 1921 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { | ||
| 1922 | struct xive_q *q = &xc->queues[i]; | ||
| 1923 | u32 i0, i1, idx; | ||
| 1924 | |||
| 1925 | if (!q->qpage && !xc->esc_virq[i]) | ||
| 1926 | continue; | ||
| 1927 | 2072 | ||
| 1928 | seq_printf(m, " [q%d]: ", i); | 2073 | kvmppc_xive_debug_show_queues(m, vcpu); |
| 1929 | |||
| 1930 | if (q->qpage) { | ||
| 1931 | idx = q->idx; | ||
| 1932 | i0 = be32_to_cpup(q->qpage + idx); | ||
| 1933 | idx = (idx + 1) & q->msk; | ||
| 1934 | i1 = be32_to_cpup(q->qpage + idx); | ||
| 1935 | seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); | ||
| 1936 | } | ||
| 1937 | if (xc->esc_virq[i]) { | ||
| 1938 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); | ||
| 1939 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); | ||
| 1940 | u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); | ||
| 1941 | seq_printf(m, "E:%c%c I(%d:%llx:%llx)", | ||
| 1942 | (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', | ||
| 1943 | (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', | ||
| 1944 | xc->esc_virq[i], pq, xd->eoi_page); | ||
| 1945 | seq_printf(m, "\n"); | ||
| 1946 | } | ||
| 1947 | } | ||
| 1948 | 2074 | ||
| 1949 | t_rm_h_xirr += xc->stat_rm_h_xirr; | 2075 | t_rm_h_xirr += xc->stat_rm_h_xirr; |
| 1950 | t_rm_h_ipoll += xc->stat_rm_h_ipoll; | 2076 | t_rm_h_ipoll += xc->stat_rm_h_ipoll; |
| @@ -1999,7 +2125,7 @@ struct kvm_device_ops kvm_xive_ops = { | |||
| 1999 | .name = "kvm-xive", | 2125 | .name = "kvm-xive", |
| 2000 | .create = kvmppc_xive_create, | 2126 | .create = kvmppc_xive_create, |
| 2001 | .init = kvmppc_xive_init, | 2127 | .init = kvmppc_xive_init, |
| 2002 | .destroy = kvmppc_xive_free, | 2128 | .release = kvmppc_xive_release, |
| 2003 | .set_attr = xive_set_attr, | 2129 | .set_attr = xive_set_attr, |
| 2004 | .get_attr = xive_get_attr, | 2130 | .get_attr = xive_get_attr, |
| 2005 | .has_attr = xive_has_attr, | 2131 | .has_attr = xive_has_attr, |
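
The switch from `.destroy` to `.release` in the ops table above changes when the in-kernel XICS-on-XIVE controller is torn down: it now happens when userspace closes the device file descriptor, while the VM itself keeps running, which is what lets the guest renegotiate its interrupt mode. Below is a minimal, hypothetical userspace sketch of that lifecycle; it is not part of this patch, and `vm_fd` is assumed to come from the usual `KVM_CREATE_VM` sequence.

```c
/*
 * Hypothetical sketch (not from this series): create the XICS-on-XIVE
 * device and later close its fd. With .release wired up instead of
 * .destroy, close(dev_fd) now runs kvmppc_xive_release() while the VM
 * stays alive, so a different interrupt-controller device can be
 * created afterwards.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

static int create_xics_on_xive(int vm_fd)
{
	struct kvm_create_device cd;

	memset(&cd, 0, sizeof(cd));
	cd.type = KVM_DEV_TYPE_XICS;	/* XICS device, XIVE backend on POWER9 hosts */

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	return cd.fd;	/* closing this fd now ends up in kvmppc_xive_release() */
}
```

With the previous `.destroy` hook, the device was only destroyed when the whole VM went away, so it could not be replaced at runtime.
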
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index a08ae6fd4c51..426146332984 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h | |||
| @@ -13,6 +13,13 @@ | |||
| 13 | #include "book3s_xics.h" | 13 | #include "book3s_xics.h" |
| 14 | 14 | ||
| 15 | /* | 15 | /* |
| 16 | * The XIVE Interrupt source numbers are within the range 0 to | ||
| 17 | * KVMPPC_XICS_NR_IRQS. | ||
| 18 | */ | ||
| 19 | #define KVMPPC_XIVE_FIRST_IRQ 0 | ||
| 20 | #define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS | ||
| 21 | |||
| 22 | /* | ||
| 16 | * State for one guest irq source. | 23 | * State for one guest irq source. |
| 17 | * | 24 | * |
| 18 | * For each guest source we allocate a HW interrupt in the XIVE | 25 | * For each guest source we allocate a HW interrupt in the XIVE |
| @@ -54,6 +61,9 @@ struct kvmppc_xive_irq_state { | |||
| 54 | bool saved_p; | 61 | bool saved_p; |
| 55 | bool saved_q; | 62 | bool saved_q; |
| 56 | u8 saved_scan_prio; | 63 | u8 saved_scan_prio; |
| 64 | |||
| 65 | /* Xive native */ | ||
| 66 | u32 eisn; /* Guest Effective IRQ number */ | ||
| 57 | }; | 67 | }; |
| 58 | 68 | ||
| 59 | /* Select the "right" interrupt (IPI vs. passthrough) */ | 69 | /* Select the "right" interrupt (IPI vs. passthrough) */ |
| @@ -84,6 +94,11 @@ struct kvmppc_xive_src_block { | |||
| 84 | struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; | 94 | struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; |
| 85 | }; | 95 | }; |
| 86 | 96 | ||
| 97 | struct kvmppc_xive; | ||
| 98 | |||
| 99 | struct kvmppc_xive_ops { | ||
| 100 | int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq); | ||
| 101 | }; | ||
| 87 | 102 | ||
| 88 | struct kvmppc_xive { | 103 | struct kvmppc_xive { |
| 89 | struct kvm *kvm; | 104 | struct kvm *kvm; |
| @@ -122,6 +137,10 @@ struct kvmppc_xive { | |||
| 122 | 137 | ||
| 123 | /* Flags */ | 138 | /* Flags */ |
| 124 | u8 single_escalation; | 139 | u8 single_escalation; |
| 140 | |||
| 141 | struct kvmppc_xive_ops *ops; | ||
| 142 | struct address_space *mapping; | ||
| 143 | struct mutex mapping_lock; | ||
| 125 | }; | 144 | }; |
| 126 | 145 | ||
| 127 | #define KVMPPC_XIVE_Q_COUNT 8 | 146 | #define KVMPPC_XIVE_Q_COUNT 8 |
| @@ -198,6 +217,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp | |||
| 198 | return xive->src_blocks[bid]; | 217 | return xive->src_blocks[bid]; |
| 199 | } | 218 | } |
| 200 | 219 | ||
| 220 | static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) | ||
| 221 | { | ||
| 222 | return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); | ||
| 223 | } | ||
| 224 | |||
| 201 | /* | 225 | /* |
| 202 | * Mapping between guest priorities and host priorities | 226 | * Mapping between guest priorities and host priorities |
| 203 | * is as follow. | 227 | * is as follow. |
| @@ -248,5 +272,18 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, | |||
| 248 | extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); | 272 | extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); |
| 249 | extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); | 273 | extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); |
| 250 | 274 | ||
| 275 | /* | ||
| 276 | * Common Xive routines for XICS-over-XIVE and XIVE native | ||
| 277 | */ | ||
| 278 | void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); | ||
| 279 | int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); | ||
| 280 | struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( | ||
| 281 | struct kvmppc_xive *xive, int irq); | ||
| 282 | void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); | ||
| 283 | int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); | ||
| 284 | int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, | ||
| 285 | bool single_escalation); | ||
| 286 | struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); | ||
| 287 | |||
| 251 | #endif /* CONFIG_KVM_XICS */ | 288 | #endif /* CONFIG_KVM_XICS */ |
| 252 | #endif /* _KVM_PPC_BOOK3S_XICS_H */ | 289 | #endif /* _KVM_PPC_BOOK3S_XICS_H */ |
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c new file mode 100644 index 000000000000..6a8e698c4b6e --- /dev/null +++ b/arch/powerpc/kvm/book3s_xive_native.c | |||
| @@ -0,0 +1,1249 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2017-2019, IBM Corporation. | ||
| 4 | */ | ||
| 5 | |||
| 6 | #define pr_fmt(fmt) "xive-kvm: " fmt | ||
| 7 | |||
| 8 | #include <linux/kernel.h> | ||
| 9 | #include <linux/kvm_host.h> | ||
| 10 | #include <linux/err.h> | ||
| 11 | #include <linux/gfp.h> | ||
| 12 | #include <linux/spinlock.h> | ||
| 13 | #include <linux/delay.h> | ||
| 14 | #include <linux/file.h> | ||
| 15 | #include <asm/uaccess.h> | ||
| 16 | #include <asm/kvm_book3s.h> | ||
| 17 | #include <asm/kvm_ppc.h> | ||
| 18 | #include <asm/hvcall.h> | ||
| 19 | #include <asm/xive.h> | ||
| 20 | #include <asm/xive-regs.h> | ||
| 21 | #include <asm/debug.h> | ||
| 22 | #include <asm/debugfs.h> | ||
| 23 | #include <asm/opal.h> | ||
| 24 | |||
| 25 | #include <linux/debugfs.h> | ||
| 26 | #include <linux/seq_file.h> | ||
| 27 | |||
| 28 | #include "book3s_xive.h" | ||
| 29 | |||
| 30 | static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) | ||
| 31 | { | ||
| 32 | u64 val; | ||
| 33 | |||
| 34 | if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) | ||
| 35 | offset |= offset << 4; | ||
| 36 | |||
| 37 | val = in_be64(xd->eoi_mmio + offset); | ||
| 38 | return (u8)val; | ||
| 39 | } | ||
| 40 | |||
| 41 | static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) | ||
| 42 | { | ||
| 43 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
| 44 | struct xive_q *q = &xc->queues[prio]; | ||
| 45 | |||
| 46 | xive_native_disable_queue(xc->vp_id, q, prio); | ||
| 47 | if (q->qpage) { | ||
| 48 | put_page(virt_to_page(q->qpage)); | ||
| 49 | q->qpage = NULL; | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) | ||
| 54 | { | ||
| 55 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
| 56 | int i; | ||
| 57 | |||
| 58 | if (!kvmppc_xive_enabled(vcpu)) | ||
| 59 | return; | ||
| 60 | |||
| 61 | if (!xc) | ||
| 62 | return; | ||
| 63 | |||
| 64 | pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num); | ||
| 65 | |||
| 66 | /* Ensure no interrupt is still routed to that VP */ | ||
| 67 | xc->valid = false; | ||
| 68 | kvmppc_xive_disable_vcpu_interrupts(vcpu); | ||
| 69 | |||
| 70 | /* Disable the VP */ | ||
| 71 | xive_native_disable_vp(xc->vp_id); | ||
| 72 | |||
| 73 | /* Free the queues & associated interrupts */ | ||
| 74 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { | ||
| 75 | /* Free the escalation irq */ | ||
| 76 | if (xc->esc_virq[i]) { | ||
| 77 | free_irq(xc->esc_virq[i], vcpu); | ||
| 78 | irq_dispose_mapping(xc->esc_virq[i]); | ||
| 79 | kfree(xc->esc_virq_names[i]); | ||
| 80 | xc->esc_virq[i] = 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | /* Free the queue */ | ||
| 84 | kvmppc_xive_native_cleanup_queue(vcpu, i); | ||
| 85 | } | ||
| 86 | |||
| 87 | /* Free the VP */ | ||
| 88 | kfree(xc); | ||
| 89 | |||
| 90 | /* Cleanup the vcpu */ | ||
| 91 | vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; | ||
| 92 | vcpu->arch.xive_vcpu = NULL; | ||
| 93 | } | ||
| 94 | |||
| 95 | int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, | ||
| 96 | struct kvm_vcpu *vcpu, u32 server_num) | ||
| 97 | { | ||
| 98 | struct kvmppc_xive *xive = dev->private; | ||
| 99 | struct kvmppc_xive_vcpu *xc = NULL; | ||
| 100 | int rc; | ||
| 101 | |||
| 102 | pr_devel("native_connect_vcpu(server=%d)\n", server_num); | ||
| 103 | |||
| 104 | if (dev->ops != &kvm_xive_native_ops) { | ||
| 105 | pr_devel("Wrong ops !\n"); | ||
| 106 | return -EPERM; | ||
| 107 | } | ||
| 108 | if (xive->kvm != vcpu->kvm) | ||
| 109 | return -EPERM; | ||
| 110 | if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) | ||
| 111 | return -EBUSY; | ||
| 112 | if (server_num >= KVM_MAX_VCPUS) { | ||
| 113 | pr_devel("Out of bounds !\n"); | ||
| 114 | return -EINVAL; | ||
| 115 | } | ||
| 116 | |||
| 117 | mutex_lock(&vcpu->kvm->lock); | ||
| 118 | |||
| 119 | if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { | ||
| 120 | pr_devel("Duplicate !\n"); | ||
| 121 | rc = -EEXIST; | ||
| 122 | goto bail; | ||
| 123 | } | ||
| 124 | |||
| 125 | xc = kzalloc(sizeof(*xc), GFP_KERNEL); | ||
| 126 | if (!xc) { | ||
| 127 | rc = -ENOMEM; | ||
| 128 | goto bail; | ||
| 129 | } | ||
| 130 | |||
| 131 | vcpu->arch.xive_vcpu = xc; | ||
| 132 | xc->xive = xive; | ||
| 133 | xc->vcpu = vcpu; | ||
| 134 | xc->server_num = server_num; | ||
| 135 | |||
| 136 | xc->vp_id = kvmppc_xive_vp(xive, server_num); | ||
| 137 | xc->valid = true; | ||
| 138 | vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; | ||
| 139 | |||
| 140 | rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); | ||
| 141 | if (rc) { | ||
| 142 | pr_err("Failed to get VP info from OPAL: %d\n", rc); | ||
| 143 | goto bail; | ||
| 144 | } | ||
| 145 | |||
| 146 | /* | ||
| 147 | * Enable the VP first as the single escalation mode will | ||
| 148 | * affect escalation interrupts numbering | ||
| 149 | */ | ||
| 150 | rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); | ||
| 151 | if (rc) { | ||
| 152 | pr_err("Failed to enable VP in OPAL: %d\n", rc); | ||
| 153 | goto bail; | ||
| 154 | } | ||
| 155 | |||
| 156 | /* Configure VCPU fields for use by assembly push/pull */ | ||
| 157 | vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); | ||
| 158 | vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); | ||
| 159 | |||
| 160 | /* TODO: reset all queues to a clean state ? */ | ||
| 161 | bail: | ||
| 162 | mutex_unlock(&vcpu->kvm->lock); | ||
| 163 | if (rc) | ||
| 164 | kvmppc_xive_native_cleanup_vcpu(vcpu); | ||
| 165 | |||
| 166 | return rc; | ||
| 167 | } | ||
| 168 | |||
| 169 | /* | ||
| 170 | * Device passthrough support | ||
| 171 | */ | ||
| 172 | static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq) | ||
| 173 | { | ||
| 174 | struct kvmppc_xive *xive = kvm->arch.xive; | ||
| 175 | |||
| 176 | if (irq >= KVMPPC_XIVE_NR_IRQS) | ||
| 177 | return -EINVAL; | ||
| 178 | |||
| 179 | /* | ||
| 180 | * Clear the ESB pages of the IRQ number being mapped (or | ||
| 181 | * unmapped) into the guest and let the VM fault handler | ||
| 182 | * repopulate with the appropriate ESB pages (device or IC) | ||
| 183 | */ | ||
| 184 | pr_debug("clearing esb pages for girq 0x%lx\n", irq); | ||
| 185 | mutex_lock(&xive->mapping_lock); | ||
| 186 | if (xive->mapping) | ||
| 187 | unmap_mapping_range(xive->mapping, | ||
| 188 | irq * (2ull << PAGE_SHIFT), | ||
| 189 | 2ull << PAGE_SHIFT, 1); | ||
| 190 | mutex_unlock(&xive->mapping_lock); | ||
| 191 | return 0; | ||
| 192 | } | ||
| 193 | |||
| 194 | static struct kvmppc_xive_ops kvmppc_xive_native_ops = { | ||
| 195 | .reset_mapped = kvmppc_xive_native_reset_mapped, | ||
| 196 | }; | ||
| 197 | |||
| 198 | static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) | ||
| 199 | { | ||
| 200 | struct vm_area_struct *vma = vmf->vma; | ||
| 201 | struct kvm_device *dev = vma->vm_file->private_data; | ||
| 202 | struct kvmppc_xive *xive = dev->private; | ||
| 203 | struct kvmppc_xive_src_block *sb; | ||
| 204 | struct kvmppc_xive_irq_state *state; | ||
| 205 | struct xive_irq_data *xd; | ||
| 206 | u32 hw_num; | ||
| 207 | u16 src; | ||
| 208 | u64 page; | ||
| 209 | unsigned long irq; | ||
| 210 | u64 page_offset; | ||
| 211 | |||
| 212 | /* | ||
| 213 | * Linux/KVM uses a two-page ESB setting, one for trigger and | ||
| 214 | * one for EOI | ||
| 215 | */ | ||
| 216 | page_offset = vmf->pgoff - vma->vm_pgoff; | ||
| 217 | irq = page_offset / 2; | ||
| 218 | |||
| 219 | sb = kvmppc_xive_find_source(xive, irq, &src); | ||
| 220 | if (!sb) { | ||
| 221 | pr_devel("%s: source %lx not found !\n", __func__, irq); | ||
| 222 | return VM_FAULT_SIGBUS; | ||
| 223 | } | ||
| 224 | |||
| 225 | state = &sb->irq_state[src]; | ||
| 226 | kvmppc_xive_select_irq(state, &hw_num, &xd); | ||
| 227 | |||
| 228 | arch_spin_lock(&sb->lock); | ||
| 229 | |||
| 230 | /* | ||
| 231 | * first/even page is for trigger | ||
| 232 | * second/odd page is for EOI and management. | ||
| 233 | */ | ||
| 234 | page = page_offset % 2 ? xd->eoi_page : xd->trig_page; | ||
| 235 | arch_spin_unlock(&sb->lock); | ||
| 236 | |||
| 237 | if (WARN_ON(!page)) { | ||
| 238 | pr_err("%s: accessing invalid ESB page for source %lx !\n", | ||
| 239 | __func__, irq); | ||
| 240 | return VM_FAULT_SIGBUS; | ||
| 241 | } | ||
| 242 | |||
| 243 | vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT); | ||
| 244 | return VM_FAULT_NOPAGE; | ||
| 245 | } | ||
| 246 | |||
| 247 | static const struct vm_operations_struct xive_native_esb_vmops = { | ||
| 248 | .fault = xive_native_esb_fault, | ||
| 249 | }; | ||
| 250 | |||
| 251 | static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf) | ||
| 252 | { | ||
| 253 | struct vm_area_struct *vma = vmf->vma; | ||
| 254 | |||
| 255 | switch (vmf->pgoff - vma->vm_pgoff) { | ||
| 256 | case 0: /* HW - forbid access */ | ||
| 257 | case 1: /* HV - forbid access */ | ||
| 258 | return VM_FAULT_SIGBUS; | ||
| 259 | case 2: /* OS */ | ||
| 260 | vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT); | ||
| 261 | return VM_FAULT_NOPAGE; | ||
| 262 | case 3: /* USER - TODO */ | ||
| 263 | default: | ||
| 264 | return VM_FAULT_SIGBUS; | ||
| 265 | } | ||
| 266 | } | ||
| 267 | |||
| 268 | static const struct vm_operations_struct xive_native_tima_vmops = { | ||
| 269 | .fault = xive_native_tima_fault, | ||
| 270 | }; | ||
| 271 | |||
| 272 | static int kvmppc_xive_native_mmap(struct kvm_device *dev, | ||
| 273 | struct vm_area_struct *vma) | ||
| 274 | { | ||
| 275 | struct kvmppc_xive *xive = dev->private; | ||
| 276 | |||
| 277 | /* We only allow mappings at fixed offset for now */ | ||
| 278 | if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) { | ||
| 279 | if (vma_pages(vma) > 4) | ||
| 280 | return -EINVAL; | ||
| 281 | vma->vm_ops = &xive_native_tima_vmops; | ||
| 282 | } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) { | ||
| 283 | if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2) | ||
| 284 | return -EINVAL; | ||
| 285 | vma->vm_ops = &xive_native_esb_vmops; | ||
| 286 | } else { | ||
| 287 | return -EINVAL; | ||
| 288 | } | ||
| 289 | |||
| 290 | vma->vm_flags |= VM_IO | VM_PFNMAP; | ||
| 291 | vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); | ||
| 292 | |||
| 293 | /* | ||
| 294 | * Grab the KVM device file address_space to be able to clear | ||
| 295 | * the ESB pages mapping when a device is passed-through into | ||
| 296 | * the guest. | ||
| 297 | */ | ||
| 298 | xive->mapping = vma->vm_file->f_mapping; | ||
| 299 | return 0; | ||
| 300 | } | ||
| 301 | |||
| 302 | static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, | ||
| 303 | u64 addr) | ||
| 304 | { | ||
| 305 | struct kvmppc_xive_src_block *sb; | ||
| 306 | struct kvmppc_xive_irq_state *state; | ||
| 307 | u64 __user *ubufp = (u64 __user *) addr; | ||
| 308 | u64 val; | ||
| 309 | u16 idx; | ||
| 310 | int rc; | ||
| 311 | |||
| 312 | pr_devel("%s irq=0x%lx\n", __func__, irq); | ||
| 313 | |||
| 314 | if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS) | ||
| 315 | return -E2BIG; | ||
| 316 | |||
| 317 | sb = kvmppc_xive_find_source(xive, irq, &idx); | ||
| 318 | if (!sb) { | ||
| 319 | pr_debug("No source, creating source block...\n"); | ||
| 320 | sb = kvmppc_xive_create_src_block(xive, irq); | ||
| 321 | if (!sb) { | ||
| 322 | pr_err("Failed to create block...\n"); | ||
| 323 | return -ENOMEM; | ||
| 324 | } | ||
| 325 | } | ||
| 326 | state = &sb->irq_state[idx]; | ||
| 327 | |||
| 328 | if (get_user(val, ubufp)) { | ||
| 329 | pr_err("fault getting user info !\n"); | ||
| 330 | return -EFAULT; | ||
| 331 | } | ||
| 332 | |||
| 333 | arch_spin_lock(&sb->lock); | ||
| 334 | |||
| 335 | /* | ||
| 336 | * If the source doesn't already have an IPI, allocate | ||
| 337 | * one and get the corresponding data | ||
| 338 | */ | ||
| 339 | if (!state->ipi_number) { | ||
| 340 | state->ipi_number = xive_native_alloc_irq(); | ||
| 341 | if (state->ipi_number == 0) { | ||
| 342 | pr_err("Failed to allocate IRQ !\n"); | ||
| 343 | rc = -ENXIO; | ||
| 344 | goto unlock; | ||
| 345 | } | ||
| 346 | xive_native_populate_irq_data(state->ipi_number, | ||
| 347 | &state->ipi_data); | ||
| 348 | pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__, | ||
| 349 | state->ipi_number, irq); | ||
| 350 | } | ||
| 351 | |||
| 352 | /* Restore LSI state */ | ||
| 353 | if (val & KVM_XIVE_LEVEL_SENSITIVE) { | ||
| 354 | state->lsi = true; | ||
| 355 | if (val & KVM_XIVE_LEVEL_ASSERTED) | ||
| 356 | state->asserted = true; | ||
| 357 | pr_devel(" LSI ! Asserted=%d\n", state->asserted); | ||
| 358 | } | ||
| 359 | |||
| 360 | /* Mask IRQ to start with */ | ||
| 361 | state->act_server = 0; | ||
| 362 | state->act_priority = MASKED; | ||
| 363 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); | ||
| 364 | xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); | ||
| 365 | |||
| 366 | /* Increment the number of valid sources and mark this one valid */ | ||
| 367 | if (!state->valid) | ||
| 368 | xive->src_count++; | ||
| 369 | state->valid = true; | ||
| 370 | |||
| 371 | rc = 0; | ||
| 372 | |||
| 373 | unlock: | ||
| 374 | arch_spin_unlock(&sb->lock); | ||
| 375 | |||
| 376 | return rc; | ||
| 377 | } | ||
| 378 | |||
| 379 | static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive, | ||
| 380 | struct kvmppc_xive_src_block *sb, | ||
| 381 | struct kvmppc_xive_irq_state *state, | ||
| 382 | u32 server, u8 priority, bool masked, | ||
| 383 | u32 eisn) | ||
| 384 | { | ||
| 385 | struct kvm *kvm = xive->kvm; | ||
| 386 | u32 hw_num; | ||
| 387 | int rc = 0; | ||
| 388 | |||
| 389 | arch_spin_lock(&sb->lock); | ||
| 390 | |||
| 391 | if (state->act_server == server && state->act_priority == priority && | ||
| 392 | state->eisn == eisn) | ||
| 393 | goto unlock; | ||
| 394 | |||
| 395 | pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n", | ||
| 396 | priority, server, masked, state->act_server, | ||
| 397 | state->act_priority); | ||
| 398 | |||
| 399 | kvmppc_xive_select_irq(state, &hw_num, NULL); | ||
| 400 | |||
| 401 | if (priority != MASKED && !masked) { | ||
| 402 | rc = kvmppc_xive_select_target(kvm, &server, priority); | ||
| 403 | if (rc) | ||
| 404 | goto unlock; | ||
| 405 | |||
| 406 | state->act_priority = priority; | ||
| 407 | state->act_server = server; | ||
| 408 | state->eisn = eisn; | ||
| 409 | |||
| 410 | rc = xive_native_configure_irq(hw_num, | ||
| 411 | kvmppc_xive_vp(xive, server), | ||
| 412 | priority, eisn); | ||
| 413 | } else { | ||
| 414 | state->act_priority = MASKED; | ||
| 415 | state->act_server = 0; | ||
| 416 | state->eisn = 0; | ||
| 417 | |||
| 418 | rc = xive_native_configure_irq(hw_num, 0, MASKED, 0); | ||
| 419 | } | ||
| 420 | |||
| 421 | unlock: | ||
| 422 | arch_spin_unlock(&sb->lock); | ||
| 423 | return rc; | ||
| 424 | } | ||
| 425 | |||
| 426 | static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, | ||
| 427 | long irq, u64 addr) | ||
| 428 | { | ||
| 429 | struct kvmppc_xive_src_block *sb; | ||
| 430 | struct kvmppc_xive_irq_state *state; | ||
| 431 | u64 __user *ubufp = (u64 __user *) addr; | ||
| 432 | u16 src; | ||
| 433 | u64 kvm_cfg; | ||
| 434 | u32 server; | ||
| 435 | u8 priority; | ||
| 436 | bool masked; | ||
| 437 | u32 eisn; | ||
| 438 | |||
| 439 | sb = kvmppc_xive_find_source(xive, irq, &src); | ||
| 440 | if (!sb) | ||
| 441 | return -ENOENT; | ||
| 442 | |||
| 443 | state = &sb->irq_state[src]; | ||
| 444 | |||
| 445 | if (!state->valid) | ||
| 446 | return -EINVAL; | ||
| 447 | |||
| 448 | if (get_user(kvm_cfg, ubufp)) | ||
| 449 | return -EFAULT; | ||
| 450 | |||
| 451 | pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg); | ||
| 452 | |||
| 453 | priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >> | ||
| 454 | KVM_XIVE_SOURCE_PRIORITY_SHIFT; | ||
| 455 | server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >> | ||
| 456 | KVM_XIVE_SOURCE_SERVER_SHIFT; | ||
| 457 | masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >> | ||
| 458 | KVM_XIVE_SOURCE_MASKED_SHIFT; | ||
| 459 | eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >> | ||
| 460 | KVM_XIVE_SOURCE_EISN_SHIFT; | ||
| 461 | |||
| 462 | if (priority != xive_prio_from_guest(priority)) { | ||
| 463 | pr_err("invalid priority for queue %d for VCPU %d\n", | ||
| 464 | priority, server); | ||
| 465 | return -EINVAL; | ||
| 466 | } | ||
| 467 | |||
| 468 | return kvmppc_xive_native_update_source_config(xive, sb, state, server, | ||
| 469 | priority, masked, eisn); | ||
| 470 | } | ||
| 471 | |||
| 472 | static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive, | ||
| 473 | long irq, u64 addr) | ||
| 474 | { | ||
| 475 | struct kvmppc_xive_src_block *sb; | ||
| 476 | struct kvmppc_xive_irq_state *state; | ||
| 477 | struct xive_irq_data *xd; | ||
| 478 | u32 hw_num; | ||
| 479 | u16 src; | ||
| 480 | int rc = 0; | ||
| 481 | |||
| 482 | pr_devel("%s irq=0x%lx", __func__, irq); | ||
| 483 | |||
| 484 | sb = kvmppc_xive_find_source(xive, irq, &src); | ||
| 485 | if (!sb) | ||
| 486 | return -ENOENT; | ||
| 487 | |||
| 488 | state = &sb->irq_state[src]; | ||
| 489 | |||
| 490 | rc = -EINVAL; | ||
| 491 | |||
| 492 | arch_spin_lock(&sb->lock); | ||
| 493 | |||
| 494 | if (state->valid) { | ||
| 495 | kvmppc_xive_select_irq(state, &hw_num, &xd); | ||
| 496 | xive_native_sync_source(hw_num); | ||
| 497 | rc = 0; | ||
| 498 | } | ||
| 499 | |||
| 500 | arch_spin_unlock(&sb->lock); | ||
| 501 | return rc; | ||
| 502 | } | ||
| 503 | |||
| 504 | static int xive_native_validate_queue_size(u32 qshift) | ||
| 505 | { | ||
| 506 | /* | ||
| 507 | * We only support 64K pages for the moment. This is also | ||
| 508 | * advertised in the DT property "ibm,xive-eq-sizes" | ||
| 509 | */ | ||
| 510 | switch (qshift) { | ||
| 511 | case 0: /* EQ reset */ | ||
| 512 | case 16: | ||
| 513 | return 0; | ||
| 514 | case 12: | ||
| 515 | case 21: | ||
| 516 | case 24: | ||
| 517 | default: | ||
| 518 | return -EINVAL; | ||
| 519 | } | ||
| 520 | } | ||
| 521 | |||
| 522 | static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, | ||
| 523 | long eq_idx, u64 addr) | ||
| 524 | { | ||
| 525 | struct kvm *kvm = xive->kvm; | ||
| 526 | struct kvm_vcpu *vcpu; | ||
| 527 | struct kvmppc_xive_vcpu *xc; | ||
| 528 | void __user *ubufp = (void __user *) addr; | ||
| 529 | u32 server; | ||
| 530 | u8 priority; | ||
| 531 | struct kvm_ppc_xive_eq kvm_eq; | ||
| 532 | int rc; | ||
| 533 | __be32 *qaddr = 0; | ||
| 534 | struct page *page; | ||
| 535 | struct xive_q *q; | ||
| 536 | gfn_t gfn; | ||
| 537 | unsigned long page_size; | ||
| 538 | |||
| 539 | /* | ||
| 540 | * Demangle priority/server tuple from the EQ identifier | ||
| 541 | */ | ||
| 542 | priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> | ||
| 543 | KVM_XIVE_EQ_PRIORITY_SHIFT; | ||
| 544 | server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> | ||
| 545 | KVM_XIVE_EQ_SERVER_SHIFT; | ||
| 546 | |||
| 547 | if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq))) | ||
| 548 | return -EFAULT; | ||
| 549 | |||
| 550 | vcpu = kvmppc_xive_find_server(kvm, server); | ||
| 551 | if (!vcpu) { | ||
| 552 | pr_err("Can't find server %d\n", server); | ||
| 553 | return -ENOENT; | ||
| 554 | } | ||
| 555 | xc = vcpu->arch.xive_vcpu; | ||
| 556 | |||
| 557 | if (priority != xive_prio_from_guest(priority)) { | ||
| 558 | pr_err("Trying to restore invalid queue %d for VCPU %d\n", | ||
| 559 | priority, server); | ||
| 560 | return -EINVAL; | ||
| 561 | } | ||
| 562 | q = &xc->queues[priority]; | ||
| 563 | |||
| 564 | pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", | ||
| 565 | __func__, server, priority, kvm_eq.flags, | ||
| 566 | kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); | ||
| 567 | |||
| 568 | /* | ||
| 569 | * sPAPR specifies a "Unconditional Notify (n) flag" for the | ||
| 570 | * H_INT_SET_QUEUE_CONFIG hcall which forces notification | ||
| 571 | * without using the coalescing mechanisms provided by the | ||
| 572 | * XIVE END ESBs. This is required on KVM as notification | ||
| 573 | * using the END ESBs is not supported. | ||
| 574 | */ | ||
| 575 | if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) { | ||
| 576 | pr_err("invalid flags %d\n", kvm_eq.flags); | ||
| 577 | return -EINVAL; | ||
| 578 | } | ||
| 579 | |||
| 580 | rc = xive_native_validate_queue_size(kvm_eq.qshift); | ||
| 581 | if (rc) { | ||
| 582 | pr_err("invalid queue size %d\n", kvm_eq.qshift); | ||
| 583 | return rc; | ||
| 584 | } | ||
| 585 | |||
| 586 | /* reset queue and disable queueing */ | ||
| 587 | if (!kvm_eq.qshift) { | ||
| 588 | q->guest_qaddr = 0; | ||
| 589 | q->guest_qshift = 0; | ||
| 590 | |||
| 591 | rc = xive_native_configure_queue(xc->vp_id, q, priority, | ||
| 592 | NULL, 0, true); | ||
| 593 | if (rc) { | ||
| 594 | pr_err("Failed to reset queue %d for VCPU %d: %d\n", | ||
| 595 | priority, xc->server_num, rc); | ||
| 596 | return rc; | ||
| 597 | } | ||
| 598 | |||
| 599 | if (q->qpage) { | ||
| 600 | put_page(virt_to_page(q->qpage)); | ||
| 601 | q->qpage = NULL; | ||
| 602 | } | ||
| 603 | |||
| 604 | return 0; | ||
| 605 | } | ||
| 606 | |||
| 607 | if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) { | ||
| 608 | pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr, | ||
| 609 | 1ull << kvm_eq.qshift); | ||
| 610 | return -EINVAL; | ||
| 611 | } | ||
| 612 | |||
| 613 | gfn = gpa_to_gfn(kvm_eq.qaddr); | ||
| 614 | page = gfn_to_page(kvm, gfn); | ||
| 615 | if (is_error_page(page)) { | ||
| 616 | pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); | ||
| 617 | return -EINVAL; | ||
| 618 | } | ||
| 619 | |||
| 620 | page_size = kvm_host_page_size(kvm, gfn); | ||
| 621 | if (1ull << kvm_eq.qshift > page_size) { | ||
| 622 | pr_warn("Incompatible host page size %lx!\n", page_size); | ||
| 623 | return -EINVAL; | ||
| 624 | } | ||
| 625 | |||
| 626 | qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); | ||
| 627 | |||
| 628 | /* | ||
| 629 | * Back up the queue page guest address; it is used to mark | ||
| 630 | * the EQ page dirty for migration. | ||
| 631 | */ | ||
| 632 | q->guest_qaddr = kvm_eq.qaddr; | ||
| 633 | q->guest_qshift = kvm_eq.qshift; | ||
| 634 | |||
| 635 | /* | ||
| 636 | * Unconditional Notification is forced by default at the | ||
| 637 | * OPAL level because the use of END ESBs is not supported by | ||
| 638 | * Linux. | ||
| 639 | */ | ||
| 640 | rc = xive_native_configure_queue(xc->vp_id, q, priority, | ||
| 641 | (__be32 *) qaddr, kvm_eq.qshift, true); | ||
| 642 | if (rc) { | ||
| 643 | pr_err("Failed to configure queue %d for VCPU %d: %d\n", | ||
| 644 | priority, xc->server_num, rc); | ||
| 645 | put_page(page); | ||
| 646 | return rc; | ||
| 647 | } | ||
| 648 | |||
| 649 | /* | ||
| 650 | * Only restore the queue state when needed. When doing the | ||
| 651 | * H_INT_SET_SOURCE_CONFIG hcall, it should not. | ||
| 652 | */ | ||
| 653 | if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) { | ||
| 654 | rc = xive_native_set_queue_state(xc->vp_id, priority, | ||
| 655 | kvm_eq.qtoggle, | ||
| 656 | kvm_eq.qindex); | ||
| 657 | if (rc) | ||
| 658 | goto error; | ||
| 659 | } | ||
| 660 | |||
| 661 | rc = kvmppc_xive_attach_escalation(vcpu, priority, | ||
| 662 | xive->single_escalation); | ||
| 663 | error: | ||
| 664 | if (rc) | ||
| 665 | kvmppc_xive_native_cleanup_queue(vcpu, priority); | ||
| 666 | return rc; | ||
| 667 | } | ||
| 668 | |||
| 669 | static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive, | ||
| 670 | long eq_idx, u64 addr) | ||
| 671 | { | ||
| 672 | struct kvm *kvm = xive->kvm; | ||
| 673 | struct kvm_vcpu *vcpu; | ||
| 674 | struct kvmppc_xive_vcpu *xc; | ||
| 675 | struct xive_q *q; | ||
| 676 | void __user *ubufp = (u64 __user *) addr; | ||
| 677 | u32 server; | ||
| 678 | u8 priority; | ||
| 679 | struct kvm_ppc_xive_eq kvm_eq; | ||
| 680 | u64 qaddr; | ||
| 681 | u64 qshift; | ||
| 682 | u64 qeoi_page; | ||
| 683 | u32 escalate_irq; | ||
| 684 | u64 qflags; | ||
| 685 | int rc; | ||
| 686 | |||
| 687 | /* | ||
| 688 | * Demangle priority/server tuple from the EQ identifier | ||
| 689 | */ | ||
| 690 | priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> | ||
| 691 | KVM_XIVE_EQ_PRIORITY_SHIFT; | ||
| 692 | server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> | ||
| 693 | KVM_XIVE_EQ_SERVER_SHIFT; | ||
| 694 | |||
| 695 | vcpu = kvmppc_xive_find_server(kvm, server); | ||
| 696 | if (!vcpu) { | ||
| 697 | pr_err("Can't find server %d\n", server); | ||
| 698 | return -ENOENT; | ||
| 699 | } | ||
| 700 | xc = vcpu->arch.xive_vcpu; | ||
| 701 | |||
| 702 | if (priority != xive_prio_from_guest(priority)) { | ||
| 703 | pr_err("invalid priority for queue %d for VCPU %d\n", | ||
| 704 | priority, server); | ||
| 705 | return -EINVAL; | ||
| 706 | } | ||
| 707 | q = &xc->queues[priority]; | ||
| 708 | |||
| 709 | memset(&kvm_eq, 0, sizeof(kvm_eq)); | ||
| 710 | |||
| 711 | if (!q->qpage) | ||
| 712 | return 0; | ||
| 713 | |||
| 714 | rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift, | ||
| 715 | &qeoi_page, &escalate_irq, &qflags); | ||
| 716 | if (rc) | ||
| 717 | return rc; | ||
| 718 | |||
| 719 | kvm_eq.flags = 0; | ||
| 720 | if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY) | ||
| 721 | kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY; | ||
| 722 | |||
| 723 | kvm_eq.qshift = q->guest_qshift; | ||
| 724 | kvm_eq.qaddr = q->guest_qaddr; | ||
| 725 | |||
| 726 | rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle, | ||
| 727 | &kvm_eq.qindex); | ||
| 728 | if (rc) | ||
| 729 | return rc; | ||
| 730 | |||
| 731 | pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", | ||
| 732 | __func__, server, priority, kvm_eq.flags, | ||
| 733 | kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); | ||
| 734 | |||
| 735 | if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq))) | ||
| 736 | return -EFAULT; | ||
| 737 | |||
| 738 | return 0; | ||
| 739 | } | ||
| 740 | |||
| 741 | static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb) | ||
| 742 | { | ||
| 743 | int i; | ||
| 744 | |||
| 745 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | ||
| 746 | struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; | ||
| 747 | |||
| 748 | if (!state->valid) | ||
| 749 | continue; | ||
| 750 | |||
| 751 | if (state->act_priority == MASKED) | ||
| 752 | continue; | ||
| 753 | |||
| 754 | state->eisn = 0; | ||
| 755 | state->act_server = 0; | ||
| 756 | state->act_priority = MASKED; | ||
| 757 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); | ||
| 758 | xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); | ||
| 759 | if (state->pt_number) { | ||
| 760 | xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01); | ||
| 761 | xive_native_configure_irq(state->pt_number, | ||
| 762 | 0, MASKED, 0); | ||
| 763 | } | ||
| 764 | } | ||
| 765 | } | ||
| 766 | |||
| 767 | static int kvmppc_xive_reset(struct kvmppc_xive *xive) | ||
| 768 | { | ||
| 769 | struct kvm *kvm = xive->kvm; | ||
| 770 | struct kvm_vcpu *vcpu; | ||
| 771 | unsigned int i; | ||
| 772 | |||
| 773 | pr_devel("%s\n", __func__); | ||
| 774 | |||
| 775 | mutex_lock(&kvm->lock); | ||
| 776 | |||
| 777 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
| 778 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
| 779 | unsigned int prio; | ||
| 780 | |||
| 781 | if (!xc) | ||
| 782 | continue; | ||
| 783 | |||
| 784 | kvmppc_xive_disable_vcpu_interrupts(vcpu); | ||
| 785 | |||
| 786 | for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { | ||
| 787 | |||
| 788 | /* Single escalation, no queue 7 */ | ||
| 789 | if (prio == 7 && xive->single_escalation) | ||
| 790 | break; | ||
| 791 | |||
| 792 | if (xc->esc_virq[prio]) { | ||
| 793 | free_irq(xc->esc_virq[prio], vcpu); | ||
| 794 | irq_dispose_mapping(xc->esc_virq[prio]); | ||
| 795 | kfree(xc->esc_virq_names[prio]); | ||
| 796 | xc->esc_virq[prio] = 0; | ||
| 797 | } | ||
| 798 | |||
| 799 | kvmppc_xive_native_cleanup_queue(vcpu, prio); | ||
| 800 | } | ||
| 801 | } | ||
| 802 | |||
| 803 | for (i = 0; i <= xive->max_sbid; i++) { | ||
| 804 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; | ||
| 805 | |||
| 806 | if (sb) { | ||
| 807 | arch_spin_lock(&sb->lock); | ||
| 808 | kvmppc_xive_reset_sources(sb); | ||
| 809 | arch_spin_unlock(&sb->lock); | ||
| 810 | } | ||
| 811 | } | ||
| 812 | |||
| 813 | mutex_unlock(&kvm->lock); | ||
| 814 | |||
| 815 | return 0; | ||
| 816 | } | ||
| 817 | |||
| 818 | static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb) | ||
| 819 | { | ||
| 820 | int j; | ||
| 821 | |||
| 822 | for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) { | ||
| 823 | struct kvmppc_xive_irq_state *state = &sb->irq_state[j]; | ||
| 824 | struct xive_irq_data *xd; | ||
| 825 | u32 hw_num; | ||
| 826 | |||
| 827 | if (!state->valid) | ||
| 828 | continue; | ||
| 829 | |||
| 830 | /* | ||
| 831 | * The struct kvmppc_xive_irq_state reflects the state | ||
| 832 | * of the EAS configuration and not the state of the | ||
| 833 | * source. The source is masked by setting the PQ bits to | ||
| 834 | * '-Q', which is what is being done before calling | ||
| 835 | * the KVM_DEV_XIVE_EQ_SYNC control. | ||
| 836 | * | ||
| 837 | * If a source EAS is configured, OPAL syncs the XIVE | ||
| 838 | * IC of the source and the XIVE IC of the previous | ||
| 839 | * target if any. | ||
| 840 | * | ||
| 841 | * So it should be fine ignoring MASKED sources as | ||
| 842 | * they have been synced already. | ||
| 843 | */ | ||
| 844 | if (state->act_priority == MASKED) | ||
| 845 | continue; | ||
| 846 | |||
| 847 | kvmppc_xive_select_irq(state, &hw_num, &xd); | ||
| 848 | xive_native_sync_source(hw_num); | ||
| 849 | xive_native_sync_queue(hw_num); | ||
| 850 | } | ||
| 851 | } | ||
| 852 | |||
| 853 | static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu) | ||
| 854 | { | ||
| 855 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
| 856 | unsigned int prio; | ||
| 857 | |||
| 858 | if (!xc) | ||
| 859 | return -ENOENT; | ||
| 860 | |||
| 861 | for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { | ||
| 862 | struct xive_q *q = &xc->queues[prio]; | ||
| 863 | |||
| 864 | if (!q->qpage) | ||
| 865 | continue; | ||
| 866 | |||
| 867 | /* Mark EQ page dirty for migration */ | ||
| 868 | mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr)); | ||
| 869 | } | ||
| 870 | return 0; | ||
| 871 | } | ||
| 872 | |||
| 873 | static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive) | ||
| 874 | { | ||
| 875 | struct kvm *kvm = xive->kvm; | ||
| 876 | struct kvm_vcpu *vcpu; | ||
| 877 | unsigned int i; | ||
| 878 | |||
| 879 | pr_devel("%s\n", __func__); | ||
| 880 | |||
| 881 | mutex_lock(&kvm->lock); | ||
| 882 | for (i = 0; i <= xive->max_sbid; i++) { | ||
| 883 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; | ||
| 884 | |||
| 885 | if (sb) { | ||
| 886 | arch_spin_lock(&sb->lock); | ||
| 887 | kvmppc_xive_native_sync_sources(sb); | ||
| 888 | arch_spin_unlock(&sb->lock); | ||
| 889 | } | ||
| 890 | } | ||
| 891 | |||
| 892 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
| 893 | kvmppc_xive_native_vcpu_eq_sync(vcpu); | ||
| 894 | } | ||
| 895 | mutex_unlock(&kvm->lock); | ||
| 896 | |||
| 897 | return 0; | ||
| 898 | } | ||
| 899 | |||
| 900 | static int kvmppc_xive_native_set_attr(struct kvm_device *dev, | ||
| 901 | struct kvm_device_attr *attr) | ||
| 902 | { | ||
| 903 | struct kvmppc_xive *xive = dev->private; | ||
| 904 | |||
| 905 | switch (attr->group) { | ||
| 906 | case KVM_DEV_XIVE_GRP_CTRL: | ||
| 907 | switch (attr->attr) { | ||
| 908 | case KVM_DEV_XIVE_RESET: | ||
| 909 | return kvmppc_xive_reset(xive); | ||
| 910 | case KVM_DEV_XIVE_EQ_SYNC: | ||
| 911 | return kvmppc_xive_native_eq_sync(xive); | ||
| 912 | } | ||
| 913 | break; | ||
| 914 | case KVM_DEV_XIVE_GRP_SOURCE: | ||
| 915 | return kvmppc_xive_native_set_source(xive, attr->attr, | ||
| 916 | attr->addr); | ||
| 917 | case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: | ||
| 918 | return kvmppc_xive_native_set_source_config(xive, attr->attr, | ||
| 919 | attr->addr); | ||
| 920 | case KVM_DEV_XIVE_GRP_EQ_CONFIG: | ||
| 921 | return kvmppc_xive_native_set_queue_config(xive, attr->attr, | ||
| 922 | attr->addr); | ||
| 923 | case KVM_DEV_XIVE_GRP_SOURCE_SYNC: | ||
| 924 | return kvmppc_xive_native_sync_source(xive, attr->attr, | ||
| 925 | attr->addr); | ||
| 926 | } | ||
| 927 | return -ENXIO; | ||
| 928 | } | ||
| 929 | |||
| 930 | static int kvmppc_xive_native_get_attr(struct kvm_device *dev, | ||
| 931 | struct kvm_device_attr *attr) | ||
| 932 | { | ||
| 933 | struct kvmppc_xive *xive = dev->private; | ||
| 934 | |||
| 935 | switch (attr->group) { | ||
| 936 | case KVM_DEV_XIVE_GRP_EQ_CONFIG: | ||
| 937 | return kvmppc_xive_native_get_queue_config(xive, attr->attr, | ||
| 938 | attr->addr); | ||
| 939 | } | ||
| 940 | return -ENXIO; | ||
| 941 | } | ||
| 942 | |||
| 943 | static int kvmppc_xive_native_has_attr(struct kvm_device *dev, | ||
| 944 | struct kvm_device_attr *attr) | ||
| 945 | { | ||
| 946 | switch (attr->group) { | ||
| 947 | case KVM_DEV_XIVE_GRP_CTRL: | ||
| 948 | switch (attr->attr) { | ||
| 949 | case KVM_DEV_XIVE_RESET: | ||
| 950 | case KVM_DEV_XIVE_EQ_SYNC: | ||
| 951 | return 0; | ||
| 952 | } | ||
| 953 | break; | ||
| 954 | case KVM_DEV_XIVE_GRP_SOURCE: | ||
| 955 | case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: | ||
| 956 | case KVM_DEV_XIVE_GRP_SOURCE_SYNC: | ||
| 957 | if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && | ||
| 958 | attr->attr < KVMPPC_XIVE_NR_IRQS) | ||
| 959 | return 0; | ||
| 960 | break; | ||
| 961 | case KVM_DEV_XIVE_GRP_EQ_CONFIG: | ||
| 962 | return 0; | ||
| 963 | } | ||
| 964 | return -ENXIO; | ||
| 965 | } | ||
| 966 | |||
| 967 | /* | ||
| 968 | * Called when device fd is closed | ||
| 969 | */ | ||
| 970 | static void kvmppc_xive_native_release(struct kvm_device *dev) | ||
| 971 | { | ||
| 972 | struct kvmppc_xive *xive = dev->private; | ||
| 973 | struct kvm *kvm = xive->kvm; | ||
| 974 | struct kvm_vcpu *vcpu; | ||
| 975 | int i; | ||
| 976 | int was_ready; | ||
| 977 | |||
| 978 | debugfs_remove(xive->dentry); | ||
| 979 | |||
| 980 | pr_devel("Releasing xive native device\n"); | ||
| 981 | |||
| 982 | /* | ||
| 983 | * Clearing mmu_ready temporarily while holding kvm->lock | ||
| 984 | * is a way of ensuring that no vcpus can enter the guest | ||
| 985 | * until we drop kvm->lock. Doing kick_all_cpus_sync() | ||
| 986 | * ensures that any vcpu executing inside the guest has | ||
| 987 | * exited the guest. Once kick_all_cpus_sync() has finished, | ||
| 988 | * we know that no vcpu can be executing the XIVE push or | ||
| 989 | * pull code or accessing the XIVE MMIO regions. | ||
| 990 | * | ||
| 991 | * Since this is the device release function, we know that | ||
| 992 | * userspace does not have any open fd or mmap referring to | ||
| 993 | * the device. Therefore there can not be any of the | ||
| 994 | * device attribute set/get, mmap, or page fault functions | ||
| 995 | * being executed concurrently, and similarly, the | ||
| 996 | * connect_vcpu and set/clr_mapped functions also cannot | ||
| 997 | * be executing. | ||
| 998 | */ | ||
| 999 | was_ready = kvm->arch.mmu_ready; | ||
| 1000 | kvm->arch.mmu_ready = 0; | ||
| 1001 | kick_all_cpus_sync(); | ||
| 1002 | |||
| 1003 | /* | ||
| 1004 | * We should clean up the vCPU interrupt presenters first. | ||
| 1005 | */ | ||
| 1006 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
| 1007 | /* | ||
| 1008 | * Take vcpu->mutex to ensure that no one_reg get/set ioctl | ||
| 1009 | * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress. | ||
| 1010 | */ | ||
| 1011 | mutex_lock(&vcpu->mutex); | ||
| 1012 | kvmppc_xive_native_cleanup_vcpu(vcpu); | ||
| 1013 | mutex_unlock(&vcpu->mutex); | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | kvm->arch.xive = NULL; | ||
| 1017 | |||
| 1018 | for (i = 0; i <= xive->max_sbid; i++) { | ||
| 1019 | if (xive->src_blocks[i]) | ||
| 1020 | kvmppc_xive_free_sources(xive->src_blocks[i]); | ||
| 1021 | kfree(xive->src_blocks[i]); | ||
| 1022 | xive->src_blocks[i] = NULL; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | if (xive->vp_base != XIVE_INVALID_VP) | ||
| 1026 | xive_native_free_vp_block(xive->vp_base); | ||
| 1027 | |||
| 1028 | kvm->arch.mmu_ready = was_ready; | ||
| 1029 | |||
| 1030 | /* | ||
| 1031 | * A reference to the kvmppc_xive pointer is now kept under | ||
| 1032 | * the xive_devices struct of the machine for reuse. For now, it | ||
| 1033 | * is freed only when the VM is destroyed, until we fix all the | ||
| 1034 | * execution paths. | ||
| 1035 | */ | ||
| 1036 | |||
| 1037 | kfree(dev); | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | /* | ||
| 1041 | * Create a XIVE device. kvm->lock is held. | ||
| 1042 | */ | ||
| 1043 | static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) | ||
| 1044 | { | ||
| 1045 | struct kvmppc_xive *xive; | ||
| 1046 | struct kvm *kvm = dev->kvm; | ||
| 1047 | int ret = 0; | ||
| 1048 | |||
| 1049 | pr_devel("Creating xive native device\n"); | ||
| 1050 | |||
| 1051 | if (kvm->arch.xive) | ||
| 1052 | return -EEXIST; | ||
| 1053 | |||
| 1054 | xive = kvmppc_xive_get_device(kvm, type); | ||
| 1055 | if (!xive) | ||
| 1056 | return -ENOMEM; | ||
| 1057 | |||
| 1058 | dev->private = xive; | ||
| 1059 | xive->dev = dev; | ||
| 1060 | xive->kvm = kvm; | ||
| 1061 | kvm->arch.xive = xive; | ||
| 1062 | mutex_init(&xive->mapping_lock); | ||
| 1063 | |||
| 1064 | /* | ||
| 1065 | * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for | ||
| 1066 | * a default. Getting the max number of CPUs the VM was | ||
| 1067 | * configured with would improve our usage of the XIVE VP space. | ||
| 1068 | */ | ||
| 1069 | xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); | ||
| 1070 | pr_devel("VP_Base=%x\n", xive->vp_base); | ||
| 1071 | |||
| 1072 | if (xive->vp_base == XIVE_INVALID_VP) | ||
| 1073 | ret = -ENXIO; | ||
| 1074 | |||
| 1075 | xive->single_escalation = xive_native_has_single_escalation(); | ||
| 1076 | xive->ops = &kvmppc_xive_native_ops; | ||
| 1077 | |||
| 1078 | if (ret) | ||
| 1079 | kfree(xive); | ||
| 1080 | |||
| 1081 | return ret; | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | /* | ||
| 1085 | * Interrupt Pending Buffer (IPB) offset | ||
| 1086 | */ | ||
| 1087 | #define TM_IPB_SHIFT 40 | ||
| 1088 | #define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT) | ||
| 1089 | |||
| 1090 | int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) | ||
| 1091 | { | ||
| 1092 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
| 1093 | u64 opal_state; | ||
| 1094 | int rc; | ||
| 1095 | |||
| 1096 | if (!kvmppc_xive_enabled(vcpu)) | ||
| 1097 | return -EPERM; | ||
| 1098 | |||
| 1099 | if (!xc) | ||
| 1100 | return -ENOENT; | ||
| 1101 | |||
| 1102 | /* Thread context registers. We only care about IPB and CPPR */ | ||
| 1103 | val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01; | ||
| 1104 | |||
| 1105 | /* Get the VP state from OPAL */ | ||
| 1106 | rc = xive_native_get_vp_state(xc->vp_id, &opal_state); | ||
| 1107 | if (rc) | ||
| 1108 | return rc; | ||
| 1109 | |||
| 1110 | /* | ||
| 1111 | * Capture the backup of IPB register in the NVT structure and | ||
| 1112 | * merge it in our KVM VP state. | ||
| 1113 | */ | ||
| 1114 | val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK); | ||
| 1115 | |||
| 1116 | pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n", | ||
| 1117 | __func__, | ||
| 1118 | vcpu->arch.xive_saved_state.nsr, | ||
| 1119 | vcpu->arch.xive_saved_state.cppr, | ||
| 1120 | vcpu->arch.xive_saved_state.ipb, | ||
| 1121 | vcpu->arch.xive_saved_state.pipr, | ||
| 1122 | vcpu->arch.xive_saved_state.w01, | ||
| 1123 | (u32) vcpu->arch.xive_cam_word, opal_state); | ||
| 1124 | |||
| 1125 | return 0; | ||
| 1126 | } | ||
| 1127 | |||
| 1128 | int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) | ||
| 1129 | { | ||
| 1130 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
| 1131 | struct kvmppc_xive *xive = vcpu->kvm->arch.xive; | ||
| 1132 | |||
| 1133 | pr_devel("%s w01=%016llx vp=%016llx\n", __func__, | ||
| 1134 | val->xive_timaval[0], val->xive_timaval[1]); | ||
| 1135 | |||
| 1136 | if (!kvmppc_xive_enabled(vcpu)) | ||
| 1137 | return -EPERM; | ||
| 1138 | |||
| 1139 | if (!xc || !xive) | ||
| 1140 | return -ENOENT; | ||
| 1141 | |||
| 1142 | /* We can't update the state of a "pushed" VCPU */ | ||
| 1143 | if (WARN_ON(vcpu->arch.xive_pushed)) | ||
| 1144 | return -EBUSY; | ||
| 1145 | |||
| 1146 | /* | ||
| 1147 | * Restore the thread context registers. IPB and CPPR should | ||
| 1148 | * be the only ones that matter. | ||
| 1149 | */ | ||
| 1150 | vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0]; | ||
| 1151 | |||
| 1152 | /* | ||
| 1153 | * There is no need to restore the XIVE internal state (IPB | ||
| 1154 | * stored in the NVT) as the IPB register was merged in KVM VP | ||
| 1155 | * state when captured. | ||
| 1156 | */ | ||
| 1157 | return 0; | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | static int xive_native_debug_show(struct seq_file *m, void *private) | ||
| 1161 | { | ||
| 1162 | struct kvmppc_xive *xive = m->private; | ||
| 1163 | struct kvm *kvm = xive->kvm; | ||
| 1164 | struct kvm_vcpu *vcpu; | ||
| 1165 | unsigned int i; | ||
| 1166 | |||
| 1167 | if (!kvm) | ||
| 1168 | return 0; | ||
| 1169 | |||
| 1170 | seq_puts(m, "=========\nVCPU state\n=========\n"); | ||
| 1171 | |||
| 1172 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
| 1173 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
| 1174 | |||
| 1175 | if (!xc) | ||
| 1176 | continue; | ||
| 1177 | |||
| 1178 | seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n", | ||
| 1179 | xc->server_num, | ||
| 1180 | vcpu->arch.xive_saved_state.nsr, | ||
| 1181 | vcpu->arch.xive_saved_state.cppr, | ||
| 1182 | vcpu->arch.xive_saved_state.ipb, | ||
| 1183 | vcpu->arch.xive_saved_state.pipr, | ||
| 1184 | vcpu->arch.xive_saved_state.w01, | ||
| 1185 | (u32) vcpu->arch.xive_cam_word); | ||
| 1186 | |||
| 1187 | kvmppc_xive_debug_show_queues(m, vcpu); | ||
| 1188 | } | ||
| 1189 | |||
| 1190 | return 0; | ||
| 1191 | } | ||
| 1192 | |||
| 1193 | static int xive_native_debug_open(struct inode *inode, struct file *file) | ||
| 1194 | { | ||
| 1195 | return single_open(file, xive_native_debug_show, inode->i_private); | ||
| 1196 | } | ||
| 1197 | |||
| 1198 | static const struct file_operations xive_native_debug_fops = { | ||
| 1199 | .open = xive_native_debug_open, | ||
| 1200 | .read = seq_read, | ||
| 1201 | .llseek = seq_lseek, | ||
| 1202 | .release = single_release, | ||
| 1203 | }; | ||
| 1204 | |||
| 1205 | static void xive_native_debugfs_init(struct kvmppc_xive *xive) | ||
| 1206 | { | ||
| 1207 | char *name; | ||
| 1208 | |||
| 1209 | name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); | ||
| 1210 | if (!name) { | ||
| 1211 | pr_err("%s: no memory for name\n", __func__); | ||
| 1212 | return; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, | ||
| 1216 | xive, &xive_native_debug_fops); | ||
| 1217 | |||
| 1218 | pr_debug("%s: created %s\n", __func__, name); | ||
| 1219 | kfree(name); | ||
| 1220 | } | ||
| 1221 | |||
| 1222 | static void kvmppc_xive_native_init(struct kvm_device *dev) | ||
| 1223 | { | ||
| 1224 | struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; | ||
| 1225 | |||
| 1226 | /* Register some debug interfaces */ | ||
| 1227 | xive_native_debugfs_init(xive); | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | struct kvm_device_ops kvm_xive_native_ops = { | ||
| 1231 | .name = "kvm-xive-native", | ||
| 1232 | .create = kvmppc_xive_native_create, | ||
| 1233 | .init = kvmppc_xive_native_init, | ||
| 1234 | .release = kvmppc_xive_native_release, | ||
| 1235 | .set_attr = kvmppc_xive_native_set_attr, | ||
| 1236 | .get_attr = kvmppc_xive_native_get_attr, | ||
| 1237 | .has_attr = kvmppc_xive_native_has_attr, | ||
| 1238 | .mmap = kvmppc_xive_native_mmap, | ||
| 1239 | }; | ||
| 1240 | |||
| 1241 | void kvmppc_xive_native_init_module(void) | ||
| 1242 | { | ||
| 1243 | ; | ||
| 1244 | } | ||
| 1245 | |||
| 1246 | void kvmppc_xive_native_exit_module(void) | ||
| 1247 | { | ||
| 1248 | ; | ||
| 1249 | } | ||
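The kvmppc_xive_native_get_vp()/set_vp() pair above backs the vCPU one_reg accessors: only the first thread-context word (NSR, CPPR, IPB, PIPR) is transferred, with the OPAL-saved IPB backup merged in on the get side. A hedged userspace sketch of how a VMM could drive it, assuming the KVM_REG_PPC_VP_STATE one_reg identifier added to the powerpc uapi headers by this series; error handling is intentionally minimal and the snippet only builds against powerpc KVM headers:

    /* Hedged sketch: save/restore one vCPU's XIVE VP state via one_reg. */
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int get_vp_state(int vcpu_fd, uint64_t timaval[2])
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_VP_STATE,  /* 128-bit register */
                    .addr = (uintptr_t)timaval,
            };
            return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }

    static int set_vp_state(int vcpu_fd, const uint64_t timaval[2])
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_VP_STATE,
                    .addr = (uintptr_t)timaval,
            };
            return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
    }

As the comments in kvmppc_xive_native_set_vp() note, the set side must not be called while the vCPU context is pushed on the hardware, and only the IPB and CPPR fields of the word are expected to matter.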
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 033363d6e764..0737acfd17f1 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c | |||
| @@ -130,24 +130,14 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc, | |||
| 130 | */ | 130 | */ |
| 131 | prio = ffs(pending) - 1; | 131 | prio = ffs(pending) - 1; |
| 132 | 132 | ||
| 133 | /* | ||
| 134 | * If the most favoured prio we found pending is less | ||
| 135 | * favored (or equal) than a pending IPI, we return | ||
| 136 | * the IPI instead. | ||
| 137 | * | ||
| 138 | * Note: If pending was 0 and mfrr is 0xff, we will | ||
| 139 | * not spurriously take an IPI because mfrr cannot | ||
| 140 | * then be smaller than cppr. | ||
| 141 | */ | ||
| 142 | if (prio >= xc->mfrr && xc->mfrr < xc->cppr) { | ||
| 143 | prio = xc->mfrr; | ||
| 144 | hirq = XICS_IPI; | ||
| 145 | break; | ||
| 146 | } | ||
| 147 | |||
| 148 | /* Don't scan past the guest cppr */ | 133 | /* Don't scan past the guest cppr */ |
| 149 | if (prio >= xc->cppr || prio > 7) | 134 | if (prio >= xc->cppr || prio > 7) { |
| 135 | if (xc->mfrr < xc->cppr) { | ||
| 136 | prio = xc->mfrr; | ||
| 137 | hirq = XICS_IPI; | ||
| 138 | } | ||
| 150 | break; | 139 | break; |
| 140 | } | ||
| 151 | 141 | ||
| 152 | /* Grab queue and pointers */ | 142 | /* Grab queue and pointers */ |
| 153 | q = &xc->queues[prio]; | 143 | q = &xc->queues[prio]; |
| @@ -184,9 +174,12 @@ skip_ipi: | |||
| 184 | * been set and another occurrence of the IPI will trigger. | 174 | * been set and another occurrence of the IPI will trigger. |
| 185 | */ | 175 | */ |
| 186 | if (hirq == XICS_IPI || (prio == 0 && !qpage)) { | 176 | if (hirq == XICS_IPI || (prio == 0 && !qpage)) { |
| 187 | if (scan_type == scan_fetch) | 177 | if (scan_type == scan_fetch) { |
| 188 | GLUE(X_PFX,source_eoi)(xc->vp_ipi, | 178 | GLUE(X_PFX,source_eoi)(xc->vp_ipi, |
| 189 | &xc->vp_ipi_data); | 179 | &xc->vp_ipi_data); |
| 180 | q->idx = idx; | ||
| 181 | q->toggle = toggle; | ||
| 182 | } | ||
| 190 | /* Loop back on same queue with updated idx/toggle */ | 183 | /* Loop back on same queue with updated idx/toggle */ |
| 191 | #ifdef XIVE_RUNTIME_CHECKS | 184 | #ifdef XIVE_RUNTIME_CHECKS |
| 192 | WARN_ON(hirq && hirq != XICS_IPI); | 185 | WARN_ON(hirq && hirq != XICS_IPI); |
| @@ -199,32 +192,41 @@ skip_ipi: | |||
| 199 | if (hirq == XICS_DUMMY) | 192 | if (hirq == XICS_DUMMY) |
| 200 | goto skip_ipi; | 193 | goto skip_ipi; |
| 201 | 194 | ||
| 202 | /* If fetching, update queue pointers */ | 195 | /* Clear the pending bit if the queue is now empty */ |
| 203 | if (scan_type == scan_fetch) { | 196 | if (!hirq) { |
| 204 | q->idx = idx; | 197 | pending &= ~(1 << prio); |
| 205 | q->toggle = toggle; | ||
| 206 | } | ||
| 207 | |||
| 208 | /* Something found, stop searching */ | ||
| 209 | if (hirq) | ||
| 210 | break; | ||
| 211 | |||
| 212 | /* Clear the pending bit on the now empty queue */ | ||
| 213 | pending &= ~(1 << prio); | ||
| 214 | 198 | ||
| 215 | /* | 199 | /* |
| 216 | * Check if the queue count needs adjusting due to | 200 | * Check if the queue count needs adjusting due to |
| 217 | * interrupts being moved away. | 201 | * interrupts being moved away. |
| 218 | */ | 202 | */ |
| 219 | if (atomic_read(&q->pending_count)) { | 203 | if (atomic_read(&q->pending_count)) { |
| 220 | int p = atomic_xchg(&q->pending_count, 0); | 204 | int p = atomic_xchg(&q->pending_count, 0); |
| 221 | if (p) { | 205 | if (p) { |
| 222 | #ifdef XIVE_RUNTIME_CHECKS | 206 | #ifdef XIVE_RUNTIME_CHECKS |
| 223 | WARN_ON(p > atomic_read(&q->count)); | 207 | WARN_ON(p > atomic_read(&q->count)); |
| 224 | #endif | 208 | #endif |
| 225 | atomic_sub(p, &q->count); | 209 | atomic_sub(p, &q->count); |
| 210 | } | ||
| 226 | } | 211 | } |
| 227 | } | 212 | } |
| 213 | |||
| 214 | /* | ||
| 215 | * If the most favoured prio we found pending is equally or | ||
| 216 | * less favoured than a pending IPI, we return the IPI | ||
| 217 | * instead. | ||
| 218 | */ | ||
| 219 | if (prio >= xc->mfrr && xc->mfrr < xc->cppr) { | ||
| 220 | prio = xc->mfrr; | ||
| 221 | hirq = XICS_IPI; | ||
| 222 | break; | ||
| 223 | } | ||
| 224 | |||
| 225 | /* If fetching, update queue pointers */ | ||
| 226 | if (scan_type == scan_fetch) { | ||
| 227 | q->idx = idx; | ||
| 228 | q->toggle = toggle; | ||
| 229 | } | ||
| 228 | } | 230 | } |
| 229 | 231 | ||
| 230 | /* If we are just taking a "peek", do nothing else */ | 232 | /* If we are just taking a "peek", do nothing else */ |
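The rework of GLUE(X_PFX,scan_interrupts)() above moves the IPI check after the queue scan, so a pending IPI (mfrr) is compared against the best priority actually popped from the queues rather than against the raw pending bitmap, and the queue pointers are committed before looping back on the IPI case. A standalone, hedged sketch of just the final comparison (lower numbers are more favoured; XICS_IPI here is a placeholder constant, not the kernel's definition):

    #include <stdint.h>

    #define XICS_IPI 2  /* placeholder source number for the IPI */

    /*
     * Choose between the best interrupt found in the event queues
     * (found_prio/found_irq) and a pending IPI (mfrr), honouring the
     * current priority ceiling (cppr).
     */
    static uint32_t pick_irq(uint8_t found_prio, uint32_t found_irq,
                             uint8_t mfrr, uint8_t cppr, uint8_t *out_prio)
    {
            /* The IPI wins only if it is deliverable (mfrr < cppr) and
             * at least as favoured as what the queues offered. */
            if (mfrr < cppr && mfrr <= found_prio) {
                    *out_prio = mfrr;
                    return XICS_IPI;
            }
            *out_prio = found_prio;
            return found_irq;
    }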
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 92910b7c5bcc..3393b166817a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
| @@ -570,6 +570,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 570 | case KVM_CAP_PPC_GET_CPU_CHAR: | 570 | case KVM_CAP_PPC_GET_CPU_CHAR: |
| 571 | r = 1; | 571 | r = 1; |
| 572 | break; | 572 | break; |
| 573 | #ifdef CONFIG_KVM_XIVE | ||
| 574 | case KVM_CAP_PPC_IRQ_XIVE: | ||
| 575 | /* | ||
| 576 | * We need XIVE to be enabled on the platform (implies | ||
| 577 | * a POWER9 processor) and the PowerNV platform, as | ||
| 578 | * nested is not yet supported. | ||
| 579 | */ | ||
| 580 | r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE); | ||
| 581 | break; | ||
| 582 | #endif | ||
| 573 | 583 | ||
| 574 | case KVM_CAP_PPC_ALLOC_HTAB: | 584 | case KVM_CAP_PPC_ALLOC_HTAB: |
| 575 | r = hv_enabled; | 585 | r = hv_enabled; |
| @@ -750,6 +760,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
| 750 | else | 760 | else |
| 751 | kvmppc_xics_free_icp(vcpu); | 761 | kvmppc_xics_free_icp(vcpu); |
| 752 | break; | 762 | break; |
| 763 | case KVMPPC_IRQ_XIVE: | ||
| 764 | kvmppc_xive_native_cleanup_vcpu(vcpu); | ||
| 765 | break; | ||
| 753 | } | 766 | } |
| 754 | 767 | ||
| 755 | kvmppc_core_vcpu_free(vcpu); | 768 | kvmppc_core_vcpu_free(vcpu); |
| @@ -1938,6 +1951,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
| 1938 | break; | 1951 | break; |
| 1939 | } | 1952 | } |
| 1940 | #endif /* CONFIG_KVM_XICS */ | 1953 | #endif /* CONFIG_KVM_XICS */ |
| 1954 | #ifdef CONFIG_KVM_XIVE | ||
| 1955 | case KVM_CAP_PPC_IRQ_XIVE: { | ||
| 1956 | struct fd f; | ||
| 1957 | struct kvm_device *dev; | ||
| 1958 | |||
| 1959 | r = -EBADF; | ||
| 1960 | f = fdget(cap->args[0]); | ||
| 1961 | if (!f.file) | ||
| 1962 | break; | ||
| 1963 | |||
| 1964 | r = -ENXIO; | ||
| 1965 | if (!xive_enabled()) | ||
| 1966 | break; | ||
| 1967 | |||
| 1968 | r = -EPERM; | ||
| 1969 | dev = kvm_device_from_filp(f.file); | ||
| 1970 | if (dev) | ||
| 1971 | r = kvmppc_xive_native_connect_vcpu(dev, vcpu, | ||
| 1972 | cap->args[1]); | ||
| 1973 | |||
| 1974 | fdput(f); | ||
| 1975 | break; | ||
| 1976 | } | ||
| 1977 | #endif /* CONFIG_KVM_XIVE */ | ||
| 1941 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 1978 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
| 1942 | case KVM_CAP_PPC_FWNMI: | 1979 | case KVM_CAP_PPC_FWNMI: |
| 1943 | r = -EINVAL; | 1980 | r = -EINVAL; |
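The new KVM_CAP_PPC_IRQ_XIVE vCPU capability above takes the XIVE device file descriptor in args[0] and the vCPU's server number in args[1]. A hedged userspace sketch of the call, assuming the device has already been created (see the KVM_CREATE_DEVICE example further down); error handling is minimal:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Connect one vCPU to the XIVE native device. */
    static int connect_vcpu_to_xive(int vcpu_fd, int xive_fd, uint32_t server_num)
    {
            struct kvm_enable_cap cap = {
                    .cap  = KVM_CAP_PPC_IRQ_XIVE,
                    .args = { (uint64_t)xive_fd, server_num },
            };

            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }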
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index daad8c45c8e7..7472244e7f30 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c | |||
| @@ -260,6 +260,9 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO); | |||
| 260 | OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); | 260 | OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); |
| 261 | OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); | 261 | OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); |
| 262 | OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); | 262 | OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); |
| 263 | OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE); | ||
| 264 | OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE); | ||
| 265 | OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE); | ||
| 263 | OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); | 266 | OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); |
| 264 | OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); | 267 | OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); |
| 265 | OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); | 268 | OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); |
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 1ca127d052a6..7782201e5fe8 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c | |||
| @@ -437,6 +437,12 @@ void xive_native_sync_source(u32 hw_irq) | |||
| 437 | } | 437 | } |
| 438 | EXPORT_SYMBOL_GPL(xive_native_sync_source); | 438 | EXPORT_SYMBOL_GPL(xive_native_sync_source); |
| 439 | 439 | ||
| 440 | void xive_native_sync_queue(u32 hw_irq) | ||
| 441 | { | ||
| 442 | opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq); | ||
| 443 | } | ||
| 444 | EXPORT_SYMBOL_GPL(xive_native_sync_queue); | ||
| 445 | |||
| 440 | static const struct xive_ops xive_native_ops = { | 446 | static const struct xive_ops xive_native_ops = { |
| 441 | .populate_irq_data = xive_native_populate_irq_data, | 447 | .populate_irq_data = xive_native_populate_irq_data, |
| 442 | .configure_irq = xive_native_configure_irq, | 448 | .configure_irq = xive_native_configure_irq, |
| @@ -515,6 +521,9 @@ u32 xive_native_default_eq_shift(void) | |||
| 515 | } | 521 | } |
| 516 | EXPORT_SYMBOL_GPL(xive_native_default_eq_shift); | 522 | EXPORT_SYMBOL_GPL(xive_native_default_eq_shift); |
| 517 | 523 | ||
| 524 | unsigned long xive_tima_os; | ||
| 525 | EXPORT_SYMBOL_GPL(xive_tima_os); | ||
| 526 | |||
| 518 | bool __init xive_native_init(void) | 527 | bool __init xive_native_init(void) |
| 519 | { | 528 | { |
| 520 | struct device_node *np; | 529 | struct device_node *np; |
| @@ -567,6 +576,14 @@ bool __init xive_native_init(void) | |||
| 567 | for_each_possible_cpu(cpu) | 576 | for_each_possible_cpu(cpu) |
| 568 | kvmppc_set_xive_tima(cpu, r.start, tima); | 577 | kvmppc_set_xive_tima(cpu, r.start, tima); |
| 569 | 578 | ||
| 579 | /* Resource 2 is OS window */ | ||
| 580 | if (of_address_to_resource(np, 2, &r)) { | ||
| 581 | pr_err("Failed to get thread mgmnt area resource\n"); | ||
| 582 | return false; | ||
| 583 | } | ||
| 584 | |||
| 585 | xive_tima_os = r.start; | ||
| 586 | |||
| 570 | /* Grab size of provisionning pages */ | 587 | /* Grab size of provisionning pages */ |
| 571 | xive_parse_provisioning(np); | 588 | xive_parse_provisioning(np); |
| 572 | 589 | ||
| @@ -711,3 +728,96 @@ bool xive_native_has_single_escalation(void) | |||
| 711 | return xive_has_single_esc; | 728 | return xive_has_single_esc; |
| 712 | } | 729 | } |
| 713 | EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); | 730 | EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); |
| 731 | |||
| 732 | int xive_native_get_queue_info(u32 vp_id, u32 prio, | ||
| 733 | u64 *out_qpage, | ||
| 734 | u64 *out_qsize, | ||
| 735 | u64 *out_qeoi_page, | ||
| 736 | u32 *out_escalate_irq, | ||
| 737 | u64 *out_qflags) | ||
| 738 | { | ||
| 739 | __be64 qpage; | ||
| 740 | __be64 qsize; | ||
| 741 | __be64 qeoi_page; | ||
| 742 | __be32 escalate_irq; | ||
| 743 | __be64 qflags; | ||
| 744 | s64 rc; | ||
| 745 | |||
| 746 | rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize, | ||
| 747 | &qeoi_page, &escalate_irq, &qflags); | ||
| 748 | if (rc) { | ||
| 749 | pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n", | ||
| 750 | vp_id, prio, rc); | ||
| 751 | return -EIO; | ||
| 752 | } | ||
| 753 | |||
| 754 | if (out_qpage) | ||
| 755 | *out_qpage = be64_to_cpu(qpage); | ||
| 756 | if (out_qsize) | ||
| 757 | *out_qsize = be32_to_cpu(qsize); | ||
| 758 | if (out_qeoi_page) | ||
| 759 | *out_qeoi_page = be64_to_cpu(qeoi_page); | ||
| 760 | if (out_escalate_irq) | ||
| 761 | *out_escalate_irq = be32_to_cpu(escalate_irq); | ||
| 762 | if (out_qflags) | ||
| 763 | *out_qflags = be64_to_cpu(qflags); | ||
| 764 | |||
| 765 | return 0; | ||
| 766 | } | ||
| 767 | EXPORT_SYMBOL_GPL(xive_native_get_queue_info); | ||
| 768 | |||
| 769 | int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex) | ||
| 770 | { | ||
| 771 | __be32 opal_qtoggle; | ||
| 772 | __be32 opal_qindex; | ||
| 773 | s64 rc; | ||
| 774 | |||
| 775 | rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle, | ||
| 776 | &opal_qindex); | ||
| 777 | if (rc) { | ||
| 778 | pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n", | ||
| 779 | vp_id, prio, rc); | ||
| 780 | return -EIO; | ||
| 781 | } | ||
| 782 | |||
| 783 | if (qtoggle) | ||
| 784 | *qtoggle = be32_to_cpu(opal_qtoggle); | ||
| 785 | if (qindex) | ||
| 786 | *qindex = be32_to_cpu(opal_qindex); | ||
| 787 | |||
| 788 | return 0; | ||
| 789 | } | ||
| 790 | EXPORT_SYMBOL_GPL(xive_native_get_queue_state); | ||
| 791 | |||
| 792 | int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex) | ||
| 793 | { | ||
| 794 | s64 rc; | ||
| 795 | |||
| 796 | rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex); | ||
| 797 | if (rc) { | ||
| 798 | pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n", | ||
| 799 | vp_id, prio, rc); | ||
| 800 | return -EIO; | ||
| 801 | } | ||
| 802 | |||
| 803 | return 0; | ||
| 804 | } | ||
| 805 | EXPORT_SYMBOL_GPL(xive_native_set_queue_state); | ||
| 806 | |||
| 807 | int xive_native_get_vp_state(u32 vp_id, u64 *out_state) | ||
| 808 | { | ||
| 809 | __be64 state; | ||
| 810 | s64 rc; | ||
| 811 | |||
| 812 | rc = opal_xive_get_vp_state(vp_id, &state); | ||
| 813 | if (rc) { | ||
| 814 | pr_err("OPAL failed to get vp state for VCPU %d : %lld\n", | ||
| 815 | vp_id, rc); | ||
| 816 | return -EIO; | ||
| 817 | } | ||
| 818 | |||
| 819 | if (out_state) | ||
| 820 | *out_state = be64_to_cpu(state); | ||
| 821 | return 0; | ||
| 822 | } | ||
| 823 | EXPORT_SYMBOL_GPL(xive_native_get_vp_state); | ||
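The OPAL wrappers added above (queue info, queue state get/set, VP state) are what the KVM XIVE device uses to checkpoint interrupt-controller state for migration. A hedged in-kernel sketch of a caller walking the eight priority queues of one VP; the vp_id and the saved_queue layout are made up, only the helper signatures come from this patch, and a real caller would skip priorities that were never provisioned (the helper logs and returns -EIO for those):

    #include <linux/types.h>
    #include <asm/xive.h>

    struct saved_queue {
            u32 toggle;
            u32 index;
    };

    /* Capture the toggle bit and index of every priority queue. */
    static int save_vp_queues(u32 vp_id, struct saved_queue q[8])
    {
            u32 prio;
            int rc;

            for (prio = 0; prio < 8; prio++) {
                    rc = xive_native_get_queue_state(vp_id, prio,
                                                     &q[prio].toggle,
                                                     &q[prio].index);
                    if (rc)
                            return rc;
            }
            return 0;
    }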
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6f665c16e31d..79fa4426509c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -1270,11 +1270,21 @@ struct kvm_device_ops { | |||
| 1270 | */ | 1270 | */ |
| 1271 | void (*destroy)(struct kvm_device *dev); | 1271 | void (*destroy)(struct kvm_device *dev); |
| 1272 | 1272 | ||
| 1273 | /* | ||
| 1274 | * Release is an alternative method to free the device. It is | ||
| 1275 | * called when the device file descriptor is closed. Once | ||
| 1276 | * release is called, the destroy method will not be called | ||
| 1277 | * anymore as the device is removed from the device list of | ||
| 1278 | * the VM. kvm->lock is held. | ||
| 1279 | */ | ||
| 1280 | void (*release)(struct kvm_device *dev); | ||
| 1281 | |||
| 1273 | int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); | 1282 | int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); |
| 1274 | int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); | 1283 | int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); |
| 1275 | int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); | 1284 | int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); |
| 1276 | long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, | 1285 | long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, |
| 1277 | unsigned long arg); | 1286 | unsigned long arg); |
| 1287 | int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); | ||
| 1278 | }; | 1288 | }; |
| 1279 | 1289 | ||
| 1280 | void kvm_device_get(struct kvm_device *dev); | 1290 | void kvm_device_get(struct kvm_device *dev); |
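The two kvm_device_ops hooks added above let a device free its state when its file descriptor is closed (release, called with kvm->lock held after the device is unlinked from the VM) and let userspace mmap device memory. A hedged sketch of a backend using both; the demo_* names and the single backing page are illustrative, and the create/allocation path is omitted:

    #include <linux/kvm_host.h>
    #include <linux/mm.h>
    #include <linux/slab.h>

    struct demo_dev {
            struct page *page;      /* hypothetical page exposed to userspace */
    };

    static void demo_release(struct kvm_device *dev)
    {
            struct demo_dev *d = dev->private;

            __free_page(d->page);
            kfree(d);
            kfree(dev);     /* with release(), freeing dev is the backend's job */
    }

    static int demo_mmap(struct kvm_device *dev, struct vm_area_struct *vma)
    {
            struct demo_dev *d = dev->private;

            if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                    return -EINVAL;
            return vm_insert_page(vma, vma->vm_start, d->page);
    }

    static struct kvm_device_ops demo_ops = {
            .name    = "demo",
            .release = demo_release,
            .mmap    = demo_mmap,
    };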
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d673734c46cb..d727adf07801 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
| @@ -989,6 +989,7 @@ struct kvm_ppc_resize_hpt { | |||
| 989 | #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ | 989 | #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ |
| 990 | #define KVM_CAP_HYPERV_CPUID 167 | 990 | #define KVM_CAP_HYPERV_CPUID 167 |
| 991 | #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 | 991 | #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 |
| 992 | #define KVM_CAP_PPC_IRQ_XIVE 169 | ||
| 992 | 993 | ||
| 993 | #ifdef KVM_CAP_IRQ_ROUTING | 994 | #ifdef KVM_CAP_IRQ_ROUTING |
| 994 | 995 | ||
| @@ -1212,6 +1213,8 @@ enum kvm_device_type { | |||
| 1212 | #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 | 1213 | #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 |
| 1213 | KVM_DEV_TYPE_ARM_VGIC_ITS, | 1214 | KVM_DEV_TYPE_ARM_VGIC_ITS, |
| 1214 | #define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS | 1215 | #define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS |
| 1216 | KVM_DEV_TYPE_XIVE, | ||
| 1217 | #define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE | ||
| 1215 | KVM_DEV_TYPE_MAX, | 1218 | KVM_DEV_TYPE_MAX, |
| 1216 | }; | 1219 | }; |
| 1217 | 1220 | ||
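KVM_DEV_TYPE_XIVE above is the type userspace passes to KVM_CREATE_DEVICE to instantiate the native XIVE device on a VM. A hedged sketch with minimal error handling:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Returns the new device fd, or -1 on error. */
    static int create_xive_device(int vm_fd)
    {
            struct kvm_create_device cd = {
                    .type  = KVM_DEV_TYPE_XIVE,
                    .flags = 0,
            };

            if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                    return -1;
            return cd.fd;   /* filled in by the kernel */
    }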
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f4e02cd8fa43..d22b1f4bfa56 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -2950,6 +2950,16 @@ out: | |||
| 2950 | } | 2950 | } |
| 2951 | #endif | 2951 | #endif |
| 2952 | 2952 | ||
| 2953 | static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) | ||
| 2954 | { | ||
| 2955 | struct kvm_device *dev = filp->private_data; | ||
| 2956 | |||
| 2957 | if (dev->ops->mmap) | ||
| 2958 | return dev->ops->mmap(dev, vma); | ||
| 2959 | |||
| 2960 | return -ENODEV; | ||
| 2961 | } | ||
| 2962 | |||
| 2953 | static int kvm_device_ioctl_attr(struct kvm_device *dev, | 2963 | static int kvm_device_ioctl_attr(struct kvm_device *dev, |
| 2954 | int (*accessor)(struct kvm_device *dev, | 2964 | int (*accessor)(struct kvm_device *dev, |
| 2955 | struct kvm_device_attr *attr), | 2965 | struct kvm_device_attr *attr), |
| @@ -2994,6 +3004,13 @@ static int kvm_device_release(struct inode *inode, struct file *filp) | |||
| 2994 | struct kvm_device *dev = filp->private_data; | 3004 | struct kvm_device *dev = filp->private_data; |
| 2995 | struct kvm *kvm = dev->kvm; | 3005 | struct kvm *kvm = dev->kvm; |
| 2996 | 3006 | ||
| 3007 | if (dev->ops->release) { | ||
| 3008 | mutex_lock(&kvm->lock); | ||
| 3009 | list_del(&dev->vm_node); | ||
| 3010 | dev->ops->release(dev); | ||
| 3011 | mutex_unlock(&kvm->lock); | ||
| 3012 | } | ||
| 3013 | |||
| 2997 | kvm_put_kvm(kvm); | 3014 | kvm_put_kvm(kvm); |
| 2998 | return 0; | 3015 | return 0; |
| 2999 | } | 3016 | } |
| @@ -3002,6 +3019,7 @@ static const struct file_operations kvm_device_fops = { | |||
| 3002 | .unlocked_ioctl = kvm_device_ioctl, | 3019 | .unlocked_ioctl = kvm_device_ioctl, |
| 3003 | .release = kvm_device_release, | 3020 | .release = kvm_device_release, |
| 3004 | KVM_COMPAT(kvm_device_ioctl), | 3021 | KVM_COMPAT(kvm_device_ioctl), |
| 3022 | .mmap = kvm_device_mmap, | ||
| 3005 | }; | 3023 | }; |
| 3006 | 3024 | ||
| 3007 | struct kvm_device *kvm_device_from_filp(struct file *filp) | 3025 | struct kvm_device *kvm_device_from_filp(struct file *filp) |
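With kvm_device_fops now carrying a .mmap handler, a device fd returned by KVM_CREATE_DEVICE can be mapped directly; the generic code simply forwards to the backend's mmap hook, or returns -ENODEV if the device has none. The offset/length layout is entirely device-specific, so the page_offset below is a placeholder, not a documented XIVE constant:

    #include <sys/mman.h>
    #include <unistd.h>

    /* Map one page of a KVM device fd; returns NULL on failure. */
    static void *map_device_page(int dev_fd, long page_offset)
    {
            long psize = sysconf(_SC_PAGESIZE);
            void *p = mmap(NULL, psize, PROT_READ | PROT_WRITE, MAP_SHARED,
                           dev_fd, page_offset * psize);

            return p == MAP_FAILED ? NULL : p;
    }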
