author     Linus Torvalds <torvalds@linux-foundation.org>  2017-02-22 21:22:53 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-02-22 21:22:53 -0500
commit     fd7e9a88348472521d999434ee02f25735c7dadf (patch)
tree       90e6249e58d90ba9d590cfed4481c29ca36a05dc
parent     5066e4a34081dd82fb625f2f382bfa29ca421a3f (diff)
parent     dd0fd8bca1850ddadf5d33a9ed28f3707cd98ac7 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"4.11 is going to be a relatively large release for KVM, with a little
over 200 commits and noteworthy changes for most architectures.
ARM:
- GICv3 save/restore
- cache flushing fixes
- working MSI injection for GICv3 ITS
- physical timer emulation
MIPS:
- various improvements under the hood
- support for SMP guests
- a large rewrite of MMU emulation. KVM MIPS can now use MMU
notifiers to support copy-on-write, KSM, idle page tracking,
swapping, ballooning and everything else. KVM_CAP_READONLY_MEM is
also supported, so that writes to some memory regions can be
treated as MMIO. The new MMU also paves the way for hardware
virtualization support.
PPC:
- support for POWER9 using the radix-tree MMU for host and guest
- resizable hashed page table
- bugfixes.
s390:
- expose more features to the guest
- more SIMD extensions
- instruction execution protection
- ESOP2
x86:
- improved hashing in the MMU
- faster PageLRU tracking for Intel CPUs without EPT A/D bits
- some refactoring of nested VMX entry/exit code, preparing for live
migration support of nested hypervisors
- expose yet another AVX512 CPUID bit
- host-to-guest PTP support
- refactoring of interrupt injection, with some optimizations thrown
in and some duct tape removed.
- remove lazy FPU handling
- optimizations of user-mode exits
- optimizations of vcpu_is_preempted() for KVM guests
generic:
- alternative signaling mechanism that doesn't pound on
tsk->sighand->siglock"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (195 commits)
x86/kvm: Provide optimized version of vcpu_is_preempted() for x86-64
x86/paravirt: Change vcp_is_preempted() arg type to long
KVM: VMX: use correct vmcs_read/write for guest segment selector/base
x86/kvm/vmx: Defer TR reload after VM exit
x86/asm/64: Drop __cacheline_aligned from struct x86_hw_tss
x86/kvm/vmx: Simplify segment_base()
x86/kvm/vmx: Get rid of segment_base() on 64-bit kernels
x86/kvm/vmx: Don't fetch the TSS base from the GDT
x86/asm: Define the kernel TSS limit in a macro
kvm: fix page struct leak in handle_vmon
KVM: PPC: Book3S HV: Disable HPT resizing on POWER9 for now
KVM: Return an error code only as a constant in kvm_get_dirty_log()
KVM: Return an error code only as a constant in kvm_get_dirty_log_protect()
KVM: Return directly after a failed copy_from_user() in kvm_vm_compat_ioctl()
KVM: x86: remove code for lazy FPU handling
KVM: race-free exit from KVM_RUN without POSIX signals
KVM: PPC: Book3S HV: Turn "KVM guest htab" message into a debug message
KVM: PPC: Book3S PR: Ratelimit copy data failure error messages
KVM: Support vCPU-based gfn->hva cache
KVM: use separate generations for each address space
...
110 files changed, 7277 insertions, 2968 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4470671b0c26..069450938b79 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2061,6 +2061,8 @@ registers, find a list below: | |||
2061 | MIPS | KVM_REG_MIPS_LO | 64 | 2061 | MIPS | KVM_REG_MIPS_LO | 64 |
2062 | MIPS | KVM_REG_MIPS_PC | 64 | 2062 | MIPS | KVM_REG_MIPS_PC | 64 |
2063 | MIPS | KVM_REG_MIPS_CP0_INDEX | 32 | 2063 | MIPS | KVM_REG_MIPS_CP0_INDEX | 32 |
2064 | MIPS | KVM_REG_MIPS_CP0_ENTRYLO0 | 64 | ||
2065 | MIPS | KVM_REG_MIPS_CP0_ENTRYLO1 | 64 | ||
2064 | MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64 | 2066 | MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64 |
2065 | MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64 | 2067 | MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64 |
2066 | MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32 | 2068 | MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32 |
@@ -2071,9 +2073,11 @@ registers, find a list below: | |||
2071 | MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64 | 2073 | MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64 |
2072 | MIPS | KVM_REG_MIPS_CP0_COMPARE | 32 | 2074 | MIPS | KVM_REG_MIPS_CP0_COMPARE | 32 |
2073 | MIPS | KVM_REG_MIPS_CP0_STATUS | 32 | 2075 | MIPS | KVM_REG_MIPS_CP0_STATUS | 32 |
2076 | MIPS | KVM_REG_MIPS_CP0_INTCTL | 32 | ||
2074 | MIPS | KVM_REG_MIPS_CP0_CAUSE | 32 | 2077 | MIPS | KVM_REG_MIPS_CP0_CAUSE | 32 |
2075 | MIPS | KVM_REG_MIPS_CP0_EPC | 64 | 2078 | MIPS | KVM_REG_MIPS_CP0_EPC | 64 |
2076 | MIPS | KVM_REG_MIPS_CP0_PRID | 32 | 2079 | MIPS | KVM_REG_MIPS_CP0_PRID | 32 |
2080 | MIPS | KVM_REG_MIPS_CP0_EBASE | 64 | ||
2077 | MIPS | KVM_REG_MIPS_CP0_CONFIG | 32 | 2081 | MIPS | KVM_REG_MIPS_CP0_CONFIG | 32 |
2078 | MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32 | 2082 | MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32 |
2079 | MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32 | 2083 | MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32 |
@@ -2148,6 +2152,12 @@ patterns depending on whether they're 32-bit or 64-bit registers: | |||
2148 | 0x7020 0000 0001 00 <reg:5> <sel:3> (32-bit) | 2152 | 0x7020 0000 0001 00 <reg:5> <sel:3> (32-bit) |
2149 | 0x7030 0000 0001 00 <reg:5> <sel:3> (64-bit) | 2153 | 0x7030 0000 0001 00 <reg:5> <sel:3> (64-bit) |
2150 | 2154 | ||
2155 | Note: KVM_REG_MIPS_CP0_ENTRYLO0 and KVM_REG_MIPS_CP0_ENTRYLO1 are the MIPS64 | ||
2156 | versions of the EntryLo registers regardless of the word size of the host | ||
2157 | hardware, host kernel, guest, and whether XPA is present in the guest, i.e. | ||
2158 | with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and | ||
2159 | the PFNX field starting at bit 30. | ||
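For illustration, a small userspace sketch that reads EntryLo0 through KVM_GET_ONE_REG and picks out the RI/XI bits; the register id is built from the 64-bit id pattern quoted earlier in this section (EntryLo0 being CP0 register 2, select 0), and vcpu_fd is assumed to be an already-created vCPU file descriptor:

#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Sketch only: id derived from the documented 64-bit pattern for CP0 reg 2, sel 0. */
#define ONE_REG_MIPS_CP0_ENTRYLO0	(0x7030000000010000ULL | (2 << 3) | 0)

static int dump_entrylo0(int vcpu_fd)
{
	uint64_t val;
	struct kvm_one_reg reg = {
		.id   = ONE_REG_MIPS_CP0_ENTRYLO0,
		.addr = (uintptr_t)&val,
	};

	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
		return -1;

	/* RI is bit 63, XI is bit 62; PFNX, when present, starts at bit 30. */
	printf("EntryLo0=0x%016llx RI=%llu XI=%llu\n",
	       (unsigned long long)val,
	       (unsigned long long)((val >> 63) & 1),
	       (unsigned long long)((val >> 62) & 1));
	return 0;
}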
2160 | |||
2151 | MIPS KVM control registers (see above) have the following id bit patterns: | 2161 | MIPS KVM control registers (see above) have the following id bit patterns: |
2152 | 0x7030 0000 0002 <reg:16> | 2162 | 0x7030 0000 0002 <reg:16> |
2153 | 2163 | ||
@@ -2443,18 +2453,20 @@ are, it will do nothing and return an EBUSY error. | |||
2443 | The parameter is a pointer to a 32-bit unsigned integer variable | 2453 | The parameter is a pointer to a 32-bit unsigned integer variable |
2444 | containing the order (log base 2) of the desired size of the hash | 2454 | containing the order (log base 2) of the desired size of the hash |
2445 | table, which must be between 18 and 46. On successful return from the | 2455 | table, which must be between 18 and 46. On successful return from the |
2446 | ioctl, it will have been updated with the order of the hash table that | 2456 | ioctl, the value will not be changed by the kernel. |
2447 | was allocated. | ||
2448 | 2457 | ||
2449 | If no hash table has been allocated when any vcpu is asked to run | 2458 | If no hash table has been allocated when any vcpu is asked to run |
2450 | (with the KVM_RUN ioctl), the host kernel will allocate a | 2459 | (with the KVM_RUN ioctl), the host kernel will allocate a |
2451 | default-sized hash table (16 MB). | 2460 | default-sized hash table (16 MB). |
2452 | 2461 | ||
2453 | If this ioctl is called when a hash table has already been allocated, | 2462 | If this ioctl is called when a hash table has already been allocated, |
2454 | the kernel will clear out the existing hash table (zero all HPTEs) and | 2463 | with a different order from the existing hash table, the existing hash |
2455 | return the hash table order in the parameter. (If the guest is using | 2464 | table will be freed and a new one allocated. If this ioctl is |
2456 | the virtualized real-mode area (VRMA) facility, the kernel will | 2465 | called when a hash table has already been allocated of the same order |
2457 | re-create the VRMA HPTEs on the next KVM_RUN of any vcpu.) | 2466 | as specified, the kernel will clear out the existing hash table (zero |
2467 | all HPTEs). In either case, if the guest is using the virtualized | ||
2468 | real-mode area (VRMA) facility, the kernel will re-create the VRMA | ||
2469 | HPTEs on the next KVM_RUN of any vcpu. | ||
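The semantics above can be exercised with a minimal sketch like the following, assuming this hunk belongs to the KVM_PPC_ALLOCATE_HTAB documentation and that vm_fd is an already-created VM file descriptor:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/*
 * Request an HPT of 2^order bytes. With the updated semantics described
 * above, the kernel no longer rewrites "order" on success.
 */
static int request_hpt(int vm_fd, __u32 order)
{
	/* order must be between 18 and 46 */
	return ioctl(vm_fd, KVM_PPC_ALLOCATE_HTAB, &order);
}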
2458 | 2470 | ||
2459 | 4.77 KVM_S390_INTERRUPT | 2471 | 4.77 KVM_S390_INTERRUPT |
2460 | 2472 | ||
@@ -3177,7 +3189,7 @@ of IOMMU pages. | |||
3177 | 3189 | ||
3178 | The rest of functionality is identical to KVM_CREATE_SPAPR_TCE. | 3190 | The rest of functionality is identical to KVM_CREATE_SPAPR_TCE. |
3179 | 3191 | ||
3180 | 4.98 KVM_REINJECT_CONTROL | 3192 | 4.99 KVM_REINJECT_CONTROL |
3181 | 3193 | ||
3182 | Capability: KVM_CAP_REINJECT_CONTROL | 3194 | Capability: KVM_CAP_REINJECT_CONTROL |
3183 | Architectures: x86 | 3195 | Architectures: x86 |
@@ -3201,7 +3213,7 @@ struct kvm_reinject_control { | |||
3201 | pit_reinject = 0 (!reinject mode) is recommended, unless running an old | 3213 | pit_reinject = 0 (!reinject mode) is recommended, unless running an old |
3202 | operating system that uses the PIT for timing (e.g. Linux 2.4.x). | 3214 | operating system that uses the PIT for timing (e.g. Linux 2.4.x). |
3203 | 3215 | ||
3204 | 4.99 KVM_PPC_CONFIGURE_V3_MMU | 3216 | 4.100 KVM_PPC_CONFIGURE_V3_MMU |
3205 | 3217 | ||
3206 | Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 | 3218 | Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 |
3207 | Architectures: ppc | 3219 | Architectures: ppc |
@@ -3232,7 +3244,7 @@ process table, which is in the guest's space. This field is formatted | |||
3232 | as the second doubleword of the partition table entry, as defined in | 3244 | as the second doubleword of the partition table entry, as defined in |
3233 | the Power ISA V3.00, Book III section 5.7.6.1. | 3245 | the Power ISA V3.00, Book III section 5.7.6.1. |
3234 | 3246 | ||
3235 | 4.100 KVM_PPC_GET_RMMU_INFO | 3247 | 4.101 KVM_PPC_GET_RMMU_INFO |
3236 | 3248 | ||
3237 | Capability: KVM_CAP_PPC_RADIX_MMU | 3249 | Capability: KVM_CAP_PPC_RADIX_MMU |
3238 | Architectures: ppc | 3250 | Architectures: ppc |
@@ -3266,6 +3278,101 @@ The ap_encodings gives the supported page sizes and their AP field | |||
3266 | encodings, encoded with the AP value in the top 3 bits and the log | 3278 | encodings, encoded with the AP value in the top 3 bits and the log |
3267 | base 2 of the page size in the bottom 6 bits. | 3279 | base 2 of the page size in the bottom 6 bits. |
3268 | 3280 | ||
3281 | 4.102 KVM_PPC_RESIZE_HPT_PREPARE | ||
3282 | |||
3283 | Capability: KVM_CAP_SPAPR_RESIZE_HPT | ||
3284 | Architectures: powerpc | ||
3285 | Type: vm ioctl | ||
3286 | Parameters: struct kvm_ppc_resize_hpt (in) | ||
3287 | Returns: 0 on successful completion, | ||
3288 | >0 if a new HPT is being prepared, the value is an estimated | ||
3289 | number of milliseconds until preparation is complete | ||
3290 | -EFAULT if struct kvm_ppc_resize_hpt cannot be read, | ||
3291 | -EINVAL if the supplied shift or flags are invalid | ||
3292 | -ENOMEM if unable to allocate the new HPT | ||
3293 | -ENOSPC if there was a hash collision when moving existing | ||
3294 | HPT entries to the new HPT | ||
3295 | -EIO on other error conditions | ||
3296 | |||
3297 | Used to implement the PAPR extension for runtime resizing of a guest's | ||
3298 | Hashed Page Table (HPT). Specifically this starts, stops or monitors | ||
3299 | the preparation of a new potential HPT for the guest, essentially | ||
3300 | implementing the H_RESIZE_HPT_PREPARE hypercall. | ||
3301 | |||
3302 | If called with shift > 0 when there is no pending HPT for the guest, | ||
3303 | this begins preparation of a new pending HPT of size 2^(shift) bytes. | ||
3304 | It then returns a positive integer with the estimated number of | ||
3305 | milliseconds until preparation is complete. | ||
3306 | |||
3307 | If called when there is a pending HPT whose size does not match that | ||
3308 | requested in the parameters, discards the existing pending HPT and | ||
3309 | creates a new one as above. | ||
3310 | |||
3311 | If called when there is a pending HPT of the size requested, will: | ||
3312 | * If preparation of the pending HPT is already complete, return 0 | ||
3313 | * If preparation of the pending HPT has failed, return an error | ||
3314 | code, then discard the pending HPT. | ||
3315 | * If preparation of the pending HPT is still in progress, return an | ||
3316 | estimated number of milliseconds until preparation is complete. | ||
3317 | |||
3318 | If called with shift == 0, discards any currently pending HPT and | ||
3319 | returns 0 (i.e. cancels any in-progress preparation). | ||
3320 | |||
3321 | flags is reserved for future expansion; currently, setting any bits in | ||
3322 | flags will result in -EINVAL. | ||
3323 | |||
3324 | Normally this will be called repeatedly with the same parameters until | ||
3325 | it returns <= 0. The first call will initiate preparation, subsequent | ||
3326 | ones will monitor preparation until it completes or fails. | ||
3327 | |||
3328 | struct kvm_ppc_resize_hpt { | ||
3329 | __u64 flags; | ||
3330 | __u32 shift; | ||
3331 | __u32 pad; | ||
3332 | }; | ||
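For illustration, a minimal sketch of the "call repeatedly until it returns <= 0" pattern just described, assuming the uapi headers export struct kvm_ppc_resize_hpt and KVM_PPC_RESIZE_HPT_PREPARE and that vm_fd is a VM file descriptor on a host advertising KVM_CAP_SPAPR_RESIZE_HPT:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int prepare_resized_hpt(int vm_fd, __u32 shift)
{
	struct kvm_ppc_resize_hpt rhpt = {
		.flags = 0,		/* any set bit returns -EINVAL */
		.shift = shift,		/* pending HPT will be 2^shift bytes */
	};
	int ret;

	do {
		ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
		if (ret > 0)		/* estimated milliseconds remaining */
			usleep(ret * 1000);
	} while (ret > 0);

	return ret;	/* 0: prepared; < 0: failed (see error list above) */
}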
3333 | |||
3334 | 4.103 KVM_PPC_RESIZE_HPT_COMMIT | ||
3335 | |||
3336 | Capability: KVM_CAP_SPAPR_RESIZE_HPT | ||
3337 | Architectures: powerpc | ||
3338 | Type: vm ioctl | ||
3339 | Parameters: struct kvm_ppc_resize_hpt (in) | ||
3340 | Returns: 0 on successful completion, | ||
3341 | -EFAULT if struct kvm_ppc_resize_hpt cannot be read, | ||
3342 | -EINVAL if the supplied shift or flags are invalid | ||
3343 | -ENXIO if there is no pending HPT, or the pending HPT doesn't | ||
3344 | have the requested size | ||
3345 | -EBUSY if the pending HPT is not fully prepared | ||
3346 | -ENOSPC if there was a hash collision when moving existing | ||
3347 | HPT entries to the new HPT | ||
3348 | -EIO on other error conditions | ||
3349 | |||
3350 | Used to implement the PAPR extension for runtime resizing of a guest's | ||
3351 | Hashed Page Table (HPT). Specifically this requests that the guest be | ||
3352 | transferred to working with the new HPT, essentially implementing the | ||
3353 | H_RESIZE_HPT_COMMIT hypercall. | ||
3354 | |||
3355 | This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has | ||
3356 | returned 0 with the same parameters. In other cases | ||
3357 | KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or | ||
3358 | -EBUSY, though others may be possible if the preparation was started, | ||
3359 | but failed). | ||
3360 | |||
3361 | This will have undefined effects on the guest if it has not already | ||
3362 | placed itself in a quiescent state where no vcpu will make MMU enabled | ||
3363 | memory accesses. | ||
3364 | |||
3365 | On successful completion, the pending HPT will become the guest's active | ||
3366 | HPT and the previous HPT will be discarded. | ||
3367 | |||
3368 | On failure, the guest will still be operating on its previous HPT. | ||
3369 | |||
3370 | struct kvm_ppc_resize_hpt { | ||
3371 | __u64 flags; | ||
3372 | __u32 shift; | ||
3373 | __u32 pad; | ||
3374 | }; | ||
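A companion sketch to the PREPARE loop above (same includes and assumptions): once KVM_PPC_RESIZE_HPT_PREPARE has returned 0 for this shift and the guest has quiesced MMU-enabled accesses, the same parameters are handed to KVM_PPC_RESIZE_HPT_COMMIT.

static int commit_resized_hpt(int vm_fd, __u32 shift)
{
	struct kvm_ppc_resize_hpt rhpt = {
		.flags = 0,
		.shift = shift,		/* must match the prepared HPT */
	};

	return ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}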
3375 | |||
3269 | 5. The kvm_run structure | 3376 | 5. The kvm_run structure |
3270 | ------------------------ | 3377 | ------------------------ |
3271 | 3378 | ||
@@ -3282,7 +3389,18 @@ struct kvm_run { | |||
3282 | Request that KVM_RUN return when it becomes possible to inject external | 3389 | Request that KVM_RUN return when it becomes possible to inject external |
3283 | interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. | 3390 | interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. |
3284 | 3391 | ||
3285 | __u8 padding1[7]; | 3392 | __u8 immediate_exit; |
3393 | |||
3394 | This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN | ||
3395 | exits immediately, returning -EINTR. In the common scenario where a | ||
3396 | signal is used to "kick" a VCPU out of KVM_RUN, this field can be used | ||
3397 | to avoid usage of KVM_SET_SIGNAL_MASK, which has worse scalability. | ||
3398 | Rather than blocking the signal outside KVM_RUN, userspace can set up | ||
3399 | a signal handler that sets run->immediate_exit to a non-zero value. | ||
3400 | |||
3401 | This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available. | ||
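As a rough illustration of the pattern described above (a sketch, not the canonical usage): run is assumed to point at the vCPU's mmap()ed kvm_run area, and SIGUSR1 is an arbitrary choice of kick signal.

#include <errno.h>
#include <linux/kvm.h>
#include <signal.h>
#include <string.h>
#include <sys/ioctl.h>

static struct kvm_run *run;	/* assumed: the vCPU's mmap()ed kvm_run area */

static void kick_handler(int sig)
{
	run->immediate_exit = 1;	/* pending/next KVM_RUN returns -EINTR */
}

static void run_vcpu(int vcpu_fd)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = kick_handler;
	sigaction(SIGUSR1, &sa, NULL);	/* signal used to "kick" the vCPU */

	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno == EINTR) {
			/* kicked: service the request, then clear and re-enter */
			run->immediate_exit = 0;
			continue;
		}
		/* ... dispatch on run->exit_reason ... */
	}
}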
3402 | |||
3403 | __u8 padding1[6]; | ||
3286 | 3404 | ||
3287 | /* out */ | 3405 | /* out */ |
3288 | __u32 exit_reason; | 3406 | __u32 exit_reason; |
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
index 9348b3caccd7..c1a24612c198 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
@@ -118,7 +118,7 @@ Groups: | |||
118 | -EBUSY: One or more VCPUs are running | 118 | -EBUSY: One or more VCPUs are running |
119 | 119 | ||
120 | 120 | ||
121 | KVM_DEV_ARM_VGIC_CPU_SYSREGS | 121 | KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS |
122 | Attributes: | 122 | Attributes: |
123 | The attr field of kvm_device_attr encodes two values: | 123 | The attr field of kvm_device_attr encodes two values: |
124 | bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 | | 124 | bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 | |
@@ -139,13 +139,15 @@ Groups: | |||
139 | All system regs accessed through this API are (rw, 64-bit) and | 139 | All system regs accessed through this API are (rw, 64-bit) and |
140 | kvm_device_attr.addr points to a __u64 value. | 140 | kvm_device_attr.addr points to a __u64 value. |
141 | 141 | ||
142 | KVM_DEV_ARM_VGIC_CPU_SYSREGS accesses the CPU interface registers for the | 142 | KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS accesses the CPU interface registers for the |
143 | CPU specified by the mpidr field. | 143 | CPU specified by the mpidr field. |
144 | 144 | ||
145 | CPU interface registers access is not implemented for AArch32 mode. | ||
146 | Error -ENXIO is returned when accessed in AArch32 mode. | ||
145 | Errors: | 147 | Errors: |
146 | -ENXIO: Getting or setting this register is not yet supported | 148 | -ENXIO: Getting or setting this register is not yet supported |
147 | -EBUSY: VCPU is running | 149 | -EBUSY: VCPU is running |
148 | -EINVAL: Invalid mpidr supplied | 150 | -EINVAL: Invalid mpidr or register value supplied |
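For illustration, a hedged sketch of driving this group from userspace, using the KVM_DEV_ARM_VGIC_* constants this series adds to the uapi headers; vgic_fd is assumed to be the fd of a KVM_DEV_TYPE_ARM_VGIC_V3 device, mpidr the target CPU's affinity value, and sysreg the 16-bit instruction encoding of the register:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int vgic_v3_get_cpu_sysreg(int vgic_fd, __u64 mpidr, __u16 sysreg,
				  __u64 *val)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
		.attr  = ((mpidr << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) &
			  KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) |
			 (sysreg & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK),
		.addr  = (__u64)(unsigned long)val,
	};

	return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
}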
149 | 151 | ||
150 | 152 | ||
151 | KVM_DEV_ARM_VGIC_GRP_NR_IRQS | 153 | KVM_DEV_ARM_VGIC_GRP_NR_IRQS |
@@ -204,3 +206,6 @@ Groups: | |||
204 | architecture defined MPIDR, and the field is encoded as follows: | 206 | architecture defined MPIDR, and the field is encoded as follows: |
205 | | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | | 207 | | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | |
206 | | Aff3 | Aff2 | Aff1 | Aff0 | | 208 | | Aff3 | Aff2 | Aff1 | Aff0 | |
209 | Errors: | ||
210 | -EINVAL: vINTID is not a multiple of 32 or | ||
211 | info field is not VGIC_LEVEL_INFO_LINE_LEVEL | ||
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index c8d040e27046..feaaa634f154 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -81,3 +81,38 @@ the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the | |||
81 | same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, | 81 | same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, |
82 | specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) | 82 | specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) |
83 | is used in the hypercall for future use. | 83 | is used in the hypercall for future use. |
84 | |||
85 | |||
86 | 6. KVM_HC_CLOCK_PAIRING | ||
87 | ------------------------ | ||
88 | Architecture: x86 | ||
89 | Status: active | ||
90 | Purpose: Hypercall used to synchronize host and guest clocks. | ||
91 | Usage: | ||
92 | |||
93 | a0: guest physical address where host copies | ||
94 | "struct kvm_clock_offset" structure. | ||
95 | |||
96 | a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0) | ||
97 | is supported (corresponding to the host's CLOCK_REALTIME clock). | ||
98 | |||
99 | struct kvm_clock_pairing { | ||
100 | __s64 sec; | ||
101 | __s64 nsec; | ||
102 | __u64 tsc; | ||
103 | __u32 flags; | ||
104 | __u32 pad[9]; | ||
105 | }; | ||
106 | |||
107 | Where: | ||
108 | * sec: seconds from clock_type clock. | ||
109 | * nsec: nanoseconds from clock_type clock. | ||
110 | * tsc: guest TSC value used to calculate sec/nsec pair | ||
111 | * flags: flags, unused (0) at the moment. | ||
112 | |||
113 | The hypercall lets a guest compute a precise timestamp across | ||
114 | host and guest. The guest can use the returned TSC value to | ||
115 | compute the CLOCK_REALTIME for its clock, at the same instant. | ||
116 | |||
117 | Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, | ||
118 | or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. | ||
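For illustration, a guest-kernel sketch (x86) of how the returned triple might be consumed; the cycles-to-nanoseconds scaling via tsc_khz is deliberately crude and stands in for the kernel's real clocksource conversion helpers:

#include <linux/errno.h>
#include <linux/kvm_para.h>	/* kvm_hypercall2(), KVM_HC_CLOCK_PAIRING */
#include <linux/mm.h>		/* __pa() */
#include <linux/time64.h>	/* NSEC_PER_SEC */
#include <asm/msr.h>		/* rdtsc() */
#include <asm/tsc.h>		/* tsc_khz */

static struct kvm_clock_pairing clock_pair;	/* static so __pa() is valid */

static int read_host_realtime(u64 *realtime_ns)
{
	u64 delta_ns;
	long ret;

	ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, __pa(&clock_pair),
			     KVM_CLOCK_PAIRING_WALLCLOCK);
	if (ret != 0)
		return -EOPNOTSUPP;	/* e.g. host not on the TSC clocksource */

	/* Host CLOCK_REALTIME at the sampled instant, advanced by our TSC delta. */
	delta_ns = (rdtsc() - clock_pair.tsc) * 1000000ULL / tsc_khz;
	*realtime_ns = clock_pair.sec * NSEC_PER_SEC + clock_pair.nsec + delta_ns;
	return 0;
}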
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index fd013bf4115b..1bb8bcaf8497 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -26,9 +26,16 @@ sections. | |||
26 | Fast page fault: | 26 | Fast page fault: |
27 | 27 | ||
28 | Fast page fault is the fast path which fixes the guest page fault out of | 28 | Fast page fault is the fast path which fixes the guest page fault out of |
29 | the mmu-lock on x86. Currently, the page fault can be fast only if the | 29 | the mmu-lock on x86. Currently, the page fault can be fast in one of the |
30 | shadow page table is present and it is caused by write-protect, that means | 30 | following two cases: |
31 | we just need change the W bit of the spte. | 31 | |
32 | 1. Access Tracking: The SPTE is not present, but it is marked for access | ||
33 | tracking, i.e. the SPTE_SPECIAL_MASK is set. That means we need to | ||
34 | restore the saved R/X bits. This is described in more detail later below. | ||
35 | |||
36 | 2. Write-Protection: The SPTE is present and the fault is | ||
37 | caused by write-protect. That means we just need to change the W bit of the | ||
38 | spte. | ||
32 | 39 | ||
33 | What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and | 40 | What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and |
34 | SPTE_MMU_WRITEABLE bit on the spte: | 41 | SPTE_MMU_WRITEABLE bit on the spte: |
@@ -38,7 +45,8 @@ SPTE_MMU_WRITEABLE bit on the spte: | |||
38 | page write-protection. | 45 | page write-protection. |
39 | 46 | ||
40 | On fast page fault path, we will use cmpxchg to atomically set the spte W | 47 | On fast page fault path, we will use cmpxchg to atomically set the spte W |
41 | bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, this | 48 | bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or |
49 | restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This | ||
42 | is safe because any change to these bits can be detected by cmpxchg. | 50 | is safe because any change to these bits can be detected by cmpxchg. |
43 | 51 | ||
44 | But we need carefully check these cases: | 52 | But we need carefully check these cases: |
@@ -142,6 +150,21 @@ Since the spte is "volatile" if it can be updated out of mmu-lock, we always | |||
142 | atomically update the spte, the race caused by fast page fault can be avoided, | 150 | atomically update the spte, the race caused by fast page fault can be avoided, |
143 | See the comments in spte_has_volatile_bits() and mmu_spte_update(). | 151 | See the comments in spte_has_volatile_bits() and mmu_spte_update(). |
144 | 152 | ||
153 | Lockless Access Tracking: | ||
154 | |||
155 | This is used for Intel CPUs that are using EPT but do not support the EPT A/D | ||
156 | bits. In this case, when the KVM MMU notifier is called to track accesses to a | ||
157 | page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present | ||
158 | by clearing the RWX bits in the PTE and storing the original R & X bits in | ||
159 | some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the | ||
160 | PTE (using the ignored bit 62). When the VM tries to access the page later on, | ||
161 | a fault is generated and the fast page fault mechanism described above is used | ||
162 | to atomically restore the PTE to a Present state. The W bit is not saved when | ||
163 | the PTE is marked for access tracking and during restoration to the Present | ||
164 | state, the W bit is set depending on whether or not it was a write access. If | ||
165 | it wasn't, then the W bit will remain clear until a write access happens, at | ||
166 | which time it will be set using the Dirty tracking mechanism described above. | ||
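As a purely illustrative sketch of the save/restore mechanism (the bit positions and names below are invented for this sketch and do not match KVM's real SPTE layout): clear RWX, stash R/X in ignored bits, mark the SPTE as access-tracked, and later restore everything with a single atomic compare-and-exchange.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define RWX_MASK	0x7ULL			/* hypothetical R=bit0, W=bit1, X=bit2 */
#define W_BIT		0x2ULL
#define SAVED_RX_SHIFT	52			/* hypothetical ignored-bit area */
#define SPECIAL_BIT	(1ULL << 62)		/* "access tracked" marker */

static uint64_t mark_for_access_tracking(uint64_t spte)
{
	uint64_t saved_rx = spte & (RWX_MASK & ~W_BIT);	/* keep R and X only */

	spte &= ~RWX_MASK;				/* now non-present */
	spte |= saved_rx << SAVED_RX_SHIFT;		/* stash R/X */
	spte |= SPECIAL_BIT;
	return spte;
}

static bool fast_restore(_Atomic uint64_t *sptep, bool write_fault)
{
	uint64_t old_spte = atomic_load(sptep);
	uint64_t new_spte = old_spte;

	new_spte |= (old_spte >> SAVED_RX_SHIFT) & RWX_MASK;	/* restore saved R/X */
	new_spte &= ~(((RWX_MASK & ~W_BIT) << SAVED_RX_SHIFT) | SPECIAL_BIT);
	if (write_fault)
		new_spte |= W_BIT;	/* W is only set when the fault was a write */

	/* The cmpxchg keeps the restore safe against concurrent updaters. */
	return atomic_compare_exchange_strong(sptep, &old_spte, new_spte);
}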
167 | |||
145 | 3. Reference | 168 | 3. Reference |
146 | ------------ | 169 | ------------ |
147 | 170 | ||
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index d5423ab15ed5..cc495d799c67 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -60,9 +60,6 @@ struct kvm_arch { | |||
60 | /* The last vcpu id that ran on each physical CPU */ | 60 | /* The last vcpu id that ran on each physical CPU */ |
61 | int __percpu *last_vcpu_ran; | 61 | int __percpu *last_vcpu_ran; |
62 | 62 | ||
63 | /* Timer */ | ||
64 | struct arch_timer_kvm timer; | ||
65 | |||
66 | /* | 63 | /* |
67 | * Anything that is not used directly from assembly code goes | 64 | * Anything that is not used directly from assembly code goes |
68 | * here. | 65 | * here. |
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 74a44727f8e1..95f38dcd611d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -129,8 +129,7 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) | |||
129 | 129 | ||
130 | static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, | 130 | static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, |
131 | kvm_pfn_t pfn, | 131 | kvm_pfn_t pfn, |
132 | unsigned long size, | 132 | unsigned long size) |
133 | bool ipa_uncached) | ||
134 | { | 133 | { |
135 | /* | 134 | /* |
136 | * If we are going to insert an instruction page and the icache is | 135 | * If we are going to insert an instruction page and the icache is |
@@ -150,18 +149,12 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, | |||
150 | * and iterate over the range. | 149 | * and iterate over the range. |
151 | */ | 150 | */ |
152 | 151 | ||
153 | bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; | ||
154 | |||
155 | VM_BUG_ON(size & ~PAGE_MASK); | 152 | VM_BUG_ON(size & ~PAGE_MASK); |
156 | 153 | ||
157 | if (!need_flush && !icache_is_pipt()) | ||
158 | goto vipt_cache; | ||
159 | |||
160 | while (size) { | 154 | while (size) { |
161 | void *va = kmap_atomic_pfn(pfn); | 155 | void *va = kmap_atomic_pfn(pfn); |
162 | 156 | ||
163 | if (need_flush) | 157 | kvm_flush_dcache_to_poc(va, PAGE_SIZE); |
164 | kvm_flush_dcache_to_poc(va, PAGE_SIZE); | ||
165 | 158 | ||
166 | if (icache_is_pipt()) | 159 | if (icache_is_pipt()) |
167 | __cpuc_coherent_user_range((unsigned long)va, | 160 | __cpuc_coherent_user_range((unsigned long)va, |
@@ -173,7 +166,6 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, | |||
173 | kunmap_atomic(va); | 166 | kunmap_atomic(va); |
174 | } | 167 | } |
175 | 168 | ||
176 | vipt_cache: | ||
177 | if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { | 169 | if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { |
178 | /* any kind of VIPT cache */ | 170 | /* any kind of VIPT cache */ |
179 | __flush_icache_all(); | 171 | __flush_icache_all(); |
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index af05f8e0903e..6ebd3e6a1fd1 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -181,10 +181,23 @@ struct kvm_arch_memory_slot { | |||
181 | #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 | 181 | #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 |
182 | #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 | 182 | #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 |
183 | #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) | 183 | #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) |
184 | #define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 | ||
185 | #define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ | ||
186 | (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) | ||
184 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 | 187 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 |
185 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) | 188 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) |
189 | #define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) | ||
186 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 | 190 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 |
187 | #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 | 191 | #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 |
192 | #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 | ||
193 | #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 | ||
194 | #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 | ||
195 | #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 | ||
196 | #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ | ||
197 | (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) | ||
198 | #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff | ||
199 | #define VGIC_LEVEL_INFO_LINE_LEVEL 0 | ||
200 | |||
188 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 | 201 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 |
189 | 202 | ||
190 | /* KVM_IRQ_LINE irq field index values */ | 203 | /* KVM_IRQ_LINE irq field index values */ |
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index d571243ab4d1..7b3670c2ae7b 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt) | |||
7 | plus_virt_def := -DREQUIRES_VIRT=1 | 7 | plus_virt_def := -DREQUIRES_VIRT=1 |
8 | endif | 8 | endif |
9 | 9 | ||
10 | ccflags-y += -Iarch/arm/kvm | 10 | ccflags-y += -Iarch/arm/kvm -Ivirt/kvm/arm/vgic |
11 | CFLAGS_arm.o := -I. $(plus_virt_def) | 11 | CFLAGS_arm.o := -I. $(plus_virt_def) |
12 | CFLAGS_mmu.o := -I. | 12 | CFLAGS_mmu.o := -I. |
13 | 13 | ||
@@ -20,7 +20,7 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vf | |||
20 | obj-$(CONFIG_KVM_ARM_HOST) += hyp/ | 20 | obj-$(CONFIG_KVM_ARM_HOST) += hyp/ |
21 | obj-y += kvm-arm.o init.o interrupts.o | 21 | obj-y += kvm-arm.o init.o interrupts.o |
22 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o | 22 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o |
23 | obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o | 23 | obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o vgic-v3-coproc.o |
24 | obj-y += $(KVM)/arm/aarch32.o | 24 | obj-y += $(KVM)/arm/aarch32.o |
25 | 25 | ||
26 | obj-y += $(KVM)/arm/vgic/vgic.o | 26 | obj-y += $(KVM)/arm/vgic/vgic.o |
@@ -33,5 +33,6 @@ obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o | |||
33 | obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o | 33 | obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o |
34 | obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o | 34 | obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o |
35 | obj-y += $(KVM)/arm/vgic/vgic-its.o | 35 | obj-y += $(KVM)/arm/vgic/vgic-its.o |
36 | obj-y += $(KVM)/arm/vgic/vgic-debug.o | ||
36 | obj-y += $(KVM)/irqchip.o | 37 | obj-y += $(KVM)/irqchip.o |
37 | obj-y += $(KVM)/arm/arch_timer.o | 38 | obj-y += $(KVM)/arm/arch_timer.o |
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9d7446456e0c..c9a2103faeb9 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -135,7 +135,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
135 | goto out_free_stage2_pgd; | 135 | goto out_free_stage2_pgd; |
136 | 136 | ||
137 | kvm_vgic_early_init(kvm); | 137 | kvm_vgic_early_init(kvm); |
138 | kvm_timer_init(kvm); | ||
139 | 138 | ||
140 | /* Mark the initial VMID generation invalid */ | 139 | /* Mark the initial VMID generation invalid */ |
141 | kvm->arch.vmid_gen = 0; | 140 | kvm->arch.vmid_gen = 0; |
@@ -207,6 +206,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
207 | case KVM_CAP_ARM_PSCI_0_2: | 206 | case KVM_CAP_ARM_PSCI_0_2: |
208 | case KVM_CAP_READONLY_MEM: | 207 | case KVM_CAP_READONLY_MEM: |
209 | case KVM_CAP_MP_STATE: | 208 | case KVM_CAP_MP_STATE: |
209 | case KVM_CAP_IMMEDIATE_EXIT: | ||
210 | r = 1; | 210 | r = 1; |
211 | break; | 211 | break; |
212 | case KVM_CAP_COALESCED_MMIO: | 212 | case KVM_CAP_COALESCED_MMIO: |
@@ -301,7 +301,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
301 | 301 | ||
302 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | 302 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) |
303 | { | 303 | { |
304 | return kvm_timer_should_fire(vcpu); | 304 | return kvm_timer_should_fire(vcpu_vtimer(vcpu)) || |
305 | kvm_timer_should_fire(vcpu_ptimer(vcpu)); | ||
305 | } | 306 | } |
306 | 307 | ||
307 | void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) | 308 | void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) |
@@ -604,6 +605,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
604 | return ret; | 605 | return ret; |
605 | } | 606 | } |
606 | 607 | ||
608 | if (run->immediate_exit) | ||
609 | return -EINTR; | ||
610 | |||
607 | if (vcpu->sigset_active) | 611 | if (vcpu->sigset_active) |
608 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 612 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
609 | 613 | ||
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index a5265edbeeab..962616fd4ddd 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1232,9 +1232,9 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | |||
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, | 1234 | static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, |
1235 | unsigned long size, bool uncached) | 1235 | unsigned long size) |
1236 | { | 1236 | { |
1237 | __coherent_cache_guest_page(vcpu, pfn, size, uncached); | 1237 | __coherent_cache_guest_page(vcpu, pfn, size); |
1238 | } | 1238 | } |
1239 | 1239 | ||
1240 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | 1240 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, |
@@ -1250,7 +1250,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1250 | struct vm_area_struct *vma; | 1250 | struct vm_area_struct *vma; |
1251 | kvm_pfn_t pfn; | 1251 | kvm_pfn_t pfn; |
1252 | pgprot_t mem_type = PAGE_S2; | 1252 | pgprot_t mem_type = PAGE_S2; |
1253 | bool fault_ipa_uncached; | ||
1254 | bool logging_active = memslot_is_logging(memslot); | 1253 | bool logging_active = memslot_is_logging(memslot); |
1255 | unsigned long flags = 0; | 1254 | unsigned long flags = 0; |
1256 | 1255 | ||
@@ -1337,8 +1336,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1337 | if (!hugetlb && !force_pte) | 1336 | if (!hugetlb && !force_pte) |
1338 | hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); | 1337 | hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); |
1339 | 1338 | ||
1340 | fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT; | ||
1341 | |||
1342 | if (hugetlb) { | 1339 | if (hugetlb) { |
1343 | pmd_t new_pmd = pfn_pmd(pfn, mem_type); | 1340 | pmd_t new_pmd = pfn_pmd(pfn, mem_type); |
1344 | new_pmd = pmd_mkhuge(new_pmd); | 1341 | new_pmd = pmd_mkhuge(new_pmd); |
@@ -1346,7 +1343,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1346 | new_pmd = kvm_s2pmd_mkwrite(new_pmd); | 1343 | new_pmd = kvm_s2pmd_mkwrite(new_pmd); |
1347 | kvm_set_pfn_dirty(pfn); | 1344 | kvm_set_pfn_dirty(pfn); |
1348 | } | 1345 | } |
1349 | coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); | 1346 | coherent_cache_guest_page(vcpu, pfn, PMD_SIZE); |
1350 | ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); | 1347 | ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); |
1351 | } else { | 1348 | } else { |
1352 | pte_t new_pte = pfn_pte(pfn, mem_type); | 1349 | pte_t new_pte = pfn_pte(pfn, mem_type); |
@@ -1356,7 +1353,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1356 | kvm_set_pfn_dirty(pfn); | 1353 | kvm_set_pfn_dirty(pfn); |
1357 | mark_page_dirty(kvm, gfn); | 1354 | mark_page_dirty(kvm, gfn); |
1358 | } | 1355 | } |
1359 | coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached); | 1356 | coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE); |
1360 | ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); | 1357 | ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); |
1361 | } | 1358 | } |
1362 | 1359 | ||
@@ -1879,15 +1876,6 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | |||
1879 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | 1876 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
1880 | unsigned long npages) | 1877 | unsigned long npages) |
1881 | { | 1878 | { |
1882 | /* | ||
1883 | * Readonly memslots are not incoherent with the caches by definition, | ||
1884 | * but in practice, they are used mostly to emulate ROMs or NOR flashes | ||
1885 | * that the guest may consider devices and hence map as uncached. | ||
1886 | * To prevent incoherency issues in these cases, tag all readonly | ||
1887 | * regions as incoherent. | ||
1888 | */ | ||
1889 | if (slot->flags & KVM_MEM_READONLY) | ||
1890 | slot->flags |= KVM_MEMSLOT_INCOHERENT; | ||
1891 | return 0; | 1879 | return 0; |
1892 | } | 1880 | } |
1893 | 1881 | ||
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 4b5e802e57d1..1da8b2d14550 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -37,6 +37,11 @@ static struct kvm_regs cortexa_regs_reset = { | |||
37 | .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, | 37 | .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, |
38 | }; | 38 | }; |
39 | 39 | ||
40 | static const struct kvm_irq_level cortexa_ptimer_irq = { | ||
41 | { .irq = 30 }, | ||
42 | .level = 1, | ||
43 | }; | ||
44 | |||
40 | static const struct kvm_irq_level cortexa_vtimer_irq = { | 45 | static const struct kvm_irq_level cortexa_vtimer_irq = { |
41 | { .irq = 27 }, | 46 | { .irq = 27 }, |
42 | .level = 1, | 47 | .level = 1, |
@@ -58,6 +63,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
58 | { | 63 | { |
59 | struct kvm_regs *reset_regs; | 64 | struct kvm_regs *reset_regs; |
60 | const struct kvm_irq_level *cpu_vtimer_irq; | 65 | const struct kvm_irq_level *cpu_vtimer_irq; |
66 | const struct kvm_irq_level *cpu_ptimer_irq; | ||
61 | 67 | ||
62 | switch (vcpu->arch.target) { | 68 | switch (vcpu->arch.target) { |
63 | case KVM_ARM_TARGET_CORTEX_A7: | 69 | case KVM_ARM_TARGET_CORTEX_A7: |
@@ -65,6 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
65 | reset_regs = &cortexa_regs_reset; | 71 | reset_regs = &cortexa_regs_reset; |
66 | vcpu->arch.midr = read_cpuid_id(); | 72 | vcpu->arch.midr = read_cpuid_id(); |
67 | cpu_vtimer_irq = &cortexa_vtimer_irq; | 73 | cpu_vtimer_irq = &cortexa_vtimer_irq; |
74 | cpu_ptimer_irq = &cortexa_ptimer_irq; | ||
68 | break; | 75 | break; |
69 | default: | 76 | default: |
70 | return -ENODEV; | 77 | return -ENODEV; |
@@ -77,5 +84,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
77 | kvm_reset_coprocs(vcpu); | 84 | kvm_reset_coprocs(vcpu); |
78 | 85 | ||
79 | /* Reset arch_timer context */ | 86 | /* Reset arch_timer context */ |
80 | return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); | 87 | return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); |
81 | } | 88 | } |
diff --git a/arch/arm/kvm/vgic-v3-coproc.c b/arch/arm/kvm/vgic-v3-coproc.c
new file mode 100644
index 000000000000..f41abf76366f
--- /dev/null
+++ b/arch/arm/kvm/vgic-v3-coproc.c
@@ -0,0 +1,35 @@ | |||
1 | /* | ||
2 | * VGIC system registers handling functions for AArch32 mode | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/kvm.h> | ||
15 | #include <linux/kvm_host.h> | ||
16 | #include <asm/kvm_emulate.h> | ||
17 | #include "vgic.h" | ||
18 | |||
19 | int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, | ||
20 | u64 *reg) | ||
21 | { | ||
22 | /* | ||
23 | * TODO: Implement for AArch32 | ||
24 | */ | ||
25 | return -ENXIO; | ||
26 | } | ||
27 | |||
28 | int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, | ||
29 | u64 *reg) | ||
30 | { | ||
31 | /* | ||
32 | * TODO: Implement for AArch32 | ||
33 | */ | ||
34 | return -ENXIO; | ||
35 | } | ||
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 443b387021f2..f21fd3894370 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -70,9 +70,6 @@ struct kvm_arch { | |||
70 | 70 | ||
71 | /* Interrupt controller */ | 71 | /* Interrupt controller */ |
72 | struct vgic_dist vgic; | 72 | struct vgic_dist vgic; |
73 | |||
74 | /* Timer */ | ||
75 | struct arch_timer_kvm timer; | ||
76 | }; | 73 | }; |
77 | 74 | ||
78 | #define KVM_NR_MEM_OBJS 40 | 75 | #define KVM_NR_MEM_OBJS 40 |
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 55772c13a375..ed1246014901 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -236,13 +236,11 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) | |||
236 | 236 | ||
237 | static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, | 237 | static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, |
238 | kvm_pfn_t pfn, | 238 | kvm_pfn_t pfn, |
239 | unsigned long size, | 239 | unsigned long size) |
240 | bool ipa_uncached) | ||
241 | { | 240 | { |
242 | void *va = page_address(pfn_to_page(pfn)); | 241 | void *va = page_address(pfn_to_page(pfn)); |
243 | 242 | ||
244 | if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) | 243 | kvm_flush_dcache_to_poc(va, size); |
245 | kvm_flush_dcache_to_poc(va, size); | ||
246 | 244 | ||
247 | if (!icache_is_aliasing()) { /* PIPT */ | 245 | if (!icache_is_aliasing()) { /* PIPT */ |
248 | flush_icache_range((unsigned long)va, | 246 | flush_icache_range((unsigned long)va, |
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 3051f86a9b5f..c2860358ae3e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -201,10 +201,23 @@ struct kvm_arch_memory_slot { | |||
201 | #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 | 201 | #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 |
202 | #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 | 202 | #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 |
203 | #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) | 203 | #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) |
204 | #define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 | ||
205 | #define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ | ||
206 | (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) | ||
204 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 | 207 | #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 |
205 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) | 208 | #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) |
209 | #define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) | ||
206 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 | 210 | #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 |
207 | #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 | 211 | #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 |
212 | #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 | ||
213 | #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 | ||
214 | #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 | ||
215 | #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 | ||
216 | #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ | ||
217 | (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) | ||
218 | #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff | ||
219 | #define VGIC_LEVEL_INFO_LINE_LEVEL 0 | ||
220 | |||
208 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 | 221 | #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 |
209 | 222 | ||
210 | /* Device Control API on vcpu fd */ | 223 | /* Device Control API on vcpu fd */ |
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index d50a82a16ff6..afd51bebb9c5 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for Kernel-based Virtual Machine module | 2 | # Makefile for Kernel-based Virtual Machine module |
3 | # | 3 | # |
4 | 4 | ||
5 | ccflags-y += -Iarch/arm64/kvm | 5 | ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic |
6 | CFLAGS_arm.o := -I. | 6 | CFLAGS_arm.o := -I. |
7 | CFLAGS_mmu.o := -I. | 7 | CFLAGS_mmu.o := -I. |
8 | 8 | ||
@@ -19,6 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o | |||
19 | kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o | 19 | kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o |
20 | kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o | 20 | kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o |
21 | kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o | 21 | kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o |
22 | kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o | ||
22 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o | 23 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o |
23 | 24 | ||
24 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o | 25 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o |
@@ -31,6 +32,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o | |||
31 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o | 32 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o |
32 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o | 33 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o |
33 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o | 34 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o |
35 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o | ||
34 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o | 36 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o |
35 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o | 37 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o |
36 | kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o | 38 | kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o |
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e95d4f68bf54..d9e9697de1b2 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -46,6 +46,11 @@ static const struct kvm_regs default_regs_reset32 = { | |||
46 | COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), | 46 | COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), |
47 | }; | 47 | }; |
48 | 48 | ||
49 | static const struct kvm_irq_level default_ptimer_irq = { | ||
50 | .irq = 30, | ||
51 | .level = 1, | ||
52 | }; | ||
53 | |||
49 | static const struct kvm_irq_level default_vtimer_irq = { | 54 | static const struct kvm_irq_level default_vtimer_irq = { |
50 | .irq = 27, | 55 | .irq = 27, |
51 | .level = 1, | 56 | .level = 1, |
@@ -104,6 +109,7 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) | |||
104 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | 109 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu) |
105 | { | 110 | { |
106 | const struct kvm_irq_level *cpu_vtimer_irq; | 111 | const struct kvm_irq_level *cpu_vtimer_irq; |
112 | const struct kvm_irq_level *cpu_ptimer_irq; | ||
107 | const struct kvm_regs *cpu_reset; | 113 | const struct kvm_regs *cpu_reset; |
108 | 114 | ||
109 | switch (vcpu->arch.target) { | 115 | switch (vcpu->arch.target) { |
@@ -117,6 +123,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
117 | } | 123 | } |
118 | 124 | ||
119 | cpu_vtimer_irq = &default_vtimer_irq; | 125 | cpu_vtimer_irq = &default_vtimer_irq; |
126 | cpu_ptimer_irq = &default_ptimer_irq; | ||
120 | break; | 127 | break; |
121 | } | 128 | } |
122 | 129 | ||
@@ -130,5 +137,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
130 | kvm_pmu_vcpu_reset(vcpu); | 137 | kvm_pmu_vcpu_reset(vcpu); |
131 | 138 | ||
132 | /* Reset timer */ | 139 | /* Reset timer */ |
133 | return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); | 140 | return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); |
134 | } | 141 | } |
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 87e7e6608cd8..0e26f8c2b56f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -820,6 +820,61 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
820 | CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ | 820 | CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ |
821 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } | 821 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } |
822 | 822 | ||
823 | static bool access_cntp_tval(struct kvm_vcpu *vcpu, | ||
824 | struct sys_reg_params *p, | ||
825 | const struct sys_reg_desc *r) | ||
826 | { | ||
827 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
828 | u64 now = kvm_phys_timer_read(); | ||
829 | |||
830 | if (p->is_write) | ||
831 | ptimer->cnt_cval = p->regval + now; | ||
832 | else | ||
833 | p->regval = ptimer->cnt_cval - now; | ||
834 | |||
835 | return true; | ||
836 | } | ||
837 | |||
838 | static bool access_cntp_ctl(struct kvm_vcpu *vcpu, | ||
839 | struct sys_reg_params *p, | ||
840 | const struct sys_reg_desc *r) | ||
841 | { | ||
842 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
843 | |||
844 | if (p->is_write) { | ||
845 | /* ISTATUS bit is read-only */ | ||
846 | ptimer->cnt_ctl = p->regval & ~ARCH_TIMER_CTRL_IT_STAT; | ||
847 | } else { | ||
848 | u64 now = kvm_phys_timer_read(); | ||
849 | |||
850 | p->regval = ptimer->cnt_ctl; | ||
851 | /* | ||
852 | * Set ISTATUS bit if it's expired. | ||
853 | * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is | ||
854 | * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit | ||
855 | * regardless of ENABLE bit for our implementation convenience. | ||
856 | */ | ||
857 | if (ptimer->cnt_cval <= now) | ||
858 | p->regval |= ARCH_TIMER_CTRL_IT_STAT; | ||
859 | } | ||
860 | |||
861 | return true; | ||
862 | } | ||
863 | |||
864 | static bool access_cntp_cval(struct kvm_vcpu *vcpu, | ||
865 | struct sys_reg_params *p, | ||
866 | const struct sys_reg_desc *r) | ||
867 | { | ||
868 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
869 | |||
870 | if (p->is_write) | ||
871 | ptimer->cnt_cval = p->regval; | ||
872 | else | ||
873 | p->regval = ptimer->cnt_cval; | ||
874 | |||
875 | return true; | ||
876 | } | ||
877 | |||
823 | /* | 878 | /* |
824 | * Architected system registers. | 879 | * Architected system registers. |
825 | * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 | 880 | * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 |
@@ -1029,6 +1084,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
1029 | { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), | 1084 | { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), |
1030 | NULL, reset_unknown, TPIDRRO_EL0 }, | 1085 | NULL, reset_unknown, TPIDRRO_EL0 }, |
1031 | 1086 | ||
1087 | /* CNTP_TVAL_EL0 */ | ||
1088 | { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b000), | ||
1089 | access_cntp_tval }, | ||
1090 | /* CNTP_CTL_EL0 */ | ||
1091 | { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b001), | ||
1092 | access_cntp_ctl }, | ||
1093 | /* CNTP_CVAL_EL0 */ | ||
1094 | { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b010), | ||
1095 | access_cntp_cval }, | ||
1096 | |||
1032 | /* PMEVCNTRn_EL0 */ | 1097 | /* PMEVCNTRn_EL0 */ |
1033 | PMU_PMEVCNTR_EL0(0), | 1098 | PMU_PMEVCNTR_EL0(0), |
1034 | PMU_PMEVCNTR_EL0(1), | 1099 | PMU_PMEVCNTR_EL0(1), |
@@ -1795,6 +1860,17 @@ static bool index_to_params(u64 id, struct sys_reg_params *params) | |||
1795 | } | 1860 | } |
1796 | } | 1861 | } |
1797 | 1862 | ||
1863 | const struct sys_reg_desc *find_reg_by_id(u64 id, | ||
1864 | struct sys_reg_params *params, | ||
1865 | const struct sys_reg_desc table[], | ||
1866 | unsigned int num) | ||
1867 | { | ||
1868 | if (!index_to_params(id, params)) | ||
1869 | return NULL; | ||
1870 | |||
1871 | return find_reg(params, table, num); | ||
1872 | } | ||
1873 | |||
1798 | /* Decode an index value, and find the sys_reg_desc entry. */ | 1874 | /* Decode an index value, and find the sys_reg_desc entry. */ |
1799 | static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, | 1875 | static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, |
1800 | u64 id) | 1876 | u64 id) |
@@ -1807,11 +1883,8 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, | |||
1807 | if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) | 1883 | if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) |
1808 | return NULL; | 1884 | return NULL; |
1809 | 1885 | ||
1810 | if (!index_to_params(id, ¶ms)) | ||
1811 | return NULL; | ||
1812 | |||
1813 | table = get_target_table(vcpu->arch.target, true, &num); | 1886 | table = get_target_table(vcpu->arch.target, true, &num); |
1814 | r = find_reg(¶ms, table, num); | 1887 | r = find_reg_by_id(id, ¶ms, table, num); |
1815 | if (!r) | 1888 | if (!r) |
1816 | r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); | 1889 | r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); |
1817 | 1890 | ||
@@ -1918,10 +1991,8 @@ static int get_invariant_sys_reg(u64 id, void __user *uaddr) | |||
1918 | struct sys_reg_params params; | 1991 | struct sys_reg_params params; |
1919 | const struct sys_reg_desc *r; | 1992 | const struct sys_reg_desc *r; |
1920 | 1993 | ||
1921 | if (!index_to_params(id, ¶ms)) | 1994 | r = find_reg_by_id(id, ¶ms, invariant_sys_regs, |
1922 | return -ENOENT; | 1995 | ARRAY_SIZE(invariant_sys_regs)); |
1923 | |||
1924 | r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); | ||
1925 | if (!r) | 1996 | if (!r) |
1926 | return -ENOENT; | 1997 | return -ENOENT; |
1927 | 1998 | ||
@@ -1935,9 +2006,8 @@ static int set_invariant_sys_reg(u64 id, void __user *uaddr) | |||
1935 | int err; | 2006 | int err; |
1936 | u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ | 2007 | u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */ |
1937 | 2008 | ||
1938 | if (!index_to_params(id, ¶ms)) | 2009 | r = find_reg_by_id(id, ¶ms, invariant_sys_regs, |
1939 | return -ENOENT; | 2010 | ARRAY_SIZE(invariant_sys_regs)); |
1940 | r = find_reg(¶ms, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)); | ||
1941 | if (!r) | 2011 | if (!r) |
1942 | return -ENOENT; | 2012 | return -ENOENT; |
1943 | 2013 | ||
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index dbbb01cfbee9..9c6ffd0f0196 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -136,6 +136,10 @@ static inline int cmp_sys_reg(const struct sys_reg_desc *i1, | |||
136 | return i1->Op2 - i2->Op2; | 136 | return i1->Op2 - i2->Op2; |
137 | } | 137 | } |
138 | 138 | ||
139 | const struct sys_reg_desc *find_reg_by_id(u64 id, | ||
140 | struct sys_reg_params *params, | ||
141 | const struct sys_reg_desc table[], | ||
142 | unsigned int num); | ||
139 | 143 | ||
140 | #define Op0(_x) .Op0 = _x | 144 | #define Op0(_x) .Op0 = _x |
141 | #define Op1(_x) .Op1 = _x | 145 | #define Op1(_x) .Op1 = _x |
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
new file mode 100644
index 000000000000..79f37e37d367
--- /dev/null
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -0,0 +1,346 @@ | |||
1 | /* | ||
2 | * VGIC system registers handling functions for AArch64 mode | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/irqchip/arm-gic-v3.h> | ||
15 | #include <linux/kvm.h> | ||
16 | #include <linux/kvm_host.h> | ||
17 | #include <asm/kvm_emulate.h> | ||
18 | #include "vgic.h" | ||
19 | #include "sys_regs.h" | ||
20 | |||
21 | static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
22 | const struct sys_reg_desc *r) | ||
23 | { | ||
24 | u32 host_pri_bits, host_id_bits, host_seis, host_a3v, seis, a3v; | ||
25 | struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu; | ||
26 | struct vgic_vmcr vmcr; | ||
27 | u64 val; | ||
28 | |||
29 | vgic_get_vmcr(vcpu, &vmcr); | ||
30 | if (p->is_write) { | ||
31 | val = p->regval; | ||
32 | |||
33 | /* | ||
34 | * Disallow restoring VM state if not supported by this | ||
35 | * hardware. | ||
36 | */ | ||
37 | host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >> | ||
38 | ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1; | ||
39 | if (host_pri_bits > vgic_v3_cpu->num_pri_bits) | ||
40 | return false; | ||
41 | |||
42 | vgic_v3_cpu->num_pri_bits = host_pri_bits; | ||
43 | |||
44 | host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >> | ||
45 | ICC_CTLR_EL1_ID_BITS_SHIFT; | ||
46 | if (host_id_bits > vgic_v3_cpu->num_id_bits) | ||
47 | return false; | ||
48 | |||
49 | vgic_v3_cpu->num_id_bits = host_id_bits; | ||
50 | |||
51 | host_seis = ((kvm_vgic_global_state.ich_vtr_el2 & | ||
52 | ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT); | ||
53 | seis = (val & ICC_CTLR_EL1_SEIS_MASK) >> | ||
54 | ICC_CTLR_EL1_SEIS_SHIFT; | ||
55 | if (host_seis != seis) | ||
56 | return false; | ||
57 | |||
58 | host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 & | ||
59 | ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT); | ||
60 | a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT; | ||
61 | if (host_a3v != a3v) | ||
62 | return false; | ||
63 | |||
64 | /* | ||
65 | * Here set VMCR.CTLR in ICC_CTLR_EL1 layout. | ||
66 | * The vgic_set_vmcr() will convert to ICH_VMCR layout. | ||
67 | */ | ||
68 | vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK; | ||
69 | vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK; | ||
70 | vgic_set_vmcr(vcpu, &vmcr); | ||
71 | } else { | ||
72 | val = 0; | ||
73 | val |= (vgic_v3_cpu->num_pri_bits - 1) << | ||
74 | ICC_CTLR_EL1_PRI_BITS_SHIFT; | ||
75 | val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT; | ||
76 | val |= ((kvm_vgic_global_state.ich_vtr_el2 & | ||
77 | ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) << | ||
78 | ICC_CTLR_EL1_SEIS_SHIFT; | ||
79 | val |= ((kvm_vgic_global_state.ich_vtr_el2 & | ||
80 | ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) << | ||
81 | ICC_CTLR_EL1_A3V_SHIFT; | ||
82 | /* | ||
83 | * The VMCR.CTLR value is in ICC_CTLR_EL1 layout. | ||
84 | * Extract it directly using ICC_CTLR_EL1 reg definitions. | ||
85 | */ | ||
86 | val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK; | ||
87 | val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK; | ||
88 | |||
89 | p->regval = val; | ||
90 | } | ||
91 | |||
92 | return true; | ||
93 | } | ||
94 | |||
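As an aside, the open-coded "(val & MASK) >> SHIFT" pairs in access_gic_ctlr() could equally be written with the generic bitfield helpers; a hypothetical illustration (icc_ctlr_pri_bits() is not part of the patch):

#include <linux/bitfield.h>

/*
 * Hypothetical helper, for illustration only: FIELD_GET() derives the
 * shift from ICC_CTLR_EL1_PRI_BITS_MASK at compile time, mirroring the
 * extraction of host_pri_bits in access_gic_ctlr() above.
 */
static inline unsigned int icc_ctlr_pri_bits(u64 ctlr)
{
	return FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, ctlr) + 1;
}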
95 | static bool access_gic_pmr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
96 | const struct sys_reg_desc *r) | ||
97 | { | ||
98 | struct vgic_vmcr vmcr; | ||
99 | |||
100 | vgic_get_vmcr(vcpu, &vmcr); | ||
101 | if (p->is_write) { | ||
102 | vmcr.pmr = (p->regval & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT; | ||
103 | vgic_set_vmcr(vcpu, &vmcr); | ||
104 | } else { | ||
105 | p->regval = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK; | ||
106 | } | ||
107 | |||
108 | return true; | ||
109 | } | ||
110 | |||
111 | static bool access_gic_bpr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
112 | const struct sys_reg_desc *r) | ||
113 | { | ||
114 | struct vgic_vmcr vmcr; | ||
115 | |||
116 | vgic_get_vmcr(vcpu, &vmcr); | ||
117 | if (p->is_write) { | ||
118 | vmcr.bpr = (p->regval & ICC_BPR0_EL1_MASK) >> | ||
119 | ICC_BPR0_EL1_SHIFT; | ||
120 | vgic_set_vmcr(vcpu, &vmcr); | ||
121 | } else { | ||
122 | p->regval = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) & | ||
123 | ICC_BPR0_EL1_MASK; | ||
124 | } | ||
125 | |||
126 | return true; | ||
127 | } | ||
128 | |||
129 | static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
130 | const struct sys_reg_desc *r) | ||
131 | { | ||
132 | struct vgic_vmcr vmcr; | ||
133 | |||
134 | if (!p->is_write) | ||
135 | p->regval = 0; | ||
136 | |||
137 | vgic_get_vmcr(vcpu, &vmcr); | ||
138 | if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) { | ||
139 | if (p->is_write) { | ||
140 | vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >> | ||
141 | ICC_BPR1_EL1_SHIFT; | ||
142 | vgic_set_vmcr(vcpu, &vmcr); | ||
143 | } else { | ||
144 | p->regval = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) & | ||
145 | ICC_BPR1_EL1_MASK; | ||
146 | } | ||
147 | } else { | ||
148 | if (!p->is_write) | ||
149 | p->regval = min((vmcr.bpr + 1), 7U); | ||
150 | } | ||
151 | |||
152 | return true; | ||
153 | } | ||
154 | |||
155 | static bool access_gic_grpen0(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
156 | const struct sys_reg_desc *r) | ||
157 | { | ||
158 | struct vgic_vmcr vmcr; | ||
159 | |||
160 | vgic_get_vmcr(vcpu, &vmcr); | ||
161 | if (p->is_write) { | ||
162 | vmcr.grpen0 = (p->regval & ICC_IGRPEN0_EL1_MASK) >> | ||
163 | ICC_IGRPEN0_EL1_SHIFT; | ||
164 | vgic_set_vmcr(vcpu, &vmcr); | ||
165 | } else { | ||
166 | p->regval = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) & | ||
167 | ICC_IGRPEN0_EL1_MASK; | ||
168 | } | ||
169 | |||
170 | return true; | ||
171 | } | ||
172 | |||
173 | static bool access_gic_grpen1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
174 | const struct sys_reg_desc *r) | ||
175 | { | ||
176 | struct vgic_vmcr vmcr; | ||
177 | |||
178 | vgic_get_vmcr(vcpu, &vmcr); | ||
179 | if (p->is_write) { | ||
180 | vmcr.grpen1 = (p->regval & ICC_IGRPEN1_EL1_MASK) >> | ||
181 | ICC_IGRPEN1_EL1_SHIFT; | ||
182 | vgic_set_vmcr(vcpu, &vmcr); | ||
183 | } else { | ||
184 | p->regval = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) & | ||
185 | ICC_IGRPEN1_EL1_MASK; | ||
186 | } | ||
187 | |||
188 | return true; | ||
189 | } | ||
190 | |||
191 | static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu, | ||
192 | struct sys_reg_params *p, u8 apr, u8 idx) | ||
193 | { | ||
194 | struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; | ||
195 | uint32_t *ap_reg; | ||
196 | |||
197 | if (apr) | ||
198 | ap_reg = &vgicv3->vgic_ap1r[idx]; | ||
199 | else | ||
200 | ap_reg = &vgicv3->vgic_ap0r[idx]; | ||
201 | |||
202 | if (p->is_write) | ||
203 | *ap_reg = p->regval; | ||
204 | else | ||
205 | p->regval = *ap_reg; | ||
206 | } | ||
207 | |||
208 | static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
209 | const struct sys_reg_desc *r, u8 apr) | ||
210 | { | ||
211 | struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu; | ||
212 | u8 idx = r->Op2 & 3; | ||
213 | |||
214 | /* | ||
215 | * num_pri_bits is initialized with HW supported values. | ||
216 | * We can safely rely on num_pri_bits even if the VM has not | ||
217 | * restored ICC_CTLR_EL1 before restoring APnR registers. | ||
218 | */ | ||
219 | switch (vgic_v3_cpu->num_pri_bits) { | ||
220 | case 7: | ||
221 | vgic_v3_access_apr_reg(vcpu, p, apr, idx); | ||
222 | break; | ||
223 | case 6: | ||
224 | if (idx > 1) | ||
225 | goto err; | ||
226 | vgic_v3_access_apr_reg(vcpu, p, apr, idx); | ||
227 | break; | ||
228 | default: | ||
229 | if (idx > 0) | ||
230 | goto err; | ||
231 | vgic_v3_access_apr_reg(vcpu, p, apr, idx); | ||
232 | } | ||
233 | |||
234 | return true; | ||
235 | err: | ||
236 | if (!p->is_write) | ||
237 | p->regval = 0; | ||
238 | |||
239 | return false; | ||
240 | } | ||
241 | |||
242 | static bool access_gic_ap0r(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
243 | const struct sys_reg_desc *r) | ||
244 | |||
245 | { | ||
246 | return access_gic_aprn(vcpu, p, r, 0); | ||
247 | } | ||
248 | |||
249 | static bool access_gic_ap1r(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
250 | const struct sys_reg_desc *r) | ||
251 | { | ||
252 | return access_gic_aprn(vcpu, p, r, 1); | ||
253 | } | ||
254 | |||
255 | static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
256 | const struct sys_reg_desc *r) | ||
257 | { | ||
258 | struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; | ||
259 | |||
260 | /* Validate SRE bit */ | ||
261 | if (p->is_write) { | ||
262 | if (!(p->regval & ICC_SRE_EL1_SRE)) | ||
263 | return false; | ||
264 | } else { | ||
265 | p->regval = vgicv3->vgic_sre; | ||
266 | } | ||
267 | |||
268 | return true; | ||
269 | } | ||
270 | static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { | ||
271 | /* ICC_PMR_EL1 */ | ||
272 | { Op0(3), Op1(0), CRn(4), CRm(6), Op2(0), access_gic_pmr }, | ||
273 | /* ICC_BPR0_EL1 */ | ||
274 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(3), access_gic_bpr0 }, | ||
275 | /* ICC_AP0R0_EL1 */ | ||
276 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(4), access_gic_ap0r }, | ||
277 | /* ICC_AP0R1_EL1 */ | ||
278 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(5), access_gic_ap0r }, | ||
279 | /* ICC_AP0R2_EL1 */ | ||
280 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(6), access_gic_ap0r }, | ||
281 | /* ICC_AP0R3_EL1 */ | ||
282 | { Op0(3), Op1(0), CRn(12), CRm(8), Op2(7), access_gic_ap0r }, | ||
283 | /* ICC_AP1R0_EL1 */ | ||
284 | { Op0(3), Op1(0), CRn(12), CRm(9), Op2(0), access_gic_ap1r }, | ||
285 | /* ICC_AP1R1_EL1 */ | ||
286 | { Op0(3), Op1(0), CRn(12), CRm(9), Op2(1), access_gic_ap1r }, | ||
287 | /* ICC_AP1R2_EL1 */ | ||
288 | { Op0(3), Op1(0), CRn(12), CRm(9), Op2(2), access_gic_ap1r }, | ||
289 | /* ICC_AP1R3_EL1 */ | ||
290 | { Op0(3), Op1(0), CRn(12), CRm(9), Op2(3), access_gic_ap1r }, | ||
291 | /* ICC_BPR1_EL1 */ | ||
292 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(3), access_gic_bpr1 }, | ||
293 | /* ICC_CTLR_EL1 */ | ||
294 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(4), access_gic_ctlr }, | ||
295 | /* ICC_SRE_EL1 */ | ||
296 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre }, | ||
297 | /* ICC_IGRPEN0_EL1 */ | ||
298 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 }, | ||
299 | /* ICC_IGRPEN1_EL1 */ | ||
300 | { Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 }, | ||
301 | }; | ||
302 | |||
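The table above is keyed on the architectural Op0/Op1/CRn/CRm/Op2 encoding; the lookups below match it against a 64-bit index built from the device attribute. A rough sketch of that encoding, using the KVM_REG_ARM64_SYSREG_* fields from the uapi header (treating this as the exact layout the attribute carries is an assumption):

/* Illustrative only: build the index that find_reg_by_id() matches. */
static u64 icc_sysreg_id(u64 op0, u64 op1, u64 crn, u64 crm, u64 op2)
{
	return KVM_REG_SIZE_U64 |
	       (op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
	       (op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) |
	       (crn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) |
	       (crm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) |
	       (op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT);
}

/* e.g. ICC_CTLR_EL1 above is Op0(3), Op1(0), CRn(12), CRm(12), Op2(4) */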
303 | int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, | ||
304 | u64 *reg) | ||
305 | { | ||
306 | struct sys_reg_params params; | ||
307 | u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64; | ||
308 | |||
309 | params.regval = *reg; | ||
310 | params.is_write = is_write; | ||
311 | params.is_aarch32 = false; | ||
312 | params.is_32bit = false; | ||
313 | |||
314 | if (find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, | ||
315 | ARRAY_SIZE(gic_v3_icc_reg_descs))) | ||
316 | return 0; | ||
317 | |||
318 | return -ENXIO; | ||
319 | } | ||
320 | |||
321 | int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, | ||
322 | u64 *reg) | ||
323 | { | ||
324 | struct sys_reg_params params; | ||
325 | const struct sys_reg_desc *r; | ||
326 | u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64; | ||
327 | |||
328 | if (is_write) | ||
329 | params.regval = *reg; | ||
330 | params.is_write = is_write; | ||
331 | params.is_aarch32 = false; | ||
332 | params.is_32bit = false; | ||
333 | |||
334 | r = find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, | ||
335 | ARRAY_SIZE(gic_v3_icc_reg_descs)); | ||
336 | if (!r) | ||
337 | return -ENXIO; | ||
338 | |||
339 | if (!r->access(vcpu, ¶ms, r)) | ||
340 | return -EINVAL; | ||
341 | |||
342 | if (!is_write) | ||
343 | *reg = params.regval; | ||
344 | |||
345 | return 0; | ||
346 | } | ||
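For completeness, a hedged userspace-side sketch of how these registers would be reached: a GICv3 device fd, the KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS group introduced by this series, and an attr value combining the target vCPU's MPIDR with the system register encoding (the exact attr layout is an assumption here):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int get_icc_sysreg(int vgic_fd, uint64_t attr_id, uint64_t *val)
{
	struct kvm_device_attr attr = {
		.group	= KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
		.attr	= attr_id,	/* MPIDR | sysreg encoding (assumed) */
		.addr	= (uint64_t)(unsigned long)val,
	};

	return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
}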
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index bebec370324f..05e785fc061d 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h | |||
@@ -43,6 +43,7 @@ | |||
43 | #define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) | 43 | #define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) |
44 | #define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) | 44 | #define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) |
45 | #define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0) | 45 | #define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0) |
46 | #define KVM_REG_MIPS_CP0_INTCTL MIPS_CP0_32(12, 1) | ||
46 | #define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0) | 47 | #define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0) |
47 | #define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0) | 48 | #define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0) |
48 | #define KVM_REG_MIPS_CP0_PRID MIPS_CP0_32(15, 0) | 49 | #define KVM_REG_MIPS_CP0_PRID MIPS_CP0_32(15, 0) |
@@ -64,7 +65,7 @@ | |||
64 | #define KVM_REG_MIPS_CP0_KSCRATCH6 MIPS_CP0_64(31, 7) | 65 | #define KVM_REG_MIPS_CP0_KSCRATCH6 MIPS_CP0_64(31, 7) |
65 | 66 | ||
66 | 67 | ||
67 | #define KVM_MAX_VCPUS 1 | 68 | #define KVM_MAX_VCPUS 8 |
68 | #define KVM_USER_MEM_SLOTS 8 | 69 | #define KVM_USER_MEM_SLOTS 8 |
69 | /* memory slots that does not exposed to userspace */ | 70 | /* memory slots that does not exposed to userspace */ |
70 | #define KVM_PRIVATE_MEM_SLOTS 0 | 71 | #define KVM_PRIVATE_MEM_SLOTS 0 |
@@ -88,6 +89,7 @@ | |||
88 | 89 | ||
89 | #define KVM_GUEST_KUSEG 0x00000000UL | 90 | #define KVM_GUEST_KUSEG 0x00000000UL |
90 | #define KVM_GUEST_KSEG0 0x40000000UL | 91 | #define KVM_GUEST_KSEG0 0x40000000UL |
92 | #define KVM_GUEST_KSEG1 0x40000000UL | ||
91 | #define KVM_GUEST_KSEG23 0x60000000UL | 93 | #define KVM_GUEST_KSEG23 0x60000000UL |
92 | #define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0xe0000000) | 94 | #define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0xe0000000) |
93 | #define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) | 95 | #define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) |
@@ -104,7 +106,6 @@ | |||
104 | #define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23) | 106 | #define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23) |
105 | 107 | ||
106 | #define KVM_INVALID_PAGE 0xdeadbeef | 108 | #define KVM_INVALID_PAGE 0xdeadbeef |
107 | #define KVM_INVALID_INST 0xdeadbeef | ||
108 | #define KVM_INVALID_ADDR 0xdeadbeef | 109 | #define KVM_INVALID_ADDR 0xdeadbeef |
109 | 110 | ||
110 | /* | 111 | /* |
@@ -121,8 +122,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) | |||
121 | return IS_ERR_VALUE(addr); | 122 | return IS_ERR_VALUE(addr); |
122 | } | 123 | } |
123 | 124 | ||
124 | extern atomic_t kvm_mips_instance; | ||
125 | |||
126 | struct kvm_vm_stat { | 125 | struct kvm_vm_stat { |
127 | ulong remote_tlb_flush; | 126 | ulong remote_tlb_flush; |
128 | }; | 127 | }; |
@@ -156,12 +155,8 @@ struct kvm_arch_memory_slot { | |||
156 | }; | 155 | }; |
157 | 156 | ||
158 | struct kvm_arch { | 157 | struct kvm_arch { |
159 | /* Guest GVA->HPA page table */ | 158 | /* Guest physical mm */ |
160 | unsigned long *guest_pmap; | 159 | struct mm_struct gpa_mm; |
161 | unsigned long guest_pmap_npages; | ||
162 | |||
163 | /* Wired host TLB used for the commpage */ | ||
164 | int commpage_tlb; | ||
165 | }; | 160 | }; |
166 | 161 | ||
167 | #define N_MIPS_COPROC_REGS 32 | 162 | #define N_MIPS_COPROC_REGS 32 |
@@ -233,6 +228,7 @@ enum emulation_result { | |||
233 | EMULATE_FAIL, /* can't emulate this instruction */ | 228 | EMULATE_FAIL, /* can't emulate this instruction */ |
234 | EMULATE_WAIT, /* WAIT instruction */ | 229 | EMULATE_WAIT, /* WAIT instruction */ |
235 | EMULATE_PRIV_FAIL, | 230 | EMULATE_PRIV_FAIL, |
231 | EMULATE_EXCEPT, /* A guest exception has been generated */ | ||
236 | }; | 232 | }; |
237 | 233 | ||
238 | #define mips3_paddr_to_tlbpfn(x) \ | 234 | #define mips3_paddr_to_tlbpfn(x) \ |
@@ -250,6 +246,7 @@ enum emulation_result { | |||
250 | #define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID) | 246 | #define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID) |
251 | #define TLB_LO_IDX(x, va) (((va) >> PAGE_SHIFT) & 1) | 247 | #define TLB_LO_IDX(x, va) (((va) >> PAGE_SHIFT) & 1) |
252 | #define TLB_IS_VALID(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_V) | 248 | #define TLB_IS_VALID(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_V) |
249 | #define TLB_IS_DIRTY(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_D) | ||
253 | #define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \ | 250 | #define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \ |
254 | ((y) & VPN2_MASK & ~(x).tlb_mask)) | 251 | ((y) & VPN2_MASK & ~(x).tlb_mask)) |
255 | #define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \ | 252 | #define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \ |
@@ -261,6 +258,17 @@ struct kvm_mips_tlb { | |||
261 | long tlb_lo[2]; | 258 | long tlb_lo[2]; |
262 | }; | 259 | }; |
263 | 260 | ||
261 | #define KVM_NR_MEM_OBJS 4 | ||
262 | |||
263 | /* | ||
264 | * We don't want allocation failures within the mmu code, so we preallocate | ||
265 | * enough memory for a single page fault in a cache. | ||
266 | */ | ||
267 | struct kvm_mmu_memory_cache { | ||
268 | int nobjs; | ||
269 | void *objects[KVM_NR_MEM_OBJS]; | ||
270 | }; | ||
271 | |||
264 | #define KVM_MIPS_AUX_FPU 0x1 | 272 | #define KVM_MIPS_AUX_FPU 0x1 |
265 | #define KVM_MIPS_AUX_MSA 0x2 | 273 | #define KVM_MIPS_AUX_MSA 0x2 |
266 | 274 | ||
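The kvm_mmu_memory_cache just added mirrors the x86 arrangement: page-table pages are preallocated outside the lock and then consumed while kvm->mmu_lock (or another spinlock) is held. A sketch of the usual fill/consume pair, modelled on the x86 helpers; whether MIPS mmu.c uses these exact names is an assumption:

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	void *page;

	/* Fill the cache up to 'max' entries before taking any spinlock */
	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < max) {
		page = (void *)__get_free_page(GFP_KERNEL);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *cache)
{
	/* Safe under a spinlock: the cache was topped up beforehand */
	return cache->objects[--cache->nobjs];
}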
@@ -275,6 +283,8 @@ struct kvm_vcpu_arch { | |||
275 | unsigned long host_cp0_badvaddr; | 283 | unsigned long host_cp0_badvaddr; |
276 | unsigned long host_cp0_epc; | 284 | unsigned long host_cp0_epc; |
277 | u32 host_cp0_cause; | 285 | u32 host_cp0_cause; |
286 | u32 host_cp0_badinstr; | ||
287 | u32 host_cp0_badinstrp; | ||
278 | 288 | ||
279 | /* GPRS */ | 289 | /* GPRS */ |
280 | unsigned long gprs[32]; | 290 | unsigned long gprs[32]; |
@@ -318,20 +328,18 @@ struct kvm_vcpu_arch { | |||
318 | /* Bitmask of pending exceptions to be cleared */ | 328 | /* Bitmask of pending exceptions to be cleared */ |
319 | unsigned long pending_exceptions_clr; | 329 | unsigned long pending_exceptions_clr; |
320 | 330 | ||
321 | /* Save/Restore the entryhi register when are are preempted/scheduled back in */ | ||
322 | unsigned long preempt_entryhi; | ||
323 | |||
324 | /* S/W Based TLB for guest */ | 331 | /* S/W Based TLB for guest */ |
325 | struct kvm_mips_tlb guest_tlb[KVM_MIPS_GUEST_TLB_SIZE]; | 332 | struct kvm_mips_tlb guest_tlb[KVM_MIPS_GUEST_TLB_SIZE]; |
326 | 333 | ||
327 | /* Cached guest kernel/user ASIDs */ | 334 | /* Guest kernel/user [partial] mm */ |
328 | u32 guest_user_asid[NR_CPUS]; | ||
329 | u32 guest_kernel_asid[NR_CPUS]; | ||
330 | struct mm_struct guest_kernel_mm, guest_user_mm; | 335 | struct mm_struct guest_kernel_mm, guest_user_mm; |
331 | 336 | ||
332 | /* Guest ASID of last user mode execution */ | 337 | /* Guest ASID of last user mode execution */ |
333 | unsigned int last_user_gasid; | 338 | unsigned int last_user_gasid; |
334 | 339 | ||
340 | /* Cache some mmu pages needed inside spinlock regions */ | ||
341 | struct kvm_mmu_memory_cache mmu_page_cache; | ||
342 | |||
335 | int last_sched_cpu; | 343 | int last_sched_cpu; |
336 | 344 | ||
337 | /* WAIT executed */ | 345 | /* WAIT executed */ |
@@ -339,14 +347,15 @@ struct kvm_vcpu_arch { | |||
339 | 347 | ||
340 | u8 fpu_enabled; | 348 | u8 fpu_enabled; |
341 | u8 msa_enabled; | 349 | u8 msa_enabled; |
342 | u8 kscratch_enabled; | ||
343 | }; | 350 | }; |
344 | 351 | ||
345 | 352 | ||
346 | #define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0]) | 353 | #define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0]) |
347 | #define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val) | 354 | #define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val) |
348 | #define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0]) | 355 | #define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0]) |
356 | #define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val)) | ||
349 | #define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0]) | 357 | #define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0]) |
358 | #define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val)) | ||
350 | #define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) | 359 | #define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) |
351 | #define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) | 360 | #define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) |
352 | #define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) | 361 | #define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) |
@@ -522,9 +531,17 @@ struct kvm_mips_callbacks { | |||
522 | int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); | 531 | int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); |
523 | int (*handle_fpe)(struct kvm_vcpu *vcpu); | 532 | int (*handle_fpe)(struct kvm_vcpu *vcpu); |
524 | int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); | 533 | int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); |
525 | int (*vm_init)(struct kvm *kvm); | ||
526 | int (*vcpu_init)(struct kvm_vcpu *vcpu); | 534 | int (*vcpu_init)(struct kvm_vcpu *vcpu); |
535 | void (*vcpu_uninit)(struct kvm_vcpu *vcpu); | ||
527 | int (*vcpu_setup)(struct kvm_vcpu *vcpu); | 536 | int (*vcpu_setup)(struct kvm_vcpu *vcpu); |
537 | void (*flush_shadow_all)(struct kvm *kvm); | ||
538 | /* | ||
539 | * Must take care of flushing any cached GPA PTEs (e.g. guest entries in | ||
540 | * VZ root TLB, or T&E GVA page tables and corresponding root TLB | ||
541 | * mappings). | ||
542 | */ | ||
543 | void (*flush_shadow_memslot)(struct kvm *kvm, | ||
544 | const struct kvm_memory_slot *slot); | ||
528 | gpa_t (*gva_to_gpa)(gva_t gva); | 545 | gpa_t (*gva_to_gpa)(gva_t gva); |
529 | void (*queue_timer_int)(struct kvm_vcpu *vcpu); | 546 | void (*queue_timer_int)(struct kvm_vcpu *vcpu); |
530 | void (*dequeue_timer_int)(struct kvm_vcpu *vcpu); | 547 | void (*dequeue_timer_int)(struct kvm_vcpu *vcpu); |
@@ -542,8 +559,10 @@ struct kvm_mips_callbacks { | |||
542 | const struct kvm_one_reg *reg, s64 *v); | 559 | const struct kvm_one_reg *reg, s64 *v); |
543 | int (*set_one_reg)(struct kvm_vcpu *vcpu, | 560 | int (*set_one_reg)(struct kvm_vcpu *vcpu, |
544 | const struct kvm_one_reg *reg, s64 v); | 561 | const struct kvm_one_reg *reg, s64 v); |
545 | int (*vcpu_get_regs)(struct kvm_vcpu *vcpu); | 562 | int (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
546 | int (*vcpu_set_regs)(struct kvm_vcpu *vcpu); | 563 | int (*vcpu_put)(struct kvm_vcpu *vcpu, int cpu); |
564 | int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); | ||
565 | void (*vcpu_reenter)(struct kvm_run *run, struct kvm_vcpu *vcpu); | ||
547 | }; | 566 | }; |
548 | extern struct kvm_mips_callbacks *kvm_mips_callbacks; | 567 | extern struct kvm_mips_callbacks *kvm_mips_callbacks; |
549 | int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); | 568 | int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); |
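With the inline kvm_arch_flush_shadow_all()/_memslot() stubs removed further down, mips.c presumably supplies real implementations that drop the GPA mappings and then defer to the two new callbacks; roughly:

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	/* Drop every GPA mapping, then let the implementation flush
	 * whatever it derived from them (root TLB, GVA tables, ...). */
	kvm_mips_flush_gpa_pt(kvm, 0, ~0);
	kvm_mips_callbacks->flush_shadow_all(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	/* Likely done under kvm->mmu_lock in the real code */
	spin_lock(&kvm->mmu_lock);
	kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
			      slot->base_gfn + slot->npages - 1);
	kvm_mips_callbacks->flush_shadow_memslot(kvm, slot);
	spin_unlock(&kvm->mmu_lock);
}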
@@ -556,6 +575,7 @@ extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu); | |||
556 | /* Building of entry/exception code */ | 575 | /* Building of entry/exception code */ |
557 | int kvm_mips_entry_setup(void); | 576 | int kvm_mips_entry_setup(void); |
558 | void *kvm_mips_build_vcpu_run(void *addr); | 577 | void *kvm_mips_build_vcpu_run(void *addr); |
578 | void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler); | ||
559 | void *kvm_mips_build_exception(void *addr, void *handler); | 579 | void *kvm_mips_build_exception(void *addr, void *handler); |
560 | void *kvm_mips_build_exit(void *addr); | 580 | void *kvm_mips_build_exit(void *addr); |
561 | 581 | ||
@@ -580,54 +600,125 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu); | |||
580 | u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu); | 600 | u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu); |
581 | 601 | ||
582 | extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr, | 602 | extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr, |
583 | struct kvm_vcpu *vcpu); | 603 | struct kvm_vcpu *vcpu, |
604 | bool write_fault); | ||
584 | 605 | ||
585 | extern int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, | 606 | extern int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, |
586 | struct kvm_vcpu *vcpu); | 607 | struct kvm_vcpu *vcpu); |
587 | 608 | ||
588 | extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, | 609 | extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, |
589 | struct kvm_mips_tlb *tlb); | 610 | struct kvm_mips_tlb *tlb, |
611 | unsigned long gva, | ||
612 | bool write_fault); | ||
590 | 613 | ||
591 | extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, | 614 | extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, |
592 | u32 *opc, | 615 | u32 *opc, |
593 | struct kvm_run *run, | 616 | struct kvm_run *run, |
594 | struct kvm_vcpu *vcpu); | 617 | struct kvm_vcpu *vcpu, |
595 | 618 | bool write_fault); | |
596 | extern enum emulation_result kvm_mips_handle_tlbmod(u32 cause, | ||
597 | u32 *opc, | ||
598 | struct kvm_run *run, | ||
599 | struct kvm_vcpu *vcpu); | ||
600 | 619 | ||
601 | extern void kvm_mips_dump_host_tlbs(void); | 620 | extern void kvm_mips_dump_host_tlbs(void); |
602 | extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu); | 621 | extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu); |
603 | extern int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, | 622 | extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi, |
604 | unsigned long entrylo0, | 623 | bool user, bool kernel); |
605 | unsigned long entrylo1, | ||
606 | int flush_dcache_mask); | ||
607 | extern void kvm_mips_flush_host_tlb(int skip_kseg0); | ||
608 | extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi); | ||
609 | 624 | ||
610 | extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, | 625 | extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, |
611 | unsigned long entryhi); | 626 | unsigned long entryhi); |
612 | extern int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr); | 627 | |
613 | extern unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, | 628 | void kvm_mips_suspend_mm(int cpu); |
614 | unsigned long gva); | 629 | void kvm_mips_resume_mm(int cpu); |
615 | extern void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, | 630 | |
616 | struct kvm_vcpu *vcpu); | 631 | /* MMU handling */ |
617 | extern void kvm_local_flush_tlb_all(void); | 632 | |
618 | extern void kvm_mips_alloc_new_mmu_context(struct kvm_vcpu *vcpu); | 633 | /** |
619 | extern void kvm_mips_vcpu_load(struct kvm_vcpu *vcpu, int cpu); | 634 | * enum kvm_mips_flush - Types of MMU flushes. |
620 | extern void kvm_mips_vcpu_put(struct kvm_vcpu *vcpu); | 635 | * @KMF_USER: Flush guest user virtual memory mappings. |
636 | * Guest USeg only. | ||
637 | * @KMF_KERN: Flush guest kernel virtual memory mappings. | ||
638 | * Guest USeg and KSeg2/3. | ||
639 | * @KMF_GPA: Flush guest physical memory mappings. | ||
640 | * Also includes KSeg0 if KMF_KERN is set. | ||
641 | */ | ||
642 | enum kvm_mips_flush { | ||
643 | KMF_USER = 0x0, | ||
644 | KMF_KERN = 0x1, | ||
645 | KMF_GPA = 0x2, | ||
646 | }; | ||
647 | void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags); | ||
648 | bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn); | ||
649 | int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn); | ||
650 | pgd_t *kvm_pgd_alloc(void); | ||
651 | void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); | ||
652 | void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr, | ||
653 | bool user); | ||
654 | void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu); | ||
655 | void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu); | ||
656 | |||
657 | enum kvm_mips_fault_result { | ||
658 | KVM_MIPS_MAPPED = 0, | ||
659 | KVM_MIPS_GVA, | ||
660 | KVM_MIPS_GPA, | ||
661 | KVM_MIPS_TLB, | ||
662 | KVM_MIPS_TLBINV, | ||
663 | KVM_MIPS_TLBMOD, | ||
664 | }; | ||
665 | enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, | ||
666 | unsigned long gva, | ||
667 | bool write); | ||
668 | |||
669 | #define KVM_ARCH_WANT_MMU_NOTIFIER | ||
670 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | ||
671 | int kvm_unmap_hva_range(struct kvm *kvm, | ||
672 | unsigned long start, unsigned long end); | ||
673 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | ||
674 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); | ||
675 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | ||
676 | |||
677 | static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | ||
678 | unsigned long address) | ||
679 | { | ||
680 | } | ||
621 | 681 | ||
622 | /* Emulation */ | 682 | /* Emulation */ |
623 | u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu); | 683 | int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); |
624 | enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause); | 684 | enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause); |
685 | int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); | ||
686 | int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); | ||
687 | |||
688 | /** | ||
689 | * kvm_is_ifetch_fault() - Find whether a TLBL exception is due to ifetch fault. | ||
690 | * @vcpu: Virtual CPU. | ||
691 | * | ||
692 | * Returns: Whether the TLBL exception was likely due to an instruction | ||
693 | * fetch fault rather than a data load fault. | ||
694 | */ | ||
695 | static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *vcpu) | ||
696 | { | ||
697 | unsigned long badvaddr = vcpu->host_cp0_badvaddr; | ||
698 | unsigned long epc = msk_isa16_mode(vcpu->pc); | ||
699 | u32 cause = vcpu->host_cp0_cause; | ||
700 | |||
701 | if (epc == badvaddr) | ||
702 | return true; | ||
703 | |||
704 | /* | ||
705 | * Branches may be 32-bit or 16-bit instructions. | ||
706 | * This isn't exact, but we don't really support MIPS16 or microMIPS yet | ||
707 | * in KVM anyway. | ||
708 | */ | ||
709 | if ((cause & CAUSEF_BD) && badvaddr - epc <= 4) | ||
710 | return true; | ||
711 | |||
712 | return false; | ||
713 | } | ||
625 | 714 | ||
626 | extern enum emulation_result kvm_mips_emulate_inst(u32 cause, | 715 | extern enum emulation_result kvm_mips_emulate_inst(u32 cause, |
627 | u32 *opc, | 716 | u32 *opc, |
628 | struct kvm_run *run, | 717 | struct kvm_run *run, |
629 | struct kvm_vcpu *vcpu); | 718 | struct kvm_vcpu *vcpu); |
630 | 719 | ||
720 | long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu); | ||
721 | |||
631 | extern enum emulation_result kvm_mips_emulate_syscall(u32 cause, | 722 | extern enum emulation_result kvm_mips_emulate_syscall(u32 cause, |
632 | u32 *opc, | 723 | u32 *opc, |
633 | struct kvm_run *run, | 724 | struct kvm_run *run, |
@@ -761,10 +852,6 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {} | |||
761 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | 852 | static inline void kvm_arch_free_memslot(struct kvm *kvm, |
762 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | 853 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} |
763 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} | 854 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} |
764 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | ||
765 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | ||
766 | struct kvm_memory_slot *slot) {} | ||
767 | static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} | ||
768 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 855 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
769 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} | 856 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} |
770 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} | 857 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} |
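Taken together, kvm_is_ifetch_fault() and the BadInstr accessors change how exit handlers recover the faulting instruction. An illustrative calling pattern for a TLBL exit (handler name and control flow are invented for the sketch, not taken from the patch):

static enum emulation_result handle_tlbl_sketch(u32 cause, u32 *opc,
						struct kvm_run *run,
						struct kvm_vcpu *vcpu)
{
	u32 inst;

	/* The fetch itself faulted: nothing to decode, raise a guest TLBL */
	if (kvm_is_ifetch_fault(&vcpu->arch))
		return kvm_mips_emulate_tlbmiss_ld(cause, opc, run, vcpu);

	/* Otherwise read the faulting instruction, via BadInstr if present */
	if (kvm_get_badinstr(opc, vcpu, &inst))
		return EMULATE_FAIL;

	/* ... emulate the load/store encoded in 'inst' ... */
	return EMULATE_DONE;
}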
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index ddd57ade1aa8..2abf94f72c0a 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h | |||
@@ -29,9 +29,11 @@ do { \ | |||
29 | } \ | 29 | } \ |
30 | } while (0) | 30 | } while (0) |
31 | 31 | ||
32 | extern void tlbmiss_handler_setup_pgd(unsigned long); | ||
33 | |||
34 | /* Note: This is also implemented with uasm in arch/mips/kvm/entry.c */ | ||
32 | #define TLBMISS_HANDLER_SETUP_PGD(pgd) \ | 35 | #define TLBMISS_HANDLER_SETUP_PGD(pgd) \ |
33 | do { \ | 36 | do { \ |
34 | extern void tlbmiss_handler_setup_pgd(unsigned long); \ | ||
35 | tlbmiss_handler_setup_pgd((unsigned long)(pgd)); \ | 37 | tlbmiss_handler_setup_pgd((unsigned long)(pgd)); \ |
36 | htw_set_pwbase((unsigned long)pgd); \ | 38 | htw_set_pwbase((unsigned long)pgd); \ |
37 | } while (0) | 39 | } while (0) |
@@ -97,17 +99,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) | |||
97 | static inline void | 99 | static inline void |
98 | get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) | 100 | get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) |
99 | { | 101 | { |
100 | extern void kvm_local_flush_tlb_all(void); | ||
101 | unsigned long asid = asid_cache(cpu); | 102 | unsigned long asid = asid_cache(cpu); |
102 | 103 | ||
103 | if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) { | 104 | if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) { |
104 | if (cpu_has_vtag_icache) | 105 | if (cpu_has_vtag_icache) |
105 | flush_icache_all(); | 106 | flush_icache_all(); |
106 | #ifdef CONFIG_KVM | ||
107 | kvm_local_flush_tlb_all(); /* start new asid cycle */ | ||
108 | #else | ||
109 | local_flush_tlb_all(); /* start new asid cycle */ | 107 | local_flush_tlb_all(); /* start new asid cycle */ |
110 | #endif | ||
111 | if (!asid) /* fix version if needed */ | 108 | if (!asid) /* fix version if needed */ |
112 | asid = asid_first_version(cpu); | 109 | asid = asid_first_version(cpu); |
113 | } | 110 | } |
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index 6985eb59b085..a8a0199bf760 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h | |||
@@ -19,6 +19,8 @@ | |||
19 | * Some parts derived from the x86 version of this file. | 19 | * Some parts derived from the x86 version of this file. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #define __KVM_HAVE_READONLY_MEM | ||
23 | |||
22 | /* | 24 | /* |
23 | * for KVM_GET_REGS and KVM_SET_REGS | 25 | * for KVM_GET_REGS and KVM_SET_REGS |
24 | * | 26 | * |
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 7c56d6b124d1..65067327db12 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig | |||
@@ -20,7 +20,9 @@ config KVM | |||
20 | select EXPORT_UASM | 20 | select EXPORT_UASM |
21 | select PREEMPT_NOTIFIERS | 21 | select PREEMPT_NOTIFIERS |
22 | select ANON_INODES | 22 | select ANON_INODES |
23 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT | ||
23 | select KVM_MMIO | 24 | select KVM_MMIO |
25 | select MMU_NOTIFIER | ||
24 | select SRCU | 26 | select SRCU |
25 | ---help--- | 27 | ---help--- |
26 | Support for hosting Guest kernels. | 28 | Support for hosting Guest kernels. |
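__KVM_HAVE_READONLY_MEM plus the MMU_NOTIFIER and dirty-log selections above are what let MIPS treat writes to selected regions as MMIO. A minimal userspace sketch of registering such a slot (vm_fd and the host buffer are assumed to exist already):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Guest writes to this slot will exit to userspace as MMIO. */
static int set_readonly_slot(int vm_fd, uint32_t slot, uint64_t gpa,
			     uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot		 = slot,
		.flags		 = KVM_MEM_READONLY,
		.guest_phys_addr = gpa,
		.memory_size	 = size,
		.userspace_addr	 = (uintptr_t)hva,
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}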
diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c index 010cef240688..f8e772564d74 100644 --- a/arch/mips/kvm/dyntrans.c +++ b/arch/mips/kvm/dyntrans.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/err.h> | 13 | #include <linux/err.h> |
14 | #include <linux/highmem.h> | 14 | #include <linux/highmem.h> |
15 | #include <linux/kvm_host.h> | 15 | #include <linux/kvm_host.h> |
16 | #include <linux/uaccess.h> | ||
16 | #include <linux/vmalloc.h> | 17 | #include <linux/vmalloc.h> |
17 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
18 | #include <linux/bootmem.h> | 19 | #include <linux/bootmem.h> |
@@ -29,28 +30,37 @@ | |||
29 | static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc, | 30 | static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc, |
30 | union mips_instruction replace) | 31 | union mips_instruction replace) |
31 | { | 32 | { |
32 | unsigned long paddr, flags; | 33 | unsigned long vaddr = (unsigned long)opc; |
33 | void *vaddr; | 34 | int err; |
34 | 35 | ||
35 | if (KVM_GUEST_KSEGX((unsigned long)opc) == KVM_GUEST_KSEG0) { | 36 | retry: |
36 | paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, | 37 | /* The GVA page table is still active so use the Linux TLB handlers */ |
37 | (unsigned long)opc); | 38 | kvm_trap_emul_gva_lockless_begin(vcpu); |
38 | vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr))); | 39 | err = put_user(replace.word, opc); |
39 | vaddr += paddr & ~PAGE_MASK; | 40 | kvm_trap_emul_gva_lockless_end(vcpu); |
40 | memcpy(vaddr, (void *)&replace, sizeof(u32)); | 41 | |
41 | local_flush_icache_range((unsigned long)vaddr, | 42 | if (unlikely(err)) { |
42 | (unsigned long)vaddr + 32); | 43 | /* |
43 | kunmap_atomic(vaddr); | 44 | * We write protect clean pages in GVA page table so normal |
44 | } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { | 45 | * Linux TLB mod handler doesn't silently dirty the page. |
45 | local_irq_save(flags); | 46 | * Its also possible we raced with a GVA invalidation. |
46 | memcpy((void *)opc, (void *)&replace, sizeof(u32)); | 47 | * It's also possible we raced with a GVA invalidation. |
47 | __local_flush_icache_user_range((unsigned long)opc, | 48 | */ |
48 | (unsigned long)opc + 32); | 49 | err = kvm_trap_emul_gva_fault(vcpu, vaddr, true); |
49 | local_irq_restore(flags); | 50 | if (unlikely(err)) { |
50 | } else { | 51 | kvm_info("%s: Address unwriteable: %p\n", |
51 | kvm_err("%s: Invalid address: %p\n", __func__, opc); | 52 | __func__, opc); |
52 | return -EFAULT; | 53 | return -EFAULT; |
54 | } | ||
55 | |||
56 | /* | ||
57 | * Try again. This will likely trigger a TLB refill, which will | ||
58 | * fetch the new dirty entry from the GVA page table, which | ||
59 | * should then succeed. | ||
60 | */ | ||
61 | goto retry; | ||
53 | } | 62 | } |
63 | __local_flush_icache_user_range(vaddr, vaddr + 4); | ||
54 | 64 | ||
55 | return 0; | 65 | return 0; |
56 | } | 66 | } |
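The write side above patches guest code through the live GVA page tables with a put_user() retry loop; the read side used by kvm_get_badinstr() presumably mirrors it. A sketch under that assumption (the real kvm_get_inst() lives in mmu.c and may differ in detail):

int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
	int err;

retry:
	/* Read through the active GVA page table, as for the write side */
	kvm_trap_emul_gva_lockless_begin(vcpu);
	err = get_user(*out, opc);
	kvm_trap_emul_gva_lockless_end(vcpu);

	if (unlikely(err)) {
		/* Possibly raced with a GVA invalidation; map and retry */
		err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc, false);
		if (unlikely(err))
			return -EFAULT;
		goto retry;
	}
	return 0;
}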
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index aa0937423e28..d40cfaad4529 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c | |||
@@ -38,23 +38,25 @@ | |||
38 | * Compute the return address and do emulate branch simulation, if required. | 38 | * Compute the return address and do emulate branch simulation, if required. |
39 | * This function should be called only in branch delay slot active. | 39 | * This function should be called only in branch delay slot active. |
40 | */ | 40 | */ |
41 | unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, | 41 | static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc, |
42 | unsigned long instpc) | 42 | unsigned long *out) |
43 | { | 43 | { |
44 | unsigned int dspcontrol; | 44 | unsigned int dspcontrol; |
45 | union mips_instruction insn; | 45 | union mips_instruction insn; |
46 | struct kvm_vcpu_arch *arch = &vcpu->arch; | 46 | struct kvm_vcpu_arch *arch = &vcpu->arch; |
47 | long epc = instpc; | 47 | long epc = instpc; |
48 | long nextpc = KVM_INVALID_INST; | 48 | long nextpc; |
49 | int err; | ||
49 | 50 | ||
50 | if (epc & 3) | 51 | if (epc & 3) { |
51 | goto unaligned; | 52 | kvm_err("%s: unaligned epc\n", __func__); |
53 | return -EINVAL; | ||
54 | } | ||
52 | 55 | ||
53 | /* Read the instruction */ | 56 | /* Read the instruction */ |
54 | insn.word = kvm_get_inst((u32 *) epc, vcpu); | 57 | err = kvm_get_badinstrp((u32 *)epc, vcpu, &insn.word); |
55 | 58 | if (err) | |
56 | if (insn.word == KVM_INVALID_INST) | 59 | return err; |
57 | return KVM_INVALID_INST; | ||
58 | 60 | ||
59 | switch (insn.i_format.opcode) { | 61 | switch (insn.i_format.opcode) { |
60 | /* jr and jalr are in r_format format. */ | 62 | /* jr and jalr are in r_format format. */ |
@@ -66,6 +68,8 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, | |||
66 | case jr_op: | 68 | case jr_op: |
67 | nextpc = arch->gprs[insn.r_format.rs]; | 69 | nextpc = arch->gprs[insn.r_format.rs]; |
68 | break; | 70 | break; |
71 | default: | ||
72 | return -EINVAL; | ||
69 | } | 73 | } |
70 | break; | 74 | break; |
71 | 75 | ||
@@ -114,8 +118,11 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, | |||
114 | nextpc = epc; | 118 | nextpc = epc; |
115 | break; | 119 | break; |
116 | case bposge32_op: | 120 | case bposge32_op: |
117 | if (!cpu_has_dsp) | 121 | if (!cpu_has_dsp) { |
118 | goto sigill; | 122 | kvm_err("%s: DSP branch but not DSP ASE\n", |
123 | __func__); | ||
124 | return -EINVAL; | ||
125 | } | ||
119 | 126 | ||
120 | dspcontrol = rddsp(0x01); | 127 | dspcontrol = rddsp(0x01); |
121 | 128 | ||
@@ -125,6 +132,8 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, | |||
125 | epc += 8; | 132 | epc += 8; |
126 | nextpc = epc; | 133 | nextpc = epc; |
127 | break; | 134 | break; |
135 | default: | ||
136 | return -EINVAL; | ||
128 | } | 137 | } |
129 | break; | 138 | break; |
130 | 139 | ||
@@ -189,7 +198,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, | |||
189 | /* And now the FPA/cp1 branch instructions. */ | 198 | /* And now the FPA/cp1 branch instructions. */ |
190 | case cop1_op: | 199 | case cop1_op: |
191 | kvm_err("%s: unsupported cop1_op\n", __func__); | 200 | kvm_err("%s: unsupported cop1_op\n", __func__); |
192 | break; | 201 | return -EINVAL; |
193 | 202 | ||
194 | #ifdef CONFIG_CPU_MIPSR6 | 203 | #ifdef CONFIG_CPU_MIPSR6 |
195 | /* R6 added the following compact branches with forbidden slots */ | 204 | /* R6 added the following compact branches with forbidden slots */ |
@@ -198,19 +207,19 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, | |||
198 | /* only rt == 0 isn't compact branch */ | 207 | /* only rt == 0 isn't compact branch */ |
199 | if (insn.i_format.rt != 0) | 208 | if (insn.i_format.rt != 0) |
200 | goto compact_branch; | 209 | goto compact_branch; |
201 | break; | 210 | return -EINVAL; |
202 | case pop10_op: | 211 | case pop10_op: |
203 | case pop30_op: | 212 | case pop30_op: |
204 | /* only rs == rt == 0 is reserved, rest are compact branches */ | 213 | /* only rs == rt == 0 is reserved, rest are compact branches */ |
205 | if (insn.i_format.rs != 0 || insn.i_format.rt != 0) | 214 | if (insn.i_format.rs != 0 || insn.i_format.rt != 0) |
206 | goto compact_branch; | 215 | goto compact_branch; |
207 | break; | 216 | return -EINVAL; |
208 | case pop66_op: | 217 | case pop66_op: |
209 | case pop76_op: | 218 | case pop76_op: |
210 | /* only rs == 0 isn't compact branch */ | 219 | /* only rs == 0 isn't compact branch */ |
211 | if (insn.i_format.rs != 0) | 220 | if (insn.i_format.rs != 0) |
212 | goto compact_branch; | 221 | goto compact_branch; |
213 | break; | 222 | return -EINVAL; |
214 | compact_branch: | 223 | compact_branch: |
215 | /* | 224 | /* |
216 | * If we've hit an exception on the forbidden slot, then | 225 | * If we've hit an exception on the forbidden slot, then |
@@ -221,42 +230,74 @@ compact_branch: | |||
221 | break; | 230 | break; |
222 | #else | 231 | #else |
223 | compact_branch: | 232 | compact_branch: |
224 | /* Compact branches not supported before R6 */ | 233 | /* Fall through - Compact branches not supported before R6 */ |
225 | break; | ||
226 | #endif | 234 | #endif |
235 | default: | ||
236 | return -EINVAL; | ||
227 | } | 237 | } |
228 | 238 | ||
229 | return nextpc; | 239 | *out = nextpc; |
230 | 240 | return 0; | |
231 | unaligned: | ||
232 | kvm_err("%s: unaligned epc\n", __func__); | ||
233 | return nextpc; | ||
234 | |||
235 | sigill: | ||
236 | kvm_err("%s: DSP branch but not DSP ASE\n", __func__); | ||
237 | return nextpc; | ||
238 | } | 241 | } |
239 | 242 | ||
240 | enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause) | 243 | enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause) |
241 | { | 244 | { |
242 | unsigned long branch_pc; | 245 | int err; |
243 | enum emulation_result er = EMULATE_DONE; | ||
244 | 246 | ||
245 | if (cause & CAUSEF_BD) { | 247 | if (cause & CAUSEF_BD) { |
246 | branch_pc = kvm_compute_return_epc(vcpu, vcpu->arch.pc); | 248 | err = kvm_compute_return_epc(vcpu, vcpu->arch.pc, |
247 | if (branch_pc == KVM_INVALID_INST) { | 249 | &vcpu->arch.pc); |
248 | er = EMULATE_FAIL; | 250 | if (err) |
249 | } else { | 251 | return EMULATE_FAIL; |
250 | vcpu->arch.pc = branch_pc; | 252 | } else { |
251 | kvm_debug("BD update_pc(): New PC: %#lx\n", | ||
252 | vcpu->arch.pc); | ||
253 | } | ||
254 | } else | ||
255 | vcpu->arch.pc += 4; | 253 | vcpu->arch.pc += 4; |
254 | } | ||
256 | 255 | ||
257 | kvm_debug("update_pc(): New PC: %#lx\n", vcpu->arch.pc); | 256 | kvm_debug("update_pc(): New PC: %#lx\n", vcpu->arch.pc); |
258 | 257 | ||
259 | return er; | 258 | return EMULATE_DONE; |
259 | } | ||
260 | |||
261 | /** | ||
262 | * kvm_get_badinstr() - Get bad instruction encoding. | ||
263 | * @opc: Guest pointer to faulting instruction. | ||
264 | * @vcpu: KVM VCPU information. | ||
265 | * | ||
266 | * Gets the instruction encoding of the faulting instruction, using the saved | ||
267 | * BadInstr register value if it exists, otherwise falling back to reading guest | ||
268 | * memory at @opc. | ||
269 | * | ||
270 | * Returns: 0 on success (encoding written to @out), else an error code. | ||
271 | */ | ||
272 | int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) | ||
273 | { | ||
274 | if (cpu_has_badinstr) { | ||
275 | *out = vcpu->arch.host_cp0_badinstr; | ||
276 | return 0; | ||
277 | } else { | ||
278 | return kvm_get_inst(opc, vcpu, out); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | /** | ||
283 | * kvm_get_badinstrp() - Get bad prior instruction encoding. | ||
284 | * @opc: Guest pointer to prior faulting instruction. | ||
285 | * @vcpu: KVM VCPU information. | ||
286 | * | ||
287 | * Gets the instruction encoding of the prior faulting instruction (the branch | ||
288 | * containing the delay slot which faulted), using the saved BadInstrP register | ||
289 | * value if it exists, otherwise falling back to reading guest memory at @opc. | ||
290 | * | ||
291 | * Returns: 0 on success (prior encoding written to @out), else an error code. | ||
292 | */ | ||
293 | int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) | ||
294 | { | ||
295 | if (cpu_has_badinstrp) { | ||
296 | *out = vcpu->arch.host_cp0_badinstrp; | ||
297 | return 0; | ||
298 | } else { | ||
299 | return kvm_get_inst(opc, vcpu, out); | ||
300 | } | ||
260 | } | 301 | } |
261 | 302 | ||
262 | /** | 303 | /** |
@@ -856,22 +897,30 @@ enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) | |||
856 | static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu, | 897 | static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu, |
857 | struct kvm_mips_tlb *tlb) | 898 | struct kvm_mips_tlb *tlb) |
858 | { | 899 | { |
900 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | ||
901 | struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; | ||
859 | int cpu, i; | 902 | int cpu, i; |
860 | bool user; | 903 | bool user; |
861 | 904 | ||
862 | /* No need to flush for entries which are already invalid */ | 905 | /* No need to flush for entries which are already invalid */ |
863 | if (!((tlb->tlb_lo[0] | tlb->tlb_lo[1]) & ENTRYLO_V)) | 906 | if (!((tlb->tlb_lo[0] | tlb->tlb_lo[1]) & ENTRYLO_V)) |
864 | return; | 907 | return; |
908 | /* Don't touch host kernel page tables or TLB mappings */ | ||
909 | if ((unsigned long)tlb->tlb_hi > 0x7fffffff) | ||
910 | return; | ||
865 | /* User address space doesn't need flushing for KSeg2/3 changes */ | 911 | /* User address space doesn't need flushing for KSeg2/3 changes */ |
866 | user = tlb->tlb_hi < KVM_GUEST_KSEG0; | 912 | user = tlb->tlb_hi < KVM_GUEST_KSEG0; |
867 | 913 | ||
868 | preempt_disable(); | 914 | preempt_disable(); |
869 | 915 | ||
916 | /* Invalidate page table entries */ | ||
917 | kvm_trap_emul_invalidate_gva(vcpu, tlb->tlb_hi & VPN2_MASK, user); | ||
918 | |||
870 | /* | 919 | /* |
871 | * Probe the shadow host TLB for the entry being overwritten, if one | 920 | * Probe the shadow host TLB for the entry being overwritten, if one |
872 | * matches, invalidate it | 921 | * matches, invalidate it |
873 | */ | 922 | */ |
874 | kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi); | 923 | kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi, user, true); |
875 | 924 | ||
876 | /* Invalidate the whole ASID on other CPUs */ | 925 | /* Invalidate the whole ASID on other CPUs */ |
877 | cpu = smp_processor_id(); | 926 | cpu = smp_processor_id(); |
@@ -879,8 +928,8 @@ static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu, | |||
879 | if (i == cpu) | 928 | if (i == cpu) |
880 | continue; | 929 | continue; |
881 | if (user) | 930 | if (user) |
882 | vcpu->arch.guest_user_asid[i] = 0; | 931 | cpu_context(i, user_mm) = 0; |
883 | vcpu->arch.guest_kernel_asid[i] = 0; | 932 | cpu_context(i, kern_mm) = 0; |
884 | } | 933 | } |
885 | 934 | ||
886 | preempt_enable(); | 935 | preempt_enable(); |
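The per-vcpu guest_user_asid/guest_kernel_asid arrays are gone; ASIDs now live in the guest mm_structs, so invalidating a remote CPU reduces to clearing its cpu_context() slot. Paraphrasing asm/mmu_context.h:

/* Paraphrased: the per-CPU ASID is stored in the mm itself */
#define cpu_context(cpu, mm)	((mm)->context.asid[cpu])

/* Zeroing it makes the ASID version check fail on the next guest entry on
 * that CPU, forcing get_new_mmu_context() to hand out a fresh ASID. */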
@@ -1017,7 +1066,7 @@ unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu) | |||
1017 | unsigned int mask = MIPS_CONF_M; | 1066 | unsigned int mask = MIPS_CONF_M; |
1018 | 1067 | ||
1019 | /* KScrExist */ | 1068 | /* KScrExist */ |
1020 | mask |= (unsigned int)vcpu->arch.kscratch_enabled << 16; | 1069 | mask |= 0xfc << MIPS_CONF4_KSCREXIST_SHIFT; |
1021 | 1070 | ||
1022 | return mask; | 1071 | return mask; |
1023 | } | 1072 | } |
@@ -1056,6 +1105,7 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, | |||
1056 | struct kvm_vcpu *vcpu) | 1105 | struct kvm_vcpu *vcpu) |
1057 | { | 1106 | { |
1058 | struct mips_coproc *cop0 = vcpu->arch.cop0; | 1107 | struct mips_coproc *cop0 = vcpu->arch.cop0; |
1108 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | ||
1059 | enum emulation_result er = EMULATE_DONE; | 1109 | enum emulation_result er = EMULATE_DONE; |
1060 | u32 rt, rd, sel; | 1110 | u32 rt, rd, sel; |
1061 | unsigned long curr_pc; | 1111 | unsigned long curr_pc; |
@@ -1150,14 +1200,13 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, | |||
1150 | er = EMULATE_FAIL; | 1200 | er = EMULATE_FAIL; |
1151 | break; | 1201 | break; |
1152 | } | 1202 | } |
1153 | #define C0_EBASE_CORE_MASK 0xff | ||
1154 | if ((rd == MIPS_CP0_PRID) && (sel == 1)) { | 1203 | if ((rd == MIPS_CP0_PRID) && (sel == 1)) { |
1155 | /* Preserve CORE number */ | 1204 | /* |
1156 | kvm_change_c0_guest_ebase(cop0, | 1205 | * Preserve core number, and keep the exception |
1157 | ~(C0_EBASE_CORE_MASK), | 1206 | * base in guest KSeg0. |
1207 | */ | ||
1208 | kvm_change_c0_guest_ebase(cop0, 0x1ffff000, | ||
1158 | vcpu->arch.gprs[rt]); | 1209 | vcpu->arch.gprs[rt]); |
1159 | kvm_err("MTCz, cop0->reg[EBASE]: %#lx\n", | ||
1160 | kvm_read_c0_guest_ebase(cop0)); | ||
1161 | } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { | 1210 | } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { |
1162 | u32 nasid = | 1211 | u32 nasid = |
1163 | vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID; | 1212 | vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID; |
@@ -1169,6 +1218,17 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, | |||
1169 | nasid); | 1218 | nasid); |
1170 | 1219 | ||
1171 | /* | 1220 | /* |
1221 | * Flush entries from the GVA page | ||
1222 | * tables. | ||
1223 | * Guest user page table will get | ||
1224 | * flushed lazily on re-entry to guest | ||
1225 | * user if the guest ASID actually | ||
1226 | * changes. | ||
1227 | */ | ||
1228 | kvm_mips_flush_gva_pt(kern_mm->pgd, | ||
1229 | KMF_KERN); | ||
1230 | |||
1231 | /* | ||
1172 | * Regenerate/invalidate kernel MMU | 1232 | * Regenerate/invalidate kernel MMU |
1173 | * context. | 1233 | * context. |
1174 | * The user MMU context will be | 1234 | * The user MMU context will be |
@@ -1178,13 +1238,10 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, | |||
1178 | */ | 1238 | */ |
1179 | preempt_disable(); | 1239 | preempt_disable(); |
1180 | cpu = smp_processor_id(); | 1240 | cpu = smp_processor_id(); |
1181 | kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, | 1241 | get_new_mmu_context(kern_mm, cpu); |
1182 | cpu, vcpu); | ||
1183 | vcpu->arch.guest_kernel_asid[cpu] = | ||
1184 | vcpu->arch.guest_kernel_mm.context.asid[cpu]; | ||
1185 | for_each_possible_cpu(i) | 1242 | for_each_possible_cpu(i) |
1186 | if (i != cpu) | 1243 | if (i != cpu) |
1187 | vcpu->arch.guest_kernel_asid[i] = 0; | 1244 | cpu_context(i, kern_mm) = 0; |
1188 | preempt_enable(); | 1245 | preempt_enable(); |
1189 | } | 1246 | } |
1190 | kvm_write_c0_guest_entryhi(cop0, | 1247 | kvm_write_c0_guest_entryhi(cop0, |
@@ -1639,12 +1696,56 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, | |||
1639 | return er; | 1696 | return er; |
1640 | } | 1697 | } |
1641 | 1698 | ||
1699 | static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long), | ||
1700 | unsigned long curr_pc, | ||
1701 | unsigned long addr, | ||
1702 | struct kvm_run *run, | ||
1703 | struct kvm_vcpu *vcpu, | ||
1704 | u32 cause) | ||
1705 | { | ||
1706 | int err; | ||
1707 | |||
1708 | for (;;) { | ||
1709 | /* Carefully attempt the cache operation */ | ||
1710 | kvm_trap_emul_gva_lockless_begin(vcpu); | ||
1711 | err = fn(addr); | ||
1712 | kvm_trap_emul_gva_lockless_end(vcpu); | ||
1713 | |||
1714 | if (likely(!err)) | ||
1715 | return EMULATE_DONE; | ||
1716 | |||
1717 | /* | ||
1718 | * Try to handle the fault and retry, maybe we just raced with a | ||
1719 | * GVA invalidation. | ||
1720 | */ | ||
1721 | switch (kvm_trap_emul_gva_fault(vcpu, addr, false)) { | ||
1722 | case KVM_MIPS_GVA: | ||
1723 | case KVM_MIPS_GPA: | ||
1724 | /* bad virtual or physical address */ | ||
1725 | return EMULATE_FAIL; | ||
1726 | case KVM_MIPS_TLB: | ||
1727 | /* no matching guest TLB */ | ||
1728 | vcpu->arch.host_cp0_badvaddr = addr; | ||
1729 | vcpu->arch.pc = curr_pc; | ||
1730 | kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu); | ||
1731 | return EMULATE_EXCEPT; | ||
1732 | case KVM_MIPS_TLBINV: | ||
1733 | /* invalid matching guest TLB */ | ||
1734 | vcpu->arch.host_cp0_badvaddr = addr; | ||
1735 | vcpu->arch.pc = curr_pc; | ||
1736 | kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu); | ||
1737 | return EMULATE_EXCEPT; | ||
1738 | default: | ||
1739 | break; | ||
1740 | }; | ||
1741 | } | ||
1742 | } | ||
1743 | |||
1642 | enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, | 1744 | enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, |
1643 | u32 *opc, u32 cause, | 1745 | u32 *opc, u32 cause, |
1644 | struct kvm_run *run, | 1746 | struct kvm_run *run, |
1645 | struct kvm_vcpu *vcpu) | 1747 | struct kvm_vcpu *vcpu) |
1646 | { | 1748 | { |
1647 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
1648 | enum emulation_result er = EMULATE_DONE; | 1749 | enum emulation_result er = EMULATE_DONE; |
1649 | u32 cache, op_inst, op, base; | 1750 | u32 cache, op_inst, op, base; |
1650 | s16 offset; | 1751 | s16 offset; |
@@ -1701,80 +1802,16 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, | |||
1701 | goto done; | 1802 | goto done; |
1702 | } | 1803 | } |
1703 | 1804 | ||
1704 | preempt_disable(); | ||
1705 | if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { | ||
1706 | if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 && | ||
1707 | kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) { | ||
1708 | kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n", | ||
1709 | __func__, va, vcpu, read_c0_entryhi()); | ||
1710 | er = EMULATE_FAIL; | ||
1711 | preempt_enable(); | ||
1712 | goto done; | ||
1713 | } | ||
1714 | } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) || | ||
1715 | KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { | ||
1716 | int index; | ||
1717 | |||
1718 | /* If an entry already exists then skip */ | ||
1719 | if (kvm_mips_host_tlb_lookup(vcpu, va) >= 0) | ||
1720 | goto skip_fault; | ||
1721 | |||
1722 | /* | ||
1723 | * If address not in the guest TLB, then give the guest a fault, | ||
1724 | * the resulting handler will do the right thing | ||
1725 | */ | ||
1726 | index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) | | ||
1727 | (kvm_read_c0_guest_entryhi | ||
1728 | (cop0) & KVM_ENTRYHI_ASID)); | ||
1729 | |||
1730 | if (index < 0) { | ||
1731 | vcpu->arch.host_cp0_badvaddr = va; | ||
1732 | vcpu->arch.pc = curr_pc; | ||
1733 | er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, | ||
1734 | vcpu); | ||
1735 | preempt_enable(); | ||
1736 | goto dont_update_pc; | ||
1737 | } else { | ||
1738 | struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index]; | ||
1739 | /* | ||
1740 | * Check if the entry is valid, if not then setup a TLB | ||
1741 | * invalid exception to the guest | ||
1742 | */ | ||
1743 | if (!TLB_IS_VALID(*tlb, va)) { | ||
1744 | vcpu->arch.host_cp0_badvaddr = va; | ||
1745 | vcpu->arch.pc = curr_pc; | ||
1746 | er = kvm_mips_emulate_tlbinv_ld(cause, NULL, | ||
1747 | run, vcpu); | ||
1748 | preempt_enable(); | ||
1749 | goto dont_update_pc; | ||
1750 | } | ||
1751 | /* | ||
1752 | * We fault an entry from the guest tlb to the | ||
1753 | * shadow host TLB | ||
1754 | */ | ||
1755 | if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) { | ||
1756 | kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", | ||
1757 | __func__, va, index, vcpu, | ||
1758 | read_c0_entryhi()); | ||
1759 | er = EMULATE_FAIL; | ||
1760 | preempt_enable(); | ||
1761 | goto done; | ||
1762 | } | ||
1763 | } | ||
1764 | } else { | ||
1765 | kvm_err("INVALID CACHE INDEX/ADDRESS (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", | ||
1766 | cache, op, base, arch->gprs[base], offset); | ||
1767 | er = EMULATE_FAIL; | ||
1768 | preempt_enable(); | ||
1769 | goto done; | ||
1770 | |||
1771 | } | ||
1772 | |||
1773 | skip_fault: | ||
1774 | /* XXXKYMA: Only a subset of cache ops are supported, used by Linux */ | 1805 | /* XXXKYMA: Only a subset of cache ops are supported, used by Linux */ |
1775 | if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) { | 1806 | if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) { |
1776 | flush_dcache_line(va); | 1807 | /* |
1777 | 1808 | * Perform the dcache part of icache synchronisation on the | |
1809 | * guest's behalf. | ||
1810 | */ | ||
1811 | er = kvm_mips_guest_cache_op(protected_writeback_dcache_line, | ||
1812 | curr_pc, va, run, vcpu, cause); | ||
1813 | if (er != EMULATE_DONE) | ||
1814 | goto done; | ||
1778 | #ifdef CONFIG_KVM_MIPS_DYN_TRANS | 1815 | #ifdef CONFIG_KVM_MIPS_DYN_TRANS |
1779 | /* | 1816 | /* |
1780 | * Replace the CACHE instruction, with a SYNCI, not the same, | 1817 | * Replace the CACHE instruction, with a SYNCI, not the same, |
@@ -1783,8 +1820,15 @@ skip_fault: | |||
1783 | kvm_mips_trans_cache_va(inst, opc, vcpu); | 1820 | kvm_mips_trans_cache_va(inst, opc, vcpu); |
1784 | #endif | 1821 | #endif |
1785 | } else if (op_inst == Hit_Invalidate_I) { | 1822 | } else if (op_inst == Hit_Invalidate_I) { |
1786 | flush_dcache_line(va); | 1823 | /* Perform the icache synchronisation on the guest's behalf */ |
1787 | flush_icache_line(va); | 1824 | er = kvm_mips_guest_cache_op(protected_writeback_dcache_line, |
1825 | curr_pc, va, run, vcpu, cause); | ||
1826 | if (er != EMULATE_DONE) | ||
1827 | goto done; | ||
1828 | er = kvm_mips_guest_cache_op(protected_flush_icache_line, | ||
1829 | curr_pc, va, run, vcpu, cause); | ||
1830 | if (er != EMULATE_DONE) | ||
1831 | goto done; | ||
1788 | 1832 | ||
1789 | #ifdef CONFIG_KVM_MIPS_DYN_TRANS | 1833 | #ifdef CONFIG_KVM_MIPS_DYN_TRANS |
1790 | /* Replace the CACHE instruction, with a SYNCI */ | 1834 | /* Replace the CACHE instruction, with a SYNCI */ |
@@ -1796,17 +1840,13 @@ skip_fault: | |||
1796 | er = EMULATE_FAIL; | 1840 | er = EMULATE_FAIL; |
1797 | } | 1841 | } |
1798 | 1842 | ||
1799 | preempt_enable(); | ||
1800 | done: | 1843 | done: |
1801 | /* Rollback PC only if emulation was unsuccessful */ | 1844 | /* Rollback PC only if emulation was unsuccessful */ |
1802 | if (er == EMULATE_FAIL) | 1845 | if (er == EMULATE_FAIL) |
1803 | vcpu->arch.pc = curr_pc; | 1846 | vcpu->arch.pc = curr_pc; |
1804 | 1847 | /* Guest exception needs guest to resume */ | |
1805 | dont_update_pc: | 1848 | if (er == EMULATE_EXCEPT) |
1806 | /* | 1849 | er = EMULATE_DONE; |
1807 | * This is for exceptions whose emulation updates the PC, so do not | ||
1808 | * overwrite the PC under any circumstances | ||
1809 | */ | ||
1810 | 1850 | ||
1811 | return er; | 1851 | return er; |
1812 | } | 1852 | } |
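With EMULATE_EXCEPT in the mix, the epilogue of kvm_mips_emulate_cache() does two things: roll the PC back only on a genuine failure, and fold "a guest exception was delivered and the PC already points at the guest handler" into plain success for the caller. A small sketch of that normalisation with placeholder names rather than the kernel's enums:

#include <stdio.h>

enum emu_result { EMU_DONE, EMU_FAIL, EMU_EXCEPT };

/* Placeholder mirror of the "done:" epilogue above. */
static enum emu_result finish_cache_emulation(enum emu_result er,
					      unsigned long *pc,
					      unsigned long saved_pc)
{
	if (er == EMU_FAIL)
		*pc = saved_pc;	/* rollback only if emulation failed */
	if (er == EMU_EXCEPT)
		er = EMU_DONE;	/* guest will resume at its own handler */
	return er;
}

int main(void)
{
	unsigned long pc = 0x80001004, saved = 0x80001000;

	printf("%d (pc=%#lx)\n", finish_cache_emulation(EMU_FAIL, &pc, saved), pc);
	printf("%d (pc=%#lx)\n", finish_cache_emulation(EMU_EXCEPT, &pc, saved), pc);
	return 0;
}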
@@ -1817,12 +1857,14 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, | |||
1817 | { | 1857 | { |
1818 | union mips_instruction inst; | 1858 | union mips_instruction inst; |
1819 | enum emulation_result er = EMULATE_DONE; | 1859 | enum emulation_result er = EMULATE_DONE; |
1860 | int err; | ||
1820 | 1861 | ||
1821 | /* Fetch the instruction. */ | 1862 | /* Fetch the instruction. */ |
1822 | if (cause & CAUSEF_BD) | 1863 | if (cause & CAUSEF_BD) |
1823 | opc += 1; | 1864 | opc += 1; |
1824 | 1865 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | |
1825 | inst.word = kvm_get_inst(opc, vcpu); | 1866 | if (err) |
1867 | return EMULATE_FAIL; | ||
1826 | 1868 | ||
1827 | switch (inst.r_format.opcode) { | 1869 | switch (inst.r_format.opcode) { |
1828 | case cop0_op: | 1870 | case cop0_op: |
@@ -1874,6 +1916,22 @@ unknown: | |||
1874 | return er; | 1916 | return er; |
1875 | } | 1917 | } |
1876 | 1918 | ||
1919 | /** | ||
1920 | * kvm_mips_guest_exception_base() - Find guest exception vector base address. | ||
1921 | * | ||
1922 | * Returns: The base address of the current guest exception vector, taking | ||
1923 | * both Guest.CP0_Status.BEV and Guest.CP0_EBase into account. | ||
1924 | */ | ||
1925 | long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu) | ||
1926 | { | ||
1927 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
1928 | |||
1929 | if (kvm_read_c0_guest_status(cop0) & ST0_BEV) | ||
1930 | return KVM_GUEST_CKSEG1ADDR(0x1fc00200); | ||
1931 | else | ||
1932 | return kvm_read_c0_guest_ebase(cop0) & MIPS_EBASE_BASE; | ||
1933 | } | ||
1934 | |||
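Each delivery site then adds a fixed offset to this base: 0x0 for a TLB refill taken with EXL clear, 0x180 for the general vector, and 0x200 when the dedicated interrupt vector is in use. A small sketch of the arithmetic; the mask and the non-KVM boot vector address used here are illustrative, since the guest's remapped segment layout differs:

#include <stdio.h>
#include <stdbool.h>

/* Illustrative constants only; the real guest segment layout differs. */
#define BOOT_VECTOR_BASE  0xbfc00200ul	/* used while Status.BEV is set */
#define EBASE_BASE_MASK   0xfffff000ul	/* EBase base field, example mask */

static unsigned long exception_base(bool bev, unsigned long ebase)
{
	return bev ? BOOT_VECTOR_BASE : (ebase & EBASE_BASE_MASK);
}

int main(void)
{
	unsigned long base = exception_base(false, 0x80000000ul);

	printf("refill  vector: %#lx\n", base + 0x0);
	printf("general vector: %#lx\n", base + 0x180);
	printf("int IV  vector: %#lx\n", base + 0x200);
	return 0;
}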
1877 | enum emulation_result kvm_mips_emulate_syscall(u32 cause, | 1935 | enum emulation_result kvm_mips_emulate_syscall(u32 cause, |
1878 | u32 *opc, | 1936 | u32 *opc, |
1879 | struct kvm_run *run, | 1937 | struct kvm_run *run, |
@@ -1899,7 +1957,7 @@ enum emulation_result kvm_mips_emulate_syscall(u32 cause, | |||
1899 | (EXCCODE_SYS << CAUSEB_EXCCODE)); | 1957 | (EXCCODE_SYS << CAUSEB_EXCCODE)); |
1900 | 1958 | ||
1901 | /* Set PC to the exception entry point */ | 1959 | /* Set PC to the exception entry point */ |
1902 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 1960 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
1903 | 1961 | ||
1904 | } else { | 1962 | } else { |
1905 | kvm_err("Trying to deliver SYSCALL when EXL is already set\n"); | 1963 | kvm_err("Trying to deliver SYSCALL when EXL is already set\n"); |
@@ -1933,13 +1991,13 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause, | |||
1933 | arch->pc); | 1991 | arch->pc); |
1934 | 1992 | ||
1935 | /* set pc to the exception entry point */ | 1993 | /* set pc to the exception entry point */ |
1936 | arch->pc = KVM_GUEST_KSEG0 + 0x0; | 1994 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0; |
1937 | 1995 | ||
1938 | } else { | 1996 | } else { |
1939 | kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n", | 1997 | kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n", |
1940 | arch->pc); | 1998 | arch->pc); |
1941 | 1999 | ||
1942 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2000 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
1943 | } | 2001 | } |
1944 | 2002 | ||
1945 | kvm_change_c0_guest_cause(cop0, (0xff), | 2003 | kvm_change_c0_guest_cause(cop0, (0xff), |
@@ -1949,8 +2007,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause, | |||
1949 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); | 2007 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); |
1950 | /* XXXKYMA: is the context register used by linux??? */ | 2008 | /* XXXKYMA: is the context register used by linux??? */ |
1951 | kvm_write_c0_guest_entryhi(cop0, entryhi); | 2009 | kvm_write_c0_guest_entryhi(cop0, entryhi); |
1952 | /* Blow away the shadow host TLBs */ | ||
1953 | kvm_mips_flush_host_tlb(1); | ||
1954 | 2010 | ||
1955 | return EMULATE_DONE; | 2011 | return EMULATE_DONE; |
1956 | } | 2012 | } |
@@ -1978,16 +2034,14 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause, | |||
1978 | 2034 | ||
1979 | kvm_debug("[EXL == 0] delivering TLB INV @ pc %#lx\n", | 2035 | kvm_debug("[EXL == 0] delivering TLB INV @ pc %#lx\n", |
1980 | arch->pc); | 2036 | arch->pc); |
1981 | |||
1982 | /* set pc to the exception entry point */ | ||
1983 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | ||
1984 | |||
1985 | } else { | 2037 | } else { |
1986 | kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n", | 2038 | kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n", |
1987 | arch->pc); | 2039 | arch->pc); |
1988 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | ||
1989 | } | 2040 | } |
1990 | 2041 | ||
2042 | /* set pc to the exception entry point */ | ||
2043 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; | ||
2044 | |||
1991 | kvm_change_c0_guest_cause(cop0, (0xff), | 2045 | kvm_change_c0_guest_cause(cop0, (0xff), |
1992 | (EXCCODE_TLBL << CAUSEB_EXCCODE)); | 2046 | (EXCCODE_TLBL << CAUSEB_EXCCODE)); |
1993 | 2047 | ||
@@ -1995,8 +2049,6 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause, | |||
1995 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); | 2049 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); |
1996 | /* XXXKYMA: is the context register used by linux??? */ | 2050 | /* XXXKYMA: is the context register used by linux??? */ |
1997 | kvm_write_c0_guest_entryhi(cop0, entryhi); | 2051 | kvm_write_c0_guest_entryhi(cop0, entryhi); |
1998 | /* Blow away the shadow host TLBs */ | ||
1999 | kvm_mips_flush_host_tlb(1); | ||
2000 | 2052 | ||
2001 | return EMULATE_DONE; | 2053 | return EMULATE_DONE; |
2002 | } | 2054 | } |
@@ -2025,11 +2077,11 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause, | |||
2025 | arch->pc); | 2077 | arch->pc); |
2026 | 2078 | ||
2027 | /* Set PC to the exception entry point */ | 2079 | /* Set PC to the exception entry point */ |
2028 | arch->pc = KVM_GUEST_KSEG0 + 0x0; | 2080 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0; |
2029 | } else { | 2081 | } else { |
2030 | kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n", | 2082 | kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n", |
2031 | arch->pc); | 2083 | arch->pc); |
2032 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2084 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2033 | } | 2085 | } |
2034 | 2086 | ||
2035 | kvm_change_c0_guest_cause(cop0, (0xff), | 2087 | kvm_change_c0_guest_cause(cop0, (0xff), |
@@ -2039,8 +2091,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause, | |||
2039 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); | 2091 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); |
2040 | /* XXXKYMA: is the context register used by linux??? */ | 2092 | /* XXXKYMA: is the context register used by linux??? */ |
2041 | kvm_write_c0_guest_entryhi(cop0, entryhi); | 2093 | kvm_write_c0_guest_entryhi(cop0, entryhi); |
2042 | /* Blow away the shadow host TLBs */ | ||
2043 | kvm_mips_flush_host_tlb(1); | ||
2044 | 2094 | ||
2045 | return EMULATE_DONE; | 2095 | return EMULATE_DONE; |
2046 | } | 2096 | } |
@@ -2067,15 +2117,14 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause, | |||
2067 | 2117 | ||
2068 | kvm_debug("[EXL == 0] Delivering TLB MISS @ pc %#lx\n", | 2118 | kvm_debug("[EXL == 0] Delivering TLB MISS @ pc %#lx\n", |
2069 | arch->pc); | 2119 | arch->pc); |
2070 | |||
2071 | /* Set PC to the exception entry point */ | ||
2072 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | ||
2073 | } else { | 2120 | } else { |
2074 | kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n", | 2121 | kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n", |
2075 | arch->pc); | 2122 | arch->pc); |
2076 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | ||
2077 | } | 2123 | } |
2078 | 2124 | ||
2125 | /* Set PC to the exception entry point */ | ||
2126 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; | ||
2127 | |||
2079 | kvm_change_c0_guest_cause(cop0, (0xff), | 2128 | kvm_change_c0_guest_cause(cop0, (0xff), |
2080 | (EXCCODE_TLBS << CAUSEB_EXCCODE)); | 2129 | (EXCCODE_TLBS << CAUSEB_EXCCODE)); |
2081 | 2130 | ||
@@ -2083,41 +2132,10 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause, | |||
2083 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); | 2132 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); |
2084 | /* XXXKYMA: is the context register used by linux??? */ | 2133 | /* XXXKYMA: is the context register used by linux??? */ |
2085 | kvm_write_c0_guest_entryhi(cop0, entryhi); | 2134 | kvm_write_c0_guest_entryhi(cop0, entryhi); |
2086 | /* Blow away the shadow host TLBs */ | ||
2087 | kvm_mips_flush_host_tlb(1); | ||
2088 | 2135 | ||
2089 | return EMULATE_DONE; | 2136 | return EMULATE_DONE; |
2090 | } | 2137 | } |
2091 | 2138 | ||
2092 | /* TLBMOD: store into address matching TLB with Dirty bit off */ | ||
2093 | enum emulation_result kvm_mips_handle_tlbmod(u32 cause, u32 *opc, | ||
2094 | struct kvm_run *run, | ||
2095 | struct kvm_vcpu *vcpu) | ||
2096 | { | ||
2097 | enum emulation_result er = EMULATE_DONE; | ||
2098 | #ifdef DEBUG | ||
2099 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
2100 | unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | | ||
2101 | (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); | ||
2102 | int index; | ||
2103 | |||
2104 | /* If address not in the guest TLB, then we are in trouble */ | ||
2105 | index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); | ||
2106 | if (index < 0) { | ||
2107 | /* XXXKYMA Invalidate and retry */ | ||
2108 | kvm_mips_host_tlb_inv(vcpu, vcpu->arch.host_cp0_badvaddr); | ||
2109 | kvm_err("%s: host got TLBMOD for %#lx but entry not present in Guest TLB\n", | ||
2110 | __func__, entryhi); | ||
2111 | kvm_mips_dump_guest_tlbs(vcpu); | ||
2112 | kvm_mips_dump_host_tlbs(); | ||
2113 | return EMULATE_FAIL; | ||
2114 | } | ||
2115 | #endif | ||
2116 | |||
2117 | er = kvm_mips_emulate_tlbmod(cause, opc, run, vcpu); | ||
2118 | return er; | ||
2119 | } | ||
2120 | |||
2121 | enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, | 2139 | enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, |
2122 | u32 *opc, | 2140 | u32 *opc, |
2123 | struct kvm_run *run, | 2141 | struct kvm_run *run, |
@@ -2140,14 +2158,13 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, | |||
2140 | 2158 | ||
2141 | kvm_debug("[EXL == 0] Delivering TLB MOD @ pc %#lx\n", | 2159 | kvm_debug("[EXL == 0] Delivering TLB MOD @ pc %#lx\n", |
2142 | arch->pc); | 2160 | arch->pc); |
2143 | |||
2144 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | ||
2145 | } else { | 2161 | } else { |
2146 | kvm_debug("[EXL == 1] Delivering TLB MOD @ pc %#lx\n", | 2162 | kvm_debug("[EXL == 1] Delivering TLB MOD @ pc %#lx\n", |
2147 | arch->pc); | 2163 | arch->pc); |
2148 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | ||
2149 | } | 2164 | } |
2150 | 2165 | ||
2166 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; | ||
2167 | |||
2151 | kvm_change_c0_guest_cause(cop0, (0xff), | 2168 | kvm_change_c0_guest_cause(cop0, (0xff), |
2152 | (EXCCODE_MOD << CAUSEB_EXCCODE)); | 2169 | (EXCCODE_MOD << CAUSEB_EXCCODE)); |
2153 | 2170 | ||
@@ -2155,8 +2172,6 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, | |||
2155 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); | 2172 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); |
2156 | /* XXXKYMA: is the context register used by linux??? */ | 2173 | /* XXXKYMA: is the context register used by linux??? */ |
2157 | kvm_write_c0_guest_entryhi(cop0, entryhi); | 2174 | kvm_write_c0_guest_entryhi(cop0, entryhi); |
2158 | /* Blow away the shadow host TLBs */ | ||
2159 | kvm_mips_flush_host_tlb(1); | ||
2160 | 2175 | ||
2161 | return EMULATE_DONE; | 2176 | return EMULATE_DONE; |
2162 | } | 2177 | } |
@@ -2181,7 +2196,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause, | |||
2181 | 2196 | ||
2182 | } | 2197 | } |
2183 | 2198 | ||
2184 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2199 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2185 | 2200 | ||
2186 | kvm_change_c0_guest_cause(cop0, (0xff), | 2201 | kvm_change_c0_guest_cause(cop0, (0xff), |
2187 | (EXCCODE_CPU << CAUSEB_EXCCODE)); | 2202 | (EXCCODE_CPU << CAUSEB_EXCCODE)); |
@@ -2215,7 +2230,7 @@ enum emulation_result kvm_mips_emulate_ri_exc(u32 cause, | |||
2215 | (EXCCODE_RI << CAUSEB_EXCCODE)); | 2230 | (EXCCODE_RI << CAUSEB_EXCCODE)); |
2216 | 2231 | ||
2217 | /* Set PC to the exception entry point */ | 2232 | /* Set PC to the exception entry point */ |
2218 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2233 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2219 | 2234 | ||
2220 | } else { | 2235 | } else { |
2221 | kvm_err("Trying to deliver RI when EXL is already set\n"); | 2236 | kvm_err("Trying to deliver RI when EXL is already set\n"); |
@@ -2250,7 +2265,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(u32 cause, | |||
2250 | (EXCCODE_BP << CAUSEB_EXCCODE)); | 2265 | (EXCCODE_BP << CAUSEB_EXCCODE)); |
2251 | 2266 | ||
2252 | /* Set PC to the exception entry point */ | 2267 | /* Set PC to the exception entry point */ |
2253 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2268 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2254 | 2269 | ||
2255 | } else { | 2270 | } else { |
2256 | kvm_err("Trying to deliver BP when EXL is already set\n"); | 2271 | kvm_err("Trying to deliver BP when EXL is already set\n"); |
@@ -2285,7 +2300,7 @@ enum emulation_result kvm_mips_emulate_trap_exc(u32 cause, | |||
2285 | (EXCCODE_TR << CAUSEB_EXCCODE)); | 2300 | (EXCCODE_TR << CAUSEB_EXCCODE)); |
2286 | 2301 | ||
2287 | /* Set PC to the exception entry point */ | 2302 | /* Set PC to the exception entry point */ |
2288 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2303 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2289 | 2304 | ||
2290 | } else { | 2305 | } else { |
2291 | kvm_err("Trying to deliver TRAP when EXL is already set\n"); | 2306 | kvm_err("Trying to deliver TRAP when EXL is already set\n"); |
@@ -2320,7 +2335,7 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause, | |||
2320 | (EXCCODE_MSAFPE << CAUSEB_EXCCODE)); | 2335 | (EXCCODE_MSAFPE << CAUSEB_EXCCODE)); |
2321 | 2336 | ||
2322 | /* Set PC to the exception entry point */ | 2337 | /* Set PC to the exception entry point */ |
2323 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2338 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2324 | 2339 | ||
2325 | } else { | 2340 | } else { |
2326 | kvm_err("Trying to deliver MSAFPE when EXL is already set\n"); | 2341 | kvm_err("Trying to deliver MSAFPE when EXL is already set\n"); |
@@ -2355,7 +2370,7 @@ enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause, | |||
2355 | (EXCCODE_FPE << CAUSEB_EXCCODE)); | 2370 | (EXCCODE_FPE << CAUSEB_EXCCODE)); |
2356 | 2371 | ||
2357 | /* Set PC to the exception entry point */ | 2372 | /* Set PC to the exception entry point */ |
2358 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2373 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2359 | 2374 | ||
2360 | } else { | 2375 | } else { |
2361 | kvm_err("Trying to deliver FPE when EXL is already set\n"); | 2376 | kvm_err("Trying to deliver FPE when EXL is already set\n"); |
@@ -2390,7 +2405,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause, | |||
2390 | (EXCCODE_MSADIS << CAUSEB_EXCCODE)); | 2405 | (EXCCODE_MSADIS << CAUSEB_EXCCODE)); |
2391 | 2406 | ||
2392 | /* Set PC to the exception entry point */ | 2407 | /* Set PC to the exception entry point */ |
2393 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2408 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2394 | 2409 | ||
2395 | } else { | 2410 | } else { |
2396 | kvm_err("Trying to deliver MSADIS when EXL is already set\n"); | 2411 | kvm_err("Trying to deliver MSADIS when EXL is already set\n"); |
@@ -2409,6 +2424,7 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc, | |||
2409 | enum emulation_result er = EMULATE_DONE; | 2424 | enum emulation_result er = EMULATE_DONE; |
2410 | unsigned long curr_pc; | 2425 | unsigned long curr_pc; |
2411 | union mips_instruction inst; | 2426 | union mips_instruction inst; |
2427 | int err; | ||
2412 | 2428 | ||
2413 | /* | 2429 | /* |
2414 | * Update PC and hold onto current PC in case there is | 2430 | * Update PC and hold onto current PC in case there is |
@@ -2422,11 +2438,9 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc, | |||
2422 | /* Fetch the instruction. */ | 2438 | /* Fetch the instruction. */ |
2423 | if (cause & CAUSEF_BD) | 2439 | if (cause & CAUSEF_BD) |
2424 | opc += 1; | 2440 | opc += 1; |
2425 | 2441 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | |
2426 | inst.word = kvm_get_inst(opc, vcpu); | 2442 | if (err) { |
2427 | 2443 | kvm_err("%s: Cannot get inst @ %p (%d)\n", __func__, opc, err); | |
2428 | if (inst.word == KVM_INVALID_INST) { | ||
2429 | kvm_err("%s: Cannot get inst @ %p\n", __func__, opc); | ||
2430 | return EMULATE_FAIL; | 2444 | return EMULATE_FAIL; |
2431 | } | 2445 | } |
2432 | 2446 | ||
@@ -2557,7 +2571,7 @@ static enum emulation_result kvm_mips_emulate_exc(u32 cause, | |||
2557 | (exccode << CAUSEB_EXCCODE)); | 2571 | (exccode << CAUSEB_EXCCODE)); |
2558 | 2572 | ||
2559 | /* Set PC to the exception entry point */ | 2573 | /* Set PC to the exception entry point */ |
2560 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 2574 | arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; |
2561 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); | 2575 | kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); |
2562 | 2576 | ||
2563 | kvm_debug("Delivering EXC %d @ pc %#lx, badVaddr: %#lx\n", | 2577 | kvm_debug("Delivering EXC %d @ pc %#lx, badVaddr: %#lx\n", |
@@ -2670,7 +2684,8 @@ enum emulation_result kvm_mips_check_privilege(u32 cause, | |||
2670 | enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, | 2684 | enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, |
2671 | u32 *opc, | 2685 | u32 *opc, |
2672 | struct kvm_run *run, | 2686 | struct kvm_run *run, |
2673 | struct kvm_vcpu *vcpu) | 2687 | struct kvm_vcpu *vcpu, |
2688 | bool write_fault) | ||
2674 | { | 2689 | { |
2675 | enum emulation_result er = EMULATE_DONE; | 2690 | enum emulation_result er = EMULATE_DONE; |
2676 | u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; | 2691 | u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; |
@@ -2726,7 +2741,8 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, | |||
2726 | * OK we have a Guest TLB entry, now inject it into the | 2741 | * OK we have a Guest TLB entry, now inject it into the |
2727 | * shadow host TLB | 2742 | * shadow host TLB |
2728 | */ | 2743 | */ |
2729 | if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) { | 2744 | if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, va, |
2745 | write_fault)) { | ||
2730 | kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", | 2746 | kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", |
2731 | __func__, va, index, vcpu, | 2747 | __func__, va, index, vcpu, |
2732 | read_c0_entryhi()); | 2748 | read_c0_entryhi()); |
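The new write_fault argument threaded through kvm_mips_handle_tlbmiss() and kvm_mips_handle_mapped_seg_tlb_fault() lets the fault path treat reads and writes differently, which matters once dirty logging and copy-on-write are involved: only a write should end up with a writable, dirty mapping. A toy sketch of that split, using an invented software PTE rather than the real page-table code:

#include <stdbool.h>
#include <stdio.h>

/* Invented software PTE, only to show the read/write split. */
struct toy_pte {
	bool valid;
	bool writable;
	bool dirty;
};

static void map_on_fault(struct toy_pte *pte, bool write_fault)
{
	pte->valid = true;
	if (write_fault) {
		/* Writes get a writable, dirty mapping straight away. */
		pte->writable = true;
		pte->dirty = true;
	}
	/* Reads stay read-only so a later write still faults and can be logged. */
}

int main(void)
{
	struct toy_pte r = {0}, w = {0};

	map_on_fault(&r, false);
	map_on_fault(&w, true);
	printf("read fault : valid=%d writable=%d dirty=%d\n", r.valid, r.writable, r.dirty);
	printf("write fault: valid=%d writable=%d dirty=%d\n", w.valid, w.writable, w.dirty);
	return 0;
}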
diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c index e92fb190e2d6..c5b254c4d0da 100644 --- a/arch/mips/kvm/entry.c +++ b/arch/mips/kvm/entry.c | |||
@@ -12,8 +12,11 @@ | |||
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/kvm_host.h> | 14 | #include <linux/kvm_host.h> |
15 | #include <linux/log2.h> | ||
16 | #include <asm/mmu_context.h> | ||
15 | #include <asm/msa.h> | 17 | #include <asm/msa.h> |
16 | #include <asm/setup.h> | 18 | #include <asm/setup.h> |
19 | #include <asm/tlbex.h> | ||
17 | #include <asm/uasm.h> | 20 | #include <asm/uasm.h> |
18 | 21 | ||
19 | /* Register names */ | 22 | /* Register names */ |
@@ -50,6 +53,8 @@ | |||
50 | /* Some CP0 registers */ | 53 | /* Some CP0 registers */ |
51 | #define C0_HWRENA 7, 0 | 54 | #define C0_HWRENA 7, 0 |
52 | #define C0_BADVADDR 8, 0 | 55 | #define C0_BADVADDR 8, 0 |
56 | #define C0_BADINSTR 8, 1 | ||
57 | #define C0_BADINSTRP 8, 2 | ||
53 | #define C0_ENTRYHI 10, 0 | 58 | #define C0_ENTRYHI 10, 0 |
54 | #define C0_STATUS 12, 0 | 59 | #define C0_STATUS 12, 0 |
55 | #define C0_CAUSE 13, 0 | 60 | #define C0_CAUSE 13, 0 |
@@ -89,6 +94,21 @@ static void *kvm_mips_build_ret_from_exit(void *addr); | |||
89 | static void *kvm_mips_build_ret_to_guest(void *addr); | 94 | static void *kvm_mips_build_ret_to_guest(void *addr); |
90 | static void *kvm_mips_build_ret_to_host(void *addr); | 95 | static void *kvm_mips_build_ret_to_host(void *addr); |
91 | 96 | ||
97 | /* | ||
98 | * The version of this function in tlbex.c uses current_cpu_type(), but for KVM | ||
99 | * we assume symmetry. | ||
100 | */ | ||
101 | static int c0_kscratch(void) | ||
102 | { | ||
103 | switch (boot_cpu_type()) { | ||
104 | case CPU_XLP: | ||
105 | case CPU_XLR: | ||
106 | return 22; | ||
107 | default: | ||
108 | return 31; | ||
109 | } | ||
110 | } | ||
111 | |||
92 | /** | 112 | /** |
93 | * kvm_mips_entry_setup() - Perform global setup for entry code. | 113 | * kvm_mips_entry_setup() - Perform global setup for entry code. |
94 | * | 114 | * |
@@ -103,18 +123,21 @@ int kvm_mips_entry_setup(void) | |||
103 | * We prefer to use KScratchN registers if they are available over the | 123 | * We prefer to use KScratchN registers if they are available over the |
104 | * defaults above, which may not work on all cores. | 124 | * defaults above, which may not work on all cores. |
105 | */ | 125 | */ |
106 | unsigned int kscratch_mask = cpu_data[0].kscratch_mask & 0xfc; | 126 | unsigned int kscratch_mask = cpu_data[0].kscratch_mask; |
127 | |||
128 | if (pgd_reg != -1) | ||
129 | kscratch_mask &= ~BIT(pgd_reg); | ||
107 | 130 | ||
108 | /* Pick a scratch register for storing VCPU */ | 131 | /* Pick a scratch register for storing VCPU */ |
109 | if (kscratch_mask) { | 132 | if (kscratch_mask) { |
110 | scratch_vcpu[0] = 31; | 133 | scratch_vcpu[0] = c0_kscratch(); |
111 | scratch_vcpu[1] = ffs(kscratch_mask) - 1; | 134 | scratch_vcpu[1] = ffs(kscratch_mask) - 1; |
112 | kscratch_mask &= ~BIT(scratch_vcpu[1]); | 135 | kscratch_mask &= ~BIT(scratch_vcpu[1]); |
113 | } | 136 | } |
114 | 137 | ||
115 | /* Pick a scratch register to use as a temp for saving state */ | 138 | /* Pick a scratch register to use as a temp for saving state */ |
116 | if (kscratch_mask) { | 139 | if (kscratch_mask) { |
117 | scratch_tmp[0] = 31; | 140 | scratch_tmp[0] = c0_kscratch(); |
118 | scratch_tmp[1] = ffs(kscratch_mask) - 1; | 141 | scratch_tmp[1] = ffs(kscratch_mask) - 1; |
119 | kscratch_mask &= ~BIT(scratch_tmp[1]); | 142 | kscratch_mask &= ~BIT(scratch_tmp[1]); |
120 | } | 143 | } |
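Both scratch slots are carved out of one availability bitmask: any register already spoken for (such as pgd_reg) is cleared first, then each allocation takes the lowest remaining bit with ffs() and removes it. A self-contained sketch of that allocation with an invented mask value:

#include <stdio.h>
#include <strings.h>	/* ffs() */

#define BIT(n) (1u << (n))

/* Pick the lowest available register from *mask, or -1 if none remain. */
static int pick_scratch(unsigned int *mask)
{
	int reg;

	if (!*mask)
		return -1;
	reg = ffs(*mask) - 1;	/* ffs() is 1-based */
	*mask &= ~BIT(reg);
	return reg;
}

int main(void)
{
	/* Invented example: KScratch2-KScratch5 implemented, KScratch3 reserved. */
	unsigned int mask = BIT(2) | BIT(3) | BIT(4) | BIT(5);

	mask &= ~BIT(3);					/* already used elsewhere */
	printf("vcpu scratch: %d\n", pick_scratch(&mask));	/* 2 */
	printf("tmp  scratch: %d\n", pick_scratch(&mask));	/* 4 */
	return 0;
}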
@@ -130,7 +153,7 @@ static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp, | |||
130 | UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); | 153 | UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); |
131 | 154 | ||
132 | /* Save the temp scratch register value in cp0_cause of stack frame */ | 155 | /* Save the temp scratch register value in cp0_cause of stack frame */ |
133 | if (scratch_tmp[0] == 31) { | 156 | if (scratch_tmp[0] == c0_kscratch()) { |
134 | UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); | 157 | UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); |
135 | UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); | 158 | UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); |
136 | } | 159 | } |
@@ -146,7 +169,7 @@ static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp, | |||
146 | UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); | 169 | UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); |
147 | UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); | 170 | UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); |
148 | 171 | ||
149 | if (scratch_tmp[0] == 31) { | 172 | if (scratch_tmp[0] == c0_kscratch()) { |
150 | UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); | 173 | UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); |
151 | UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); | 174 | UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); |
152 | } | 175 | } |
@@ -286,23 +309,26 @@ static void *kvm_mips_build_enter_guest(void *addr) | |||
286 | uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL); | 309 | uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL); |
287 | uasm_i_xori(&p, T0, T0, KSU_USER); | 310 | uasm_i_xori(&p, T0, T0, KSU_USER); |
288 | uasm_il_bnez(&p, &r, T0, label_kernel_asid); | 311 | uasm_il_bnez(&p, &r, T0, label_kernel_asid); |
289 | UASM_i_ADDIU(&p, T1, K1, | 312 | UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, |
290 | offsetof(struct kvm_vcpu_arch, guest_kernel_asid)); | 313 | guest_kernel_mm.context.asid)); |
291 | /* else user */ | 314 | /* else user */ |
292 | UASM_i_ADDIU(&p, T1, K1, | 315 | UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, |
293 | offsetof(struct kvm_vcpu_arch, guest_user_asid)); | 316 | guest_user_mm.context.asid)); |
294 | uasm_l_kernel_asid(&l, p); | 317 | uasm_l_kernel_asid(&l, p); |
295 | 318 | ||
296 | /* t1: contains the base of the ASID array, need to get the cpu id */ | 319 | /* t1: contains the base of the ASID array, need to get the cpu id */ |
297 | /* smp_processor_id */ | 320 | /* smp_processor_id */ |
298 | uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP); | 321 | uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP); |
299 | /* x4 */ | 322 | /* index the ASID array */ |
300 | uasm_i_sll(&p, T2, T2, 2); | 323 | uasm_i_sll(&p, T2, T2, ilog2(sizeof(long))); |
301 | UASM_i_ADDU(&p, T3, T1, T2); | 324 | UASM_i_ADDU(&p, T3, T1, T2); |
302 | uasm_i_lw(&p, K0, 0, T3); | 325 | UASM_i_LW(&p, K0, 0, T3); |
303 | #ifdef CONFIG_MIPS_ASID_BITS_VARIABLE | 326 | #ifdef CONFIG_MIPS_ASID_BITS_VARIABLE |
304 | /* x sizeof(struct cpuinfo_mips)/4 */ | 327 | /* |
305 | uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/4); | 328 | * reuse ASID array offset |
329 | * cpuinfo_mips is a multiple of sizeof(long) | ||
330 | */ | ||
331 | uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/sizeof(long)); | ||
306 | uasm_i_mul(&p, T2, T2, T3); | 332 | uasm_i_mul(&p, T2, T2, T3); |
307 | 333 | ||
308 | UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask); | 334 | UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask); |
@@ -312,7 +338,20 @@ static void *kvm_mips_build_enter_guest(void *addr) | |||
312 | #else | 338 | #else |
313 | uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); | 339 | uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); |
314 | #endif | 340 | #endif |
315 | uasm_i_mtc0(&p, K0, C0_ENTRYHI); | 341 | |
342 | /* | ||
343 | * Set up KVM T&E GVA pgd. | ||
344 | * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): | ||
345 | * - call tlbmiss_handler_setup_pgd(mm->pgd) | ||
346 | * - but skips write into CP0_PWBase for now | ||
347 | */ | ||
348 | UASM_i_LW(&p, A0, (int)offsetof(struct mm_struct, pgd) - | ||
349 | (int)offsetof(struct mm_struct, context.asid), T1); | ||
350 | |||
351 | UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); | ||
352 | uasm_i_jalr(&p, RA, T9); | ||
353 | uasm_i_mtc0(&p, K0, C0_ENTRYHI); | ||
354 | |||
316 | uasm_i_ehb(&p); | 355 | uasm_i_ehb(&p); |
317 | 356 | ||
318 | /* Disable RDHWR access */ | 357 | /* Disable RDHWR access */ |
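The shift by ilog2(sizeof(long)) replaces the old hard-coded "x4" so that the ASID array is indexed in native-word-sized elements on both 32-bit and 64-bit kernels. In plain C the address computation is roughly the following (array contents invented):

#include <stdio.h>

/* Per-CPU ASIDs, one native word per CPU (values invented for the example). */
static unsigned long asid[4] = { 0x101, 0x102, 0x103, 0x104 };

int main(void)
{
	int cpu = 2;
	/*
	 * Roughly what the generated sequence computes:
	 * element address = array base + (cpu << ilog2(sizeof(long))),
	 * i.e. plain asid[cpu] once the element size is the native word.
	 */
	unsigned long *slot = (unsigned long *)((char *)asid + cpu * sizeof(long));

	printf("asid for cpu %d: %#lx\n", cpu, *slot);
	return 0;
}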
@@ -348,6 +387,80 @@ static void *kvm_mips_build_enter_guest(void *addr) | |||
348 | } | 387 | } |
349 | 388 | ||
350 | /** | 389 | /** |
390 | * kvm_mips_build_tlb_refill_exception() - Assemble TLB refill handler. | ||
391 | * @addr: Address to start writing code. | ||
392 | * @handler: Address of common handler (within range of @addr). | ||
393 | * | ||
394 | * Assemble TLB refill exception fast path handler for guest execution. | ||
395 | * | ||
396 | * Returns: Next address after end of written function. | ||
397 | */ | ||
398 | void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler) | ||
399 | { | ||
400 | u32 *p = addr; | ||
401 | struct uasm_label labels[2]; | ||
402 | struct uasm_reloc relocs[2]; | ||
403 | struct uasm_label *l = labels; | ||
404 | struct uasm_reloc *r = relocs; | ||
405 | |||
406 | memset(labels, 0, sizeof(labels)); | ||
407 | memset(relocs, 0, sizeof(relocs)); | ||
408 | |||
409 | /* Save guest k1 into scratch register */ | ||
410 | UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); | ||
411 | |||
412 | /* Get the VCPU pointer from the VCPU scratch register */ | ||
413 | UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); | ||
414 | |||
415 | /* Save guest k0 into VCPU structure */ | ||
416 | UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1); | ||
417 | |||
418 | /* | ||
419 | * Some of the common tlbex code uses current_cpu_type(). For KVM we | ||
420 | * assume symmetry and just disable preemption to silence the warning. | ||
421 | */ | ||
422 | preempt_disable(); | ||
423 | |||
424 | /* | ||
425 | * Now for the actual refill bit. A lot of this can be common with the | ||
426 | * Linux TLB refill handler, however we don't need to handle so many | ||
427 | * cases. We only need to handle user mode refills, and user mode runs | ||
428 | * with 32-bit addressing. | ||
429 | * | ||
430 | * Therefore the branch to label_vmalloc generated by build_get_pmde64() | ||
431 | * that isn't resolved should never actually get taken and is harmless | ||
432 | * to leave in place for now. | ||
433 | */ | ||
434 | |||
435 | #ifdef CONFIG_64BIT | ||
436 | build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ | ||
437 | #else | ||
438 | build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ | ||
439 | #endif | ||
440 | |||
441 | /* we don't support huge pages yet */ | ||
442 | |||
443 | build_get_ptep(&p, K0, K1); | ||
444 | build_update_entries(&p, K0, K1); | ||
445 | build_tlb_write_entry(&p, &l, &r, tlb_random); | ||
446 | |||
447 | preempt_enable(); | ||
448 | |||
449 | /* Get the VCPU pointer from the VCPU scratch register again */ | ||
450 | UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); | ||
451 | |||
452 | /* Restore the guest's k0/k1 registers */ | ||
453 | UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1); | ||
454 | uasm_i_ehb(&p); | ||
455 | UASM_i_MFC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); | ||
456 | |||
457 | /* Jump to guest */ | ||
458 | uasm_i_eret(&p); | ||
459 | |||
460 | return p; | ||
461 | } | ||
462 | |||
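Behind build_get_pgde32()/build_get_pmde64(), build_get_ptep() and build_update_entries() sits an ordinary software page-table walk: index the top-level table with the upper bits of BadVAddr, then index the resulting PTE page with the middle bits. (The real refill loads an even/odd PTE pair into EntryLo0/EntryLo1; the sketch below keeps a single PTE, and its shift widths and types are made up, not the kernel's.)

#include <stdio.h>
#include <stdint.h>

/* Made-up 32-bit split: 10-bit pgd index, 10-bit pte index, 12-bit page offset. */
#define PAGE_SHIFT	12
#define PTE_SHIFT	PAGE_SHIFT
#define PGD_SHIFT	(PTE_SHIFT + 10)
#define PTRS		1024

typedef uint32_t toy_pte_t;

static toy_pte_t pte_page[PTRS];			/* one pte table */
static toy_pte_t *pgd[PTRS] = { [1] = pte_page };	/* one populated pgd slot */

static toy_pte_t *walk(uint32_t badvaddr)
{
	toy_pte_t *ptep = pgd[(badvaddr >> PGD_SHIFT) & (PTRS - 1)];

	if (!ptep)
		return NULL;		/* would be a nested fault in reality */
	return &ptep[(badvaddr >> PTE_SHIFT) & (PTRS - 1)];
}

int main(void)
{
	uint32_t va = (1u << PGD_SHIFT) | (5u << PTE_SHIFT) | 0x123;
	toy_pte_t *ptep = walk(va);

	pte_page[5] = 0xabcd;
	printf("pte for %#x: %#x\n", va, ptep ? *ptep : 0);
	return 0;
}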
463 | /** | ||
351 | * kvm_mips_build_exception() - Assemble first level guest exception handler. | 464 | * kvm_mips_build_exception() - Assemble first level guest exception handler. |
352 | * @addr: Address to start writing code. | 465 | * @addr: Address to start writing code. |
353 | * @handler: Address of common handler (within range of @addr). | 466 | * @handler: Address of common handler (within range of @addr). |
@@ -468,6 +581,18 @@ void *kvm_mips_build_exit(void *addr) | |||
468 | uasm_i_mfc0(&p, K0, C0_CAUSE); | 581 | uasm_i_mfc0(&p, K0, C0_CAUSE); |
469 | uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1); | 582 | uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1); |
470 | 583 | ||
584 | if (cpu_has_badinstr) { | ||
585 | uasm_i_mfc0(&p, K0, C0_BADINSTR); | ||
586 | uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, | ||
587 | host_cp0_badinstr), K1); | ||
588 | } | ||
589 | |||
590 | if (cpu_has_badinstrp) { | ||
591 | uasm_i_mfc0(&p, K0, C0_BADINSTRP); | ||
592 | uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, | ||
593 | host_cp0_badinstrp), K1); | ||
594 | } | ||
595 | |||
471 | /* Now restore the host state just enough to run the handlers */ | 596 | /* Now restore the host state just enough to run the handlers */ |
472 | 597 | ||
473 | /* Switch EBASE to the one used by Linux */ | 598 | /* Switch EBASE to the one used by Linux */ |
diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c index e88403b3dcdd..aa0a1a00faf6 100644 --- a/arch/mips/kvm/interrupt.c +++ b/arch/mips/kvm/interrupt.c | |||
@@ -183,10 +183,11 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, | |||
183 | (exccode << CAUSEB_EXCCODE)); | 183 | (exccode << CAUSEB_EXCCODE)); |
184 | 184 | ||
185 | /* XXXSL Set PC to the interrupt exception entry point */ | 185 | /* XXXSL Set PC to the interrupt exception entry point */ |
186 | arch->pc = kvm_mips_guest_exception_base(vcpu); | ||
186 | if (kvm_read_c0_guest_cause(cop0) & CAUSEF_IV) | 187 | if (kvm_read_c0_guest_cause(cop0) & CAUSEF_IV) |
187 | arch->pc = KVM_GUEST_KSEG0 + 0x200; | 188 | arch->pc += 0x200; |
188 | else | 189 | else |
189 | arch->pc = KVM_GUEST_KSEG0 + 0x180; | 190 | arch->pc += 0x180; |
190 | 191 | ||
191 | clear_bit(priority, &vcpu->arch.pending_exceptions); | 192 | clear_bit(priority, &vcpu->arch.pending_exceptions); |
192 | } | 193 | } |
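With the base factored out, interrupt delivery only chooses the vector spacing: +0x200 when Cause.IV selects the dedicated interrupt vector, +0x180 otherwise. A one-function sketch; treat the exact bit position as illustrative:

#include <stdio.h>

#define CAUSEF_IV	(1u << 23)	/* Cause.IV: use the special interrupt vector */

static unsigned long irq_vector(unsigned long base, unsigned int cause)
{
	return base + ((cause & CAUSEF_IV) ? 0x200 : 0x180);
}

int main(void)
{
	printf("%#lx\n", irq_vector(0x80000000ul, 0));
	printf("%#lx\n", irq_vector(0x80000000ul, CAUSEF_IV));
	return 0;
}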
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 29ec9ab3fd55..ed81e5ac1426 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/page.h> | 22 | #include <asm/page.h> |
23 | #include <asm/cacheflush.h> | 23 | #include <asm/cacheflush.h> |
24 | #include <asm/mmu_context.h> | 24 | #include <asm/mmu_context.h> |
25 | #include <asm/pgalloc.h> | ||
25 | #include <asm/pgtable.h> | 26 | #include <asm/pgtable.h> |
26 | 27 | ||
27 | #include <linux/kvm_host.h> | 28 | #include <linux/kvm_host.h> |
@@ -63,18 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
63 | {NULL} | 64 | {NULL} |
64 | }; | 65 | }; |
65 | 66 | ||
66 | static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu) | ||
67 | { | ||
68 | int i; | ||
69 | |||
70 | for_each_possible_cpu(i) { | ||
71 | vcpu->arch.guest_kernel_asid[i] = 0; | ||
72 | vcpu->arch.guest_user_asid[i] = 0; | ||
73 | } | ||
74 | |||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | /* | 67 | /* |
79 | * XXXKYMA: We are simulating a processor that has the WII bit set in | 68 |
80 | * Config7, so we are "runnable" if interrupts are pending | 69 | * Config7, so we are "runnable" if interrupts are pending |
@@ -104,39 +93,12 @@ void kvm_arch_check_processor_compat(void *rtn) | |||
104 | *(int *)rtn = 0; | 93 | *(int *)rtn = 0; |
105 | } | 94 | } |
106 | 95 | ||
107 | static void kvm_mips_init_tlbs(struct kvm *kvm) | ||
108 | { | ||
109 | unsigned long wired; | ||
110 | |||
111 | /* | ||
112 | * Add a wired entry to the TLB, it is used to map the commpage to | ||
113 | * the Guest kernel | ||
114 | */ | ||
115 | wired = read_c0_wired(); | ||
116 | write_c0_wired(wired + 1); | ||
117 | mtc0_tlbw_hazard(); | ||
118 | kvm->arch.commpage_tlb = wired; | ||
119 | |||
120 | kvm_debug("[%d] commpage TLB: %d\n", smp_processor_id(), | ||
121 | kvm->arch.commpage_tlb); | ||
122 | } | ||
123 | |||
124 | static void kvm_mips_init_vm_percpu(void *arg) | ||
125 | { | ||
126 | struct kvm *kvm = (struct kvm *)arg; | ||
127 | |||
128 | kvm_mips_init_tlbs(kvm); | ||
129 | kvm_mips_callbacks->vm_init(kvm); | ||
130 | |||
131 | } | ||
132 | |||
133 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | 96 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
134 | { | 97 | { |
135 | if (atomic_inc_return(&kvm_mips_instance) == 1) { | 98 | /* Allocate page table to map GPA -> RPA */ |
136 | kvm_debug("%s: 1st KVM instance, setup host TLB parameters\n", | 99 | kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); |
137 | __func__); | 100 | if (!kvm->arch.gpa_mm.pgd) |
138 | on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1); | 101 | return -ENOMEM; |
139 | } | ||
140 | 102 | ||
141 | return 0; | 103 | return 0; |
142 | } | 104 | } |
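Per-CPU wired-TLB setup is gone; each VM now simply owns a GPA page table rooted at gpa_mm.pgd, allocated at VM creation and freed (after a full flush) by kvm_mips_free_gpa_pt() further down. The lifecycle reduces to allocate, flush, free, sketched here with a calloc'd table standing in for the real pgd allocator:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

struct toy_vm {
	unsigned long *gpa_pgd;		/* stand-in for arch.gpa_mm.pgd */
};

static int toy_init_vm(struct toy_vm *vm)
{
	vm->gpa_pgd = calloc(1024, sizeof(*vm->gpa_pgd));
	return vm->gpa_pgd ? 0 : -ENOMEM;
}

static void toy_destroy_vm(struct toy_vm *vm)
{
	/* A real implementation flushes every mapping first, then frees. */
	free(vm->gpa_pgd);
	vm->gpa_pgd = NULL;
}

int main(void)
{
	struct toy_vm vm;

	if (toy_init_vm(&vm))
		return 1;
	printf("GPA table ready at %p\n", (void *)vm.gpa_pgd);
	toy_destroy_vm(&vm);
	return 0;
}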
@@ -156,13 +118,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm) | |||
156 | unsigned int i; | 118 | unsigned int i; |
157 | struct kvm_vcpu *vcpu; | 119 | struct kvm_vcpu *vcpu; |
158 | 120 | ||
159 | /* Put the pages we reserved for the guest pmap */ | ||
160 | for (i = 0; i < kvm->arch.guest_pmap_npages; i++) { | ||
161 | if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE) | ||
162 | kvm_release_pfn_clean(kvm->arch.guest_pmap[i]); | ||
163 | } | ||
164 | kfree(kvm->arch.guest_pmap); | ||
165 | |||
166 | kvm_for_each_vcpu(i, vcpu, kvm) { | 121 | kvm_for_each_vcpu(i, vcpu, kvm) { |
167 | kvm_arch_vcpu_free(vcpu); | 122 | kvm_arch_vcpu_free(vcpu); |
168 | } | 123 | } |
@@ -177,25 +132,17 @@ void kvm_mips_free_vcpus(struct kvm *kvm) | |||
177 | mutex_unlock(&kvm->lock); | 132 | mutex_unlock(&kvm->lock); |
178 | } | 133 | } |
179 | 134 | ||
180 | static void kvm_mips_uninit_tlbs(void *arg) | 135 | static void kvm_mips_free_gpa_pt(struct kvm *kvm) |
181 | { | 136 | { |
182 | /* Restore wired count */ | 137 | /* It should always be safe to remove after flushing the whole range */ |
183 | write_c0_wired(0); | 138 | WARN_ON(!kvm_mips_flush_gpa_pt(kvm, 0, ~0)); |
184 | mtc0_tlbw_hazard(); | 139 | pgd_free(NULL, kvm->arch.gpa_mm.pgd); |
185 | /* Clear out all the TLBs */ | ||
186 | kvm_local_flush_tlb_all(); | ||
187 | } | 140 | } |
188 | 141 | ||
189 | void kvm_arch_destroy_vm(struct kvm *kvm) | 142 | void kvm_arch_destroy_vm(struct kvm *kvm) |
190 | { | 143 | { |
191 | kvm_mips_free_vcpus(kvm); | 144 | kvm_mips_free_vcpus(kvm); |
192 | 145 | kvm_mips_free_gpa_pt(kvm); | |
193 | /* If this is the last instance, restore wired count */ | ||
194 | if (atomic_dec_return(&kvm_mips_instance) == 0) { | ||
195 | kvm_debug("%s: last KVM instance, restoring TLB parameters\n", | ||
196 | __func__); | ||
197 | on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1); | ||
198 | } | ||
199 | } | 146 | } |
200 | 147 | ||
201 | long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, | 148 | long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, |
@@ -210,6 +157,32 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
210 | return 0; | 157 | return 0; |
211 | } | 158 | } |
212 | 159 | ||
160 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | ||
161 | { | ||
162 | /* Flush whole GPA */ | ||
163 | kvm_mips_flush_gpa_pt(kvm, 0, ~0); | ||
164 | |||
165 | /* Let implementation do the rest */ | ||
166 | kvm_mips_callbacks->flush_shadow_all(kvm); | ||
167 | } | ||
168 | |||
169 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | ||
170 | struct kvm_memory_slot *slot) | ||
171 | { | ||
172 | /* | ||
173 | * The slot has been made invalid (ready for moving or deletion), so we | ||
174 | * need to ensure that it can no longer be accessed by any guest VCPUs. | ||
175 | */ | ||
176 | |||
177 | spin_lock(&kvm->mmu_lock); | ||
178 | /* Flush slot from GPA */ | ||
179 | kvm_mips_flush_gpa_pt(kvm, slot->base_gfn, | ||
180 | slot->base_gfn + slot->npages - 1); | ||
181 | /* Let implementation do the rest */ | ||
182 | kvm_mips_callbacks->flush_shadow_memslot(kvm, slot); | ||
183 | spin_unlock(&kvm->mmu_lock); | ||
184 | } | ||
185 | |||
213 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 186 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
214 | struct kvm_memory_slot *memslot, | 187 | struct kvm_memory_slot *memslot, |
215 | const struct kvm_userspace_memory_region *mem, | 188 | const struct kvm_userspace_memory_region *mem, |
@@ -224,35 +197,32 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
224 | const struct kvm_memory_slot *new, | 197 | const struct kvm_memory_slot *new, |
225 | enum kvm_mr_change change) | 198 | enum kvm_mr_change change) |
226 | { | 199 | { |
227 | unsigned long npages = 0; | 200 | int needs_flush; |
228 | int i; | ||
229 | 201 | ||
230 | kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n", | 202 | kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n", |
231 | __func__, kvm, mem->slot, mem->guest_phys_addr, | 203 | __func__, kvm, mem->slot, mem->guest_phys_addr, |
232 | mem->memory_size, mem->userspace_addr); | 204 | mem->memory_size, mem->userspace_addr); |
233 | 205 | ||
234 | /* Setup Guest PMAP table */ | 206 | /* |
235 | if (!kvm->arch.guest_pmap) { | 207 | * If dirty page logging is enabled, write protect all pages in the slot |
236 | if (mem->slot == 0) | 208 | * ready for dirty logging. |
237 | npages = mem->memory_size >> PAGE_SHIFT; | 209 | * |
238 | 210 | * There is no need to do this in any of the following cases: | |
239 | if (npages) { | 211 | * CREATE: No dirty mappings will already exist. |
240 | kvm->arch.guest_pmap_npages = npages; | 212 | * MOVE/DELETE: The old mappings will already have been cleaned up by |
241 | kvm->arch.guest_pmap = | 213 | * kvm_arch_flush_shadow_memslot() |
242 | kzalloc(npages * sizeof(unsigned long), GFP_KERNEL); | 214 | */ |
243 | 215 | if (change == KVM_MR_FLAGS_ONLY && | |
244 | if (!kvm->arch.guest_pmap) { | 216 | (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && |
245 | kvm_err("Failed to allocate guest PMAP\n"); | 217 | new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { |
246 | return; | 218 | spin_lock(&kvm->mmu_lock); |
247 | } | 219 | /* Write protect GPA page table entries */ |
248 | 220 | needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn, | |
249 | kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n", | 221 | new->base_gfn + new->npages - 1); |
250 | npages, kvm->arch.guest_pmap); | 222 | /* Let implementation do the rest */ |
251 | 223 | if (needs_flush) | |
252 | /* Now setup the page table */ | 224 | kvm_mips_callbacks->flush_shadow_memslot(kvm, new); |
253 | for (i = 0; i < npages; i++) | 225 | spin_unlock(&kvm->mmu_lock); |
254 | kvm->arch.guest_pmap[i] = KVM_INVALID_PAGE; | ||
255 | } | ||
256 | } | 226 | } |
257 | } | 227 | } |
258 | 228 | ||
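The gating condition is easy to misread: write protection is only applied when nothing but the flags changed and dirty logging went from off to on, because CREATE starts with no mappings and MOVE/DELETE have already been cleaned up by kvm_arch_flush_shadow_memslot(). A truth-table sketch of just that predicate, with invented flag constants:

#include <stdio.h>
#include <stdbool.h>

enum change { CHG_CREATE, CHG_DELETE, CHG_MOVE, CHG_FLAGS_ONLY };
#define LOG_DIRTY	0x1	/* invented stand-in for KVM_MEM_LOG_DIRTY_PAGES */

static bool needs_write_protect(enum change change,
				unsigned int old_flags, unsigned int new_flags)
{
	return change == CHG_FLAGS_ONLY &&
	       !(old_flags & LOG_DIRTY) &&
	       (new_flags & LOG_DIRTY);
}

int main(void)
{
	printf("%d\n", needs_write_protect(CHG_FLAGS_ONLY, 0, LOG_DIRTY));		/* 1 */
	printf("%d\n", needs_write_protect(CHG_FLAGS_ONLY, LOG_DIRTY, LOG_DIRTY));	/* 0 */
	printf("%d\n", needs_write_protect(CHG_CREATE, 0, LOG_DIRTY));			/* 0 */
	return 0;
}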
@@ -276,7 +246,7 @@ static inline void dump_handler(const char *symbol, void *start, void *end) | |||
276 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | 246 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) |
277 | { | 247 | { |
278 | int err, size; | 248 | int err, size; |
279 | void *gebase, *p, *handler; | 249 | void *gebase, *p, *handler, *refill_start, *refill_end; |
280 | int i; | 250 | int i; |
281 | 251 | ||
282 | struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); | 252 | struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); |
@@ -329,8 +299,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
329 | /* Build guest exception vectors dynamically in unmapped memory */ | 299 | /* Build guest exception vectors dynamically in unmapped memory */ |
330 | handler = gebase + 0x2000; | 300 | handler = gebase + 0x2000; |
331 | 301 | ||
332 | /* TLB Refill, EXL = 0 */ | 302 | /* TLB refill */ |
333 | kvm_mips_build_exception(gebase, handler); | 303 | refill_start = gebase; |
304 | refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler); | ||
334 | 305 | ||
335 | /* General Exception Entry point */ | 306 | /* General Exception Entry point */ |
336 | kvm_mips_build_exception(gebase + 0x180, handler); | 307 | kvm_mips_build_exception(gebase + 0x180, handler); |
@@ -356,6 +327,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
356 | pr_debug("#include <asm/regdef.h>\n"); | 327 | pr_debug("#include <asm/regdef.h>\n"); |
357 | pr_debug("\n"); | 328 | pr_debug("\n"); |
358 | dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p); | 329 | dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p); |
330 | dump_handler("kvm_tlb_refill", refill_start, refill_end); | ||
359 | dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200); | 331 | dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200); |
360 | dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); | 332 | dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); |
361 | 333 | ||
@@ -406,6 +378,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
406 | 378 | ||
407 | kvm_mips_dump_stats(vcpu); | 379 | kvm_mips_dump_stats(vcpu); |
408 | 380 | ||
381 | kvm_mmu_free_memory_caches(vcpu); | ||
409 | kfree(vcpu->arch.guest_ebase); | 382 | kfree(vcpu->arch.guest_ebase); |
410 | kfree(vcpu->arch.kseg0_commpage); | 383 | kfree(vcpu->arch.kseg0_commpage); |
411 | kfree(vcpu); | 384 | kfree(vcpu); |
@@ -422,37 +395,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
422 | return -ENOIOCTLCMD; | 395 | return -ENOIOCTLCMD; |
423 | } | 396 | } |
424 | 397 | ||
425 | /* Must be called with preemption disabled, just before entering guest */ | ||
426 | static void kvm_mips_check_asids(struct kvm_vcpu *vcpu) | ||
427 | { | ||
428 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
429 | int i, cpu = smp_processor_id(); | ||
430 | unsigned int gasid; | ||
431 | |||
432 | /* | ||
433 | * Lazy host ASID regeneration for guest user mode. | ||
434 | * If the guest ASID has changed since the last guest usermode | ||
435 | * execution, regenerate the host ASID so as to invalidate stale TLB | ||
436 | * entries. | ||
437 | */ | ||
438 | if (!KVM_GUEST_KERNEL_MODE(vcpu)) { | ||
439 | gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID; | ||
440 | if (gasid != vcpu->arch.last_user_gasid) { | ||
441 | kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, | ||
442 | vcpu); | ||
443 | vcpu->arch.guest_user_asid[cpu] = | ||
444 | vcpu->arch.guest_user_mm.context.asid[cpu]; | ||
445 | for_each_possible_cpu(i) | ||
446 | if (i != cpu) | ||
447 | vcpu->arch.guest_user_asid[cpu] = 0; | ||
448 | vcpu->arch.last_user_gasid = gasid; | ||
449 | } | ||
450 | } | ||
451 | } | ||
452 | |||
453 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | 398 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
454 | { | 399 | { |
455 | int r = 0; | 400 | int r = -EINTR; |
456 | sigset_t sigsaved; | 401 | sigset_t sigsaved; |
457 | 402 | ||
458 | if (vcpu->sigset_active) | 403 | if (vcpu->sigset_active) |
@@ -464,31 +409,30 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
464 | vcpu->mmio_needed = 0; | 409 | vcpu->mmio_needed = 0; |
465 | } | 410 | } |
466 | 411 | ||
412 | if (run->immediate_exit) | ||
413 | goto out; | ||
414 | |||
467 | lose_fpu(1); | 415 | lose_fpu(1); |
468 | 416 | ||
469 | local_irq_disable(); | 417 | local_irq_disable(); |
470 | /* Check if we have any exceptions/interrupts pending */ | ||
471 | kvm_mips_deliver_interrupts(vcpu, | ||
472 | kvm_read_c0_guest_cause(vcpu->arch.cop0)); | ||
473 | |||
474 | guest_enter_irqoff(); | 418 | guest_enter_irqoff(); |
475 | |||
476 | /* Disable hardware page table walking while in guest */ | ||
477 | htw_stop(); | ||
478 | |||
479 | trace_kvm_enter(vcpu); | 419 | trace_kvm_enter(vcpu); |
480 | 420 | ||
481 | kvm_mips_check_asids(vcpu); | 421 | /* |
482 | 422 | * Make sure the read of VCPU requests in vcpu_run() callback is not | |
483 | r = vcpu->arch.vcpu_run(run, vcpu); | 423 | * reordered ahead of the write to vcpu->mode, or we could miss a TLB |
484 | trace_kvm_out(vcpu); | 424 | * flush request while the requester sees the VCPU as outside of guest |
425 | * mode and not needing an IPI. | ||
426 | */ | ||
427 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); | ||
485 | 428 | ||
486 | /* Re-enable HTW before enabling interrupts */ | 429 | r = kvm_mips_callbacks->vcpu_run(run, vcpu); |
487 | htw_start(); | ||
488 | 430 | ||
431 | trace_kvm_out(vcpu); | ||
489 | guest_exit_irqoff(); | 432 | guest_exit_irqoff(); |
490 | local_irq_enable(); | 433 | local_irq_enable(); |
491 | 434 | ||
435 | out: | ||
492 | if (vcpu->sigset_active) | 436 | if (vcpu->sigset_active) |
493 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 437 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
494 | 438 | ||
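The smp_store_mb() above is one half of a handshake: the runner publishes IN_GUEST_MODE before it reads pending requests, and a requester queues its request before it reads the mode, so at least one side always notices the other. The same shape in portable C11 atomics; the request and mode values are invented, and this is a sketch of the idea rather than the kernel primitive:

#include <stdatomic.h>
#include <stdio.h>

enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE };

static atomic_int mode = OUTSIDE_GUEST_MODE;
static atomic_uint requests;
#define REQ_TLB_FLUSH	0x1u

/* Runner side: publish the mode with full ordering, then read requests. */
static unsigned int enter_guest_path(void)
{
	atomic_store_explicit(&mode, IN_GUEST_MODE, memory_order_seq_cst);
	return atomic_load_explicit(&requests, memory_order_seq_cst);
}

/* Requester side: queue the request, then decide whether a kick (IPI) is needed. */
static int request_tlb_flush(void)
{
	atomic_fetch_or_explicit(&requests, REQ_TLB_FLUSH, memory_order_seq_cst);
	return atomic_load_explicit(&mode, memory_order_seq_cst) == IN_GUEST_MODE;
}

int main(void)
{
	printf("needs kick: %d\n", request_tlb_flush());
	printf("pending on entry: %#x\n", enter_guest_path());
	return 0;
}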
@@ -580,33 +524,6 @@ static u64 kvm_mips_get_one_regs[] = { | |||
580 | KVM_REG_MIPS_LO, | 524 | KVM_REG_MIPS_LO, |
581 | #endif | 525 | #endif |
582 | KVM_REG_MIPS_PC, | 526 | KVM_REG_MIPS_PC, |
583 | |||
584 | KVM_REG_MIPS_CP0_INDEX, | ||
585 | KVM_REG_MIPS_CP0_CONTEXT, | ||
586 | KVM_REG_MIPS_CP0_USERLOCAL, | ||
587 | KVM_REG_MIPS_CP0_PAGEMASK, | ||
588 | KVM_REG_MIPS_CP0_WIRED, | ||
589 | KVM_REG_MIPS_CP0_HWRENA, | ||
590 | KVM_REG_MIPS_CP0_BADVADDR, | ||
591 | KVM_REG_MIPS_CP0_COUNT, | ||
592 | KVM_REG_MIPS_CP0_ENTRYHI, | ||
593 | KVM_REG_MIPS_CP0_COMPARE, | ||
594 | KVM_REG_MIPS_CP0_STATUS, | ||
595 | KVM_REG_MIPS_CP0_CAUSE, | ||
596 | KVM_REG_MIPS_CP0_EPC, | ||
597 | KVM_REG_MIPS_CP0_PRID, | ||
598 | KVM_REG_MIPS_CP0_CONFIG, | ||
599 | KVM_REG_MIPS_CP0_CONFIG1, | ||
600 | KVM_REG_MIPS_CP0_CONFIG2, | ||
601 | KVM_REG_MIPS_CP0_CONFIG3, | ||
602 | KVM_REG_MIPS_CP0_CONFIG4, | ||
603 | KVM_REG_MIPS_CP0_CONFIG5, | ||
604 | KVM_REG_MIPS_CP0_CONFIG7, | ||
605 | KVM_REG_MIPS_CP0_ERROREPC, | ||
606 | |||
607 | KVM_REG_MIPS_COUNT_CTL, | ||
608 | KVM_REG_MIPS_COUNT_RESUME, | ||
609 | KVM_REG_MIPS_COUNT_HZ, | ||
610 | }; | 527 | }; |
611 | 528 | ||
612 | static u64 kvm_mips_get_one_regs_fpu[] = { | 529 | static u64 kvm_mips_get_one_regs_fpu[] = { |
@@ -619,15 +536,6 @@ static u64 kvm_mips_get_one_regs_msa[] = { | |||
619 | KVM_REG_MIPS_MSA_CSR, | 536 | KVM_REG_MIPS_MSA_CSR, |
620 | }; | 537 | }; |
621 | 538 | ||
622 | static u64 kvm_mips_get_one_regs_kscratch[] = { | ||
623 | KVM_REG_MIPS_CP0_KSCRATCH1, | ||
624 | KVM_REG_MIPS_CP0_KSCRATCH2, | ||
625 | KVM_REG_MIPS_CP0_KSCRATCH3, | ||
626 | KVM_REG_MIPS_CP0_KSCRATCH4, | ||
627 | KVM_REG_MIPS_CP0_KSCRATCH5, | ||
628 | KVM_REG_MIPS_CP0_KSCRATCH6, | ||
629 | }; | ||
630 | |||
631 | static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu) | 539 | static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu) |
632 | { | 540 | { |
633 | unsigned long ret; | 541 | unsigned long ret; |
@@ -641,7 +549,6 @@ static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu) | |||
641 | } | 549 | } |
642 | if (kvm_mips_guest_can_have_msa(&vcpu->arch)) | 550 | if (kvm_mips_guest_can_have_msa(&vcpu->arch)) |
643 | ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32; | 551 | ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32; |
644 | ret += __arch_hweight8(vcpu->arch.kscratch_enabled); | ||
645 | ret += kvm_mips_callbacks->num_regs(vcpu); | 552 | ret += kvm_mips_callbacks->num_regs(vcpu); |
646 | 553 | ||
647 | return ret; | 554 | return ret; |
@@ -694,16 +601,6 @@ static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) | |||
694 | } | 601 | } |
695 | } | 602 | } |
696 | 603 | ||
697 | for (i = 0; i < 6; ++i) { | ||
698 | if (!(vcpu->arch.kscratch_enabled & BIT(i + 2))) | ||
699 | continue; | ||
700 | |||
701 | if (copy_to_user(indices, &kvm_mips_get_one_regs_kscratch[i], | ||
702 | sizeof(kvm_mips_get_one_regs_kscratch[i]))) | ||
703 | return -EFAULT; | ||
704 | ++indices; | ||
705 | } | ||
706 | |||
707 | return kvm_mips_callbacks->copy_reg_indices(vcpu, indices); | 604 | return kvm_mips_callbacks->copy_reg_indices(vcpu, indices); |
708 | } | 605 | } |
709 | 606 | ||
@@ -794,95 +691,6 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, | |||
794 | v = fpu->msacsr; | 691 | v = fpu->msacsr; |
795 | break; | 692 | break; |
796 | 693 | ||
797 | /* Co-processor 0 registers */ | ||
798 | case KVM_REG_MIPS_CP0_INDEX: | ||
799 | v = (long)kvm_read_c0_guest_index(cop0); | ||
800 | break; | ||
801 | case KVM_REG_MIPS_CP0_CONTEXT: | ||
802 | v = (long)kvm_read_c0_guest_context(cop0); | ||
803 | break; | ||
804 | case KVM_REG_MIPS_CP0_USERLOCAL: | ||
805 | v = (long)kvm_read_c0_guest_userlocal(cop0); | ||
806 | break; | ||
807 | case KVM_REG_MIPS_CP0_PAGEMASK: | ||
808 | v = (long)kvm_read_c0_guest_pagemask(cop0); | ||
809 | break; | ||
810 | case KVM_REG_MIPS_CP0_WIRED: | ||
811 | v = (long)kvm_read_c0_guest_wired(cop0); | ||
812 | break; | ||
813 | case KVM_REG_MIPS_CP0_HWRENA: | ||
814 | v = (long)kvm_read_c0_guest_hwrena(cop0); | ||
815 | break; | ||
816 | case KVM_REG_MIPS_CP0_BADVADDR: | ||
817 | v = (long)kvm_read_c0_guest_badvaddr(cop0); | ||
818 | break; | ||
819 | case KVM_REG_MIPS_CP0_ENTRYHI: | ||
820 | v = (long)kvm_read_c0_guest_entryhi(cop0); | ||
821 | break; | ||
822 | case KVM_REG_MIPS_CP0_COMPARE: | ||
823 | v = (long)kvm_read_c0_guest_compare(cop0); | ||
824 | break; | ||
825 | case KVM_REG_MIPS_CP0_STATUS: | ||
826 | v = (long)kvm_read_c0_guest_status(cop0); | ||
827 | break; | ||
828 | case KVM_REG_MIPS_CP0_CAUSE: | ||
829 | v = (long)kvm_read_c0_guest_cause(cop0); | ||
830 | break; | ||
831 | case KVM_REG_MIPS_CP0_EPC: | ||
832 | v = (long)kvm_read_c0_guest_epc(cop0); | ||
833 | break; | ||
834 | case KVM_REG_MIPS_CP0_PRID: | ||
835 | v = (long)kvm_read_c0_guest_prid(cop0); | ||
836 | break; | ||
837 | case KVM_REG_MIPS_CP0_CONFIG: | ||
838 | v = (long)kvm_read_c0_guest_config(cop0); | ||
839 | break; | ||
840 | case KVM_REG_MIPS_CP0_CONFIG1: | ||
841 | v = (long)kvm_read_c0_guest_config1(cop0); | ||
842 | break; | ||
843 | case KVM_REG_MIPS_CP0_CONFIG2: | ||
844 | v = (long)kvm_read_c0_guest_config2(cop0); | ||
845 | break; | ||
846 | case KVM_REG_MIPS_CP0_CONFIG3: | ||
847 | v = (long)kvm_read_c0_guest_config3(cop0); | ||
848 | break; | ||
849 | case KVM_REG_MIPS_CP0_CONFIG4: | ||
850 | v = (long)kvm_read_c0_guest_config4(cop0); | ||
851 | break; | ||
852 | case KVM_REG_MIPS_CP0_CONFIG5: | ||
853 | v = (long)kvm_read_c0_guest_config5(cop0); | ||
854 | break; | ||
855 | case KVM_REG_MIPS_CP0_CONFIG7: | ||
856 | v = (long)kvm_read_c0_guest_config7(cop0); | ||
857 | break; | ||
858 | case KVM_REG_MIPS_CP0_ERROREPC: | ||
859 | v = (long)kvm_read_c0_guest_errorepc(cop0); | ||
860 | break; | ||
861 | case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: | ||
862 | idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; | ||
863 | if (!(vcpu->arch.kscratch_enabled & BIT(idx))) | ||
864 | return -EINVAL; | ||
865 | switch (idx) { | ||
866 | case 2: | ||
867 | v = (long)kvm_read_c0_guest_kscratch1(cop0); | ||
868 | break; | ||
869 | case 3: | ||
870 | v = (long)kvm_read_c0_guest_kscratch2(cop0); | ||
871 | break; | ||
872 | case 4: | ||
873 | v = (long)kvm_read_c0_guest_kscratch3(cop0); | ||
874 | break; | ||
875 | case 5: | ||
876 | v = (long)kvm_read_c0_guest_kscratch4(cop0); | ||
877 | break; | ||
878 | case 6: | ||
879 | v = (long)kvm_read_c0_guest_kscratch5(cop0); | ||
880 | break; | ||
881 | case 7: | ||
882 | v = (long)kvm_read_c0_guest_kscratch6(cop0); | ||
883 | break; | ||
884 | } | ||
885 | break; | ||
886 | /* registers to be handled specially */ | 694 | /* registers to be handled specially */ |
887 | default: | 695 | default: |
888 | ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v); | 696 | ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v); |
@@ -1014,68 +822,6 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, | |||
1014 | fpu->msacsr = v; | 822 | fpu->msacsr = v; |
1015 | break; | 823 | break; |
1016 | 824 | ||
1017 | /* Co-processor 0 registers */ | ||
1018 | case KVM_REG_MIPS_CP0_INDEX: | ||
1019 | kvm_write_c0_guest_index(cop0, v); | ||
1020 | break; | ||
1021 | case KVM_REG_MIPS_CP0_CONTEXT: | ||
1022 | kvm_write_c0_guest_context(cop0, v); | ||
1023 | break; | ||
1024 | case KVM_REG_MIPS_CP0_USERLOCAL: | ||
1025 | kvm_write_c0_guest_userlocal(cop0, v); | ||
1026 | break; | ||
1027 | case KVM_REG_MIPS_CP0_PAGEMASK: | ||
1028 | kvm_write_c0_guest_pagemask(cop0, v); | ||
1029 | break; | ||
1030 | case KVM_REG_MIPS_CP0_WIRED: | ||
1031 | kvm_write_c0_guest_wired(cop0, v); | ||
1032 | break; | ||
1033 | case KVM_REG_MIPS_CP0_HWRENA: | ||
1034 | kvm_write_c0_guest_hwrena(cop0, v); | ||
1035 | break; | ||
1036 | case KVM_REG_MIPS_CP0_BADVADDR: | ||
1037 | kvm_write_c0_guest_badvaddr(cop0, v); | ||
1038 | break; | ||
1039 | case KVM_REG_MIPS_CP0_ENTRYHI: | ||
1040 | kvm_write_c0_guest_entryhi(cop0, v); | ||
1041 | break; | ||
1042 | case KVM_REG_MIPS_CP0_STATUS: | ||
1043 | kvm_write_c0_guest_status(cop0, v); | ||
1044 | break; | ||
1045 | case KVM_REG_MIPS_CP0_EPC: | ||
1046 | kvm_write_c0_guest_epc(cop0, v); | ||
1047 | break; | ||
1048 | case KVM_REG_MIPS_CP0_PRID: | ||
1049 | kvm_write_c0_guest_prid(cop0, v); | ||
1050 | break; | ||
1051 | case KVM_REG_MIPS_CP0_ERROREPC: | ||
1052 | kvm_write_c0_guest_errorepc(cop0, v); | ||
1053 | break; | ||
1054 | case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: | ||
1055 | idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; | ||
1056 | if (!(vcpu->arch.kscratch_enabled & BIT(idx))) | ||
1057 | return -EINVAL; | ||
1058 | switch (idx) { | ||
1059 | case 2: | ||
1060 | kvm_write_c0_guest_kscratch1(cop0, v); | ||
1061 | break; | ||
1062 | case 3: | ||
1063 | kvm_write_c0_guest_kscratch2(cop0, v); | ||
1064 | break; | ||
1065 | case 4: | ||
1066 | kvm_write_c0_guest_kscratch3(cop0, v); | ||
1067 | break; | ||
1068 | case 5: | ||
1069 | kvm_write_c0_guest_kscratch4(cop0, v); | ||
1070 | break; | ||
1071 | case 6: | ||
1072 | kvm_write_c0_guest_kscratch5(cop0, v); | ||
1073 | break; | ||
1074 | case 7: | ||
1075 | kvm_write_c0_guest_kscratch6(cop0, v); | ||
1076 | break; | ||
1077 | } | ||
1078 | break; | ||
1079 | /* registers to be handled specially */ | 825 | /* registers to be handled specially */ |
1080 | default: | 826 | default: |
1081 | return kvm_mips_callbacks->set_one_reg(vcpu, reg, v); | 827 | return kvm_mips_callbacks->set_one_reg(vcpu, reg, v); |
@@ -1144,18 +890,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, | |||
1144 | return -E2BIG; | 890 | return -E2BIG; |
1145 | return kvm_mips_copy_reg_indices(vcpu, user_list->reg); | 891 | return kvm_mips_copy_reg_indices(vcpu, user_list->reg); |
1146 | } | 892 | } |
1147 | case KVM_NMI: | ||
1148 | /* Treat the NMI as a CPU reset */ | ||
1149 | r = kvm_mips_reset_vcpu(vcpu); | ||
1150 | break; | ||
1151 | case KVM_INTERRUPT: | 893 | case KVM_INTERRUPT: |
1152 | { | 894 | { |
1153 | struct kvm_mips_interrupt irq; | 895 | struct kvm_mips_interrupt irq; |
1154 | 896 | ||
1155 | r = -EFAULT; | ||
1156 | if (copy_from_user(&irq, argp, sizeof(irq))) | 897 | if (copy_from_user(&irq, argp, sizeof(irq))) |
1157 | goto out; | 898 | return -EFAULT; |
1158 | |||
1159 | kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, | 899 | kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, |
1160 | irq.irq); | 900 | irq.irq); |
1161 | 901 | ||
@@ -1165,56 +905,57 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, | |||
1165 | case KVM_ENABLE_CAP: { | 905 | case KVM_ENABLE_CAP: { |
1166 | struct kvm_enable_cap cap; | 906 | struct kvm_enable_cap cap; |
1167 | 907 | ||
1168 | r = -EFAULT; | ||
1169 | if (copy_from_user(&cap, argp, sizeof(cap))) | 908 | if (copy_from_user(&cap, argp, sizeof(cap))) |
1170 | goto out; | 909 | return -EFAULT; |
1171 | r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); | 910 | r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); |
1172 | break; | 911 | break; |
1173 | } | 912 | } |
1174 | default: | 913 | default: |
1175 | r = -ENOIOCTLCMD; | 914 | r = -ENOIOCTLCMD; |
1176 | } | 915 | } |
1177 | |||
1178 | out: | ||
1179 | return r; | 916 | return r; |
1180 | } | 917 | } |
1181 | 918 | ||
1182 | /* Get (and clear) the dirty memory log for a memory slot. */ | 919 | /** |
920 | * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot | ||
921 | * @kvm: kvm instance | ||
922 | * @log: slot id and address to which we copy the log | ||
923 | * | ||
924 | * Steps 1-4 below provide a general overview of dirty page logging. See | ||
925 | * kvm_get_dirty_log_protect() function description for additional details. | ||
926 | * | ||
927 | * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we | ||
928 | * always flush the TLB (step 4) even if a previous step failed and the dirty | ||
929 | * bitmap may be corrupt. Regardless of the previous outcome, the KVM logging | ||
930 | * API does not preclude a subsequent dirty log read by user space. Flushing | ||
931 | * the TLB ensures writes will be marked dirty for the next log read. | ||
932 | * | ||
933 | * 1. Take a snapshot of the bit and clear it if needed. | ||
934 | * 2. Write protect the corresponding page. | ||
935 | * 3. Copy the snapshot to the userspace. | ||
936 | * 4. Flush TLB's if needed. | ||
937 | */ | ||
1183 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | 938 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) |
1184 | { | 939 | { |
1185 | struct kvm_memslots *slots; | 940 | struct kvm_memslots *slots; |
1186 | struct kvm_memory_slot *memslot; | 941 | struct kvm_memory_slot *memslot; |
1187 | unsigned long ga, ga_end; | 942 | bool is_dirty = false; |
1188 | int is_dirty = 0; | ||
1189 | int r; | 943 | int r; |
1190 | unsigned long n; | ||
1191 | 944 | ||
1192 | mutex_lock(&kvm->slots_lock); | 945 | mutex_lock(&kvm->slots_lock); |
1193 | 946 | ||
1194 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 947 | r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); |
1195 | if (r) | ||
1196 | goto out; | ||
1197 | 948 | ||
1198 | /* If nothing is dirty, don't bother messing with page tables. */ | ||
1199 | if (is_dirty) { | 949 | if (is_dirty) { |
1200 | slots = kvm_memslots(kvm); | 950 | slots = kvm_memslots(kvm); |
1201 | memslot = id_to_memslot(slots, log->slot); | 951 | memslot = id_to_memslot(slots, log->slot); |
1202 | 952 | ||
1203 | ga = memslot->base_gfn << PAGE_SHIFT; | 953 | /* Let implementation handle TLB/GVA invalidation */ |
1204 | ga_end = ga + (memslot->npages << PAGE_SHIFT); | 954 | kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot); |
1205 | |||
1206 | kvm_info("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga, | ||
1207 | ga_end); | ||
1208 | |||
1209 | n = kvm_dirty_bitmap_bytes(memslot); | ||
1210 | memset(memslot->dirty_bitmap, 0, n); | ||
1211 | } | 955 | } |
1212 | 956 | ||
1213 | r = 0; | ||
1214 | out: | ||
1215 | mutex_unlock(&kvm->slots_lock); | 957 | mutex_unlock(&kvm->slots_lock); |
1216 | return r; | 958 | return r; |
1217 | |||
1218 | } | 959 | } |
1219 | 960 | ||
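The rewritten kvm_vm_ioctl_get_dirty_log() above is what ultimately services the KVM_GET_DIRTY_LOG ioctl on the VM file descriptor. For context, a rough user-space sketch of the other side of that interface follows; the vm_fd, slot id and sizes are illustrative, not taken from this patch, and error handling is minimal.

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Fetch (and implicitly clear) the dirty log for one memory slot. */
    static int poll_dirty_log(int vm_fd, uint32_t slot, size_t mem_bytes,
                              size_t page_size)
    {
            size_t npages = mem_bytes / page_size;
            size_t bitmap_bytes = ((npages + 63) / 64) * 8;  /* one bit per page */
            uint64_t *bitmap = calloc(1, bitmap_bytes);
            struct kvm_dirty_log log;
            int ret;

            if (!bitmap)
                    return -1;

            memset(&log, 0, sizeof(log));
            log.slot = slot;
            log.dirty_bitmap = bitmap;

            ret = ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);

            /* ... scan the bitmap and re-send the dirty pages ... */
            free(bitmap);
            return ret;
    }

With this series MIPS routes the ioctl through kvm_get_dirty_log_protect(), so pages reported dirty are also write-protected again until the next guest write faults them back in.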
1220 | long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) | 961 | long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) |
@@ -1282,11 +1023,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
1282 | switch (ext) { | 1023 | switch (ext) { |
1283 | case KVM_CAP_ONE_REG: | 1024 | case KVM_CAP_ONE_REG: |
1284 | case KVM_CAP_ENABLE_CAP: | 1025 | case KVM_CAP_ENABLE_CAP: |
1026 | case KVM_CAP_READONLY_MEM: | ||
1027 | case KVM_CAP_SYNC_MMU: | ||
1028 | case KVM_CAP_IMMEDIATE_EXIT: | ||
1285 | r = 1; | 1029 | r = 1; |
1286 | break; | 1030 | break; |
1287 | case KVM_CAP_COALESCED_MMIO: | 1031 | case KVM_CAP_COALESCED_MMIO: |
1288 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | 1032 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; |
1289 | break; | 1033 | break; |
1034 | case KVM_CAP_NR_VCPUS: | ||
1035 | r = num_online_cpus(); | ||
1036 | break; | ||
1037 | case KVM_CAP_MAX_VCPUS: | ||
1038 | r = KVM_MAX_VCPUS; | ||
1039 | break; | ||
1290 | case KVM_CAP_MIPS_FPU: | 1040 | case KVM_CAP_MIPS_FPU: |
1291 | /* We don't handle systems with inconsistent cpu_has_fpu */ | 1041 | /* We don't handle systems with inconsistent cpu_has_fpu */ |
1292 | r = !!raw_cpu_has_fpu; | 1042 | r = !!raw_cpu_has_fpu; |
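The capability switch above is what answers KVM_CHECK_EXTENSION queries for MIPS. As a hedged illustration of how user space consumes it (the vm_fd and the choice of capability are assumptions for the example):

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <sys/ioctl.h>

    /* Returns 0 if the capability is absent; positive values are
     * capability-specific (e.g. KVM_CAP_NR_VCPUS returns a CPU count). */
    static int query_cap(int vm_fd, long cap)
    {
            int r = ioctl(vm_fd, KVM_CHECK_EXTENSION, cap);

            if (r < 0)
                    perror("KVM_CHECK_EXTENSION");
            return r;
    }

After this change, query_cap(vm_fd, KVM_CAP_READONLY_MEM) reports 1 on MIPS, and query_cap(vm_fd, KVM_CAP_NR_VCPUS) reflects num_online_cpus().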
@@ -1400,13 +1150,23 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) | |||
1400 | 1150 | ||
1401 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 1151 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
1402 | { | 1152 | { |
1403 | kvm_mips_callbacks->vcpu_init(vcpu); | 1153 | int err; |
1154 | |||
1155 | err = kvm_mips_callbacks->vcpu_init(vcpu); | ||
1156 | if (err) | ||
1157 | return err; | ||
1158 | |||
1404 | hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, | 1159 | hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, |
1405 | HRTIMER_MODE_REL); | 1160 | HRTIMER_MODE_REL); |
1406 | vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; | 1161 | vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; |
1407 | return 0; | 1162 | return 0; |
1408 | } | 1163 | } |
1409 | 1164 | ||
1165 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
1166 | { | ||
1167 | kvm_mips_callbacks->vcpu_uninit(vcpu); | ||
1168 | } | ||
1169 | |||
1410 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | 1170 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, |
1411 | struct kvm_translation *tr) | 1171 | struct kvm_translation *tr) |
1412 | { | 1172 | { |
@@ -1440,8 +1200,11 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1440 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; | 1200 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; |
1441 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; | 1201 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; |
1442 | enum emulation_result er = EMULATE_DONE; | 1202 | enum emulation_result er = EMULATE_DONE; |
1203 | u32 inst; | ||
1443 | int ret = RESUME_GUEST; | 1204 | int ret = RESUME_GUEST; |
1444 | 1205 | ||
1206 | vcpu->mode = OUTSIDE_GUEST_MODE; | ||
1207 | |||
1445 | /* re-enable HTW before enabling interrupts */ | 1208 | /* re-enable HTW before enabling interrupts */ |
1446 | htw_start(); | 1209 | htw_start(); |
1447 | 1210 | ||
@@ -1564,8 +1327,12 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1564 | break; | 1327 | break; |
1565 | 1328 | ||
1566 | default: | 1329 | default: |
1330 | if (cause & CAUSEF_BD) | ||
1331 | opc += 1; | ||
1332 | inst = 0; | ||
1333 | kvm_get_badinstr(opc, vcpu, &inst); | ||
1567 | kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", | 1334 | kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", |
1568 | exccode, opc, kvm_get_inst(opc, vcpu), badvaddr, | 1335 | exccode, opc, inst, badvaddr, |
1569 | kvm_read_c0_guest_status(vcpu->arch.cop0)); | 1336 | kvm_read_c0_guest_status(vcpu->arch.cop0)); |
1570 | kvm_arch_vcpu_dump_regs(vcpu); | 1337 | kvm_arch_vcpu_dump_regs(vcpu); |
1571 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 1338 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
@@ -1593,7 +1360,15 @@ skip_emul: | |||
1593 | if (ret == RESUME_GUEST) { | 1360 | if (ret == RESUME_GUEST) { |
1594 | trace_kvm_reenter(vcpu); | 1361 | trace_kvm_reenter(vcpu); |
1595 | 1362 | ||
1596 | kvm_mips_check_asids(vcpu); | 1363 | /* |
1364 | * Make sure the read of VCPU requests in vcpu_reenter() | ||
1365 | * callback is not reordered ahead of the write to vcpu->mode, | ||
1366 | * or we could miss a TLB flush request while the requester sees | ||
1367 | * the VCPU as outside of guest mode and not needing an IPI. | ||
1368 | */ | ||
1369 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); | ||
1370 | |||
1371 | kvm_mips_callbacks->vcpu_reenter(run, vcpu); | ||
1597 | 1372 | ||
1598 | /* | 1373 | /* |
1599 | * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context | 1374 | * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context |
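The smp_store_mb() added above implements the usual KVM kick handshake: the vCPU publishes vcpu->mode with a full barrier before it reads pending requests, while a requester posts its request before it reads vcpu->mode to decide whether an IPI is needed, so at least one side always observes the other. The shape of that handshake, expressed with C11 sequentially consistent atomics rather than the kernel primitives (names here are illustrative, not the KVM API):

    #include <stdatomic.h>

    static atomic_int vcpu_mode;        /* 0 = outside guest, 1 = in guest */
    static atomic_int tlb_flush_req;

    static void send_kick(void) { /* would IPI the vCPU out of the guest */ }
    static void do_flush(void)  { /* would flush the local TLB */ }

    /* vCPU side: publish the mode, full barrier, then check for requests
     * (this ordering is what smp_store_mb() provides in the kernel). */
    static void vcpu_reenter_guest(void)
    {
            atomic_store(&vcpu_mode, 1);        /* seq_cst store */
            if (atomic_load(&tlb_flush_req))    /* cannot be hoisted above it */
                    do_flush();
            /* ... enter guest ... */
    }

    /* Requester side: post the request, then check the mode to see whether
     * the vCPU must be kicked out of guest mode to notice it. */
    static void request_tlb_flush(void)
    {
            atomic_store(&tlb_flush_req, 1);
            if (atomic_load(&vcpu_mode) == 1)
                    send_kick();
    }

    int main(void)
    {
            request_tlb_flush();
            vcpu_reenter_guest();
            return 0;
    }

With both sides using full-barrier (seq_cst) accesses, the store-buffering outcome in which the vCPU misses the request and the requester skips the kick is impossible, which is exactly the property the comment above relies on.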
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 3b677c851be0..cb0faade311e 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c | |||
@@ -11,86 +11,995 @@ | |||
11 | 11 | ||
12 | #include <linux/highmem.h> | 12 | #include <linux/highmem.h> |
13 | #include <linux/kvm_host.h> | 13 | #include <linux/kvm_host.h> |
14 | #include <linux/uaccess.h> | ||
14 | #include <asm/mmu_context.h> | 15 | #include <asm/mmu_context.h> |
16 | #include <asm/pgalloc.h> | ||
15 | 17 | ||
16 | static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) | 18 | /* |
19 | * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels | ||
20 | * for which pages need to be cached. | ||
21 | */ | ||
22 | #if defined(__PAGETABLE_PMD_FOLDED) | ||
23 | #define KVM_MMU_CACHE_MIN_PAGES 1 | ||
24 | #else | ||
25 | #define KVM_MMU_CACHE_MIN_PAGES 2 | ||
26 | #endif | ||
27 | |||
28 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | ||
29 | int min, int max) | ||
17 | { | 30 | { |
18 | int cpu = smp_processor_id(); | 31 | void *page; |
32 | |||
33 | BUG_ON(max > KVM_NR_MEM_OBJS); | ||
34 | if (cache->nobjs >= min) | ||
35 | return 0; | ||
36 | while (cache->nobjs < max) { | ||
37 | page = (void *)__get_free_page(GFP_KERNEL); | ||
38 | if (!page) | ||
39 | return -ENOMEM; | ||
40 | cache->objects[cache->nobjs++] = page; | ||
41 | } | ||
42 | return 0; | ||
43 | } | ||
19 | 44 | ||
20 | return vcpu->arch.guest_kernel_asid[cpu] & | 45 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) |
21 | cpu_asid_mask(&cpu_data[cpu]); | 46 | { |
47 | while (mc->nobjs) | ||
48 | free_page((unsigned long)mc->objects[--mc->nobjs]); | ||
22 | } | 49 | } |
23 | 50 | ||
24 | static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) | 51 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) |
25 | { | 52 | { |
26 | int cpu = smp_processor_id(); | 53 | void *p; |
27 | 54 | ||
28 | return vcpu->arch.guest_user_asid[cpu] & | 55 | BUG_ON(!mc || !mc->nobjs); |
29 | cpu_asid_mask(&cpu_data[cpu]); | 56 | p = mc->objects[--mc->nobjs]; |
57 | return p; | ||
30 | } | 58 | } |
31 | 59 | ||
32 | static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) | 60 | void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) |
33 | { | 61 | { |
34 | int srcu_idx, err = 0; | 62 | mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); |
35 | kvm_pfn_t pfn; | 63 | } |
64 | |||
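mmu_topup_memory_cache() and mmu_memory_cache_alloc() follow the common KVM pattern of pre-allocating page-table pages while it is still legal to sleep, so that later allocations under the mmu_lock spinlock are a simple pop that cannot fail. A stand-alone toy version of the same pattern (names, sizes and malloc() are stand-ins, not the kernel code):

    #include <stdlib.h>

    #define CACHE_MAX 4

    struct page_cache {
            int nobjs;
            void *objects[CACHE_MAX];
    };

    /* Fill the cache up to 'max' entries while sleeping allocations are OK. */
    static int cache_topup(struct page_cache *c, int min, int max)
    {
            if (c->nobjs >= min)
                    return 0;
            while (c->nobjs < max) {
                    void *page = malloc(4096);   /* stands in for __get_free_page() */
                    if (!page)
                            return -1;
                    c->objects[c->nobjs++] = page;
            }
            return 0;
    }

    /* Later, under the lock, allocation is just a pop and cannot fail. */
    static void *cache_alloc(struct page_cache *c)
    {
            return c->objects[--c->nobjs];
    }

    int main(void)
    {
            struct page_cache cache = { 0 };

            if (cache_topup(&cache, 2, CACHE_MAX) == 0)
                    (void)cache_alloc(&cache);
            return 0;
    }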
65 | /** | ||
66 | * kvm_pgd_init() - Initialise KVM GPA page directory. | ||
67 | * @page: Pointer to page directory (PGD) for KVM GPA. | ||
68 | * | ||
69 | * Initialise a KVM GPA page directory with pointers to the invalid table, i.e. | ||
70 | * representing no mappings. This is similar to pgd_init(); however, it | ||
71 | * initialises all the page directory pointers, not just the ones corresponding | ||
72 | * to the userland address space (since it is for the guest physical address | ||
73 | * space rather than a virtual address space). | ||
74 | */ | ||
75 | static void kvm_pgd_init(void *page) | ||
76 | { | ||
77 | unsigned long *p, *end; | ||
78 | unsigned long entry; | ||
79 | |||
80 | #ifdef __PAGETABLE_PMD_FOLDED | ||
81 | entry = (unsigned long)invalid_pte_table; | ||
82 | #else | ||
83 | entry = (unsigned long)invalid_pmd_table; | ||
84 | #endif | ||
85 | |||
86 | p = (unsigned long *)page; | ||
87 | end = p + PTRS_PER_PGD; | ||
88 | |||
89 | do { | ||
90 | p[0] = entry; | ||
91 | p[1] = entry; | ||
92 | p[2] = entry; | ||
93 | p[3] = entry; | ||
94 | p[4] = entry; | ||
95 | p += 8; | ||
96 | p[-3] = entry; | ||
97 | p[-2] = entry; | ||
98 | p[-1] = entry; | ||
99 | } while (p != end); | ||
100 | } | ||
101 | |||
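The unrolled loop in kvm_pgd_init() above writes eight directory entries per iteration (indices 0-4 before the pointer bump, 5-7 afterwards through negative offsets), so it relies on the table size being a multiple of eight. A quick stand-alone check of that access pattern with a toy table (sizes and values are arbitrary):

    #include <assert.h>

    #define TOY_PTRS 64   /* multiple of 8, like a real page directory */

    static void toy_init(unsigned long *p, unsigned long entry)
    {
            unsigned long *end = p + TOY_PTRS;

            do {
                    p[0] = entry;    /* slots 0..4 of this group of eight */
                    p[1] = entry;
                    p[2] = entry;
                    p[3] = entry;
                    p[4] = entry;
                    p += 8;
                    p[-3] = entry;   /* slots 5..7, addressed from the new p */
                    p[-2] = entry;
                    p[-1] = entry;
            } while (p != end);
    }

    int main(void)
    {
            unsigned long table[TOY_PTRS] = { 0 };
            int i;

            toy_init(table, 0xdeadbeef);
            for (i = 0; i < TOY_PTRS; i++)
                    assert(table[i] == 0xdeadbeef);  /* every slot was covered */
            return 0;
    }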
102 | /** | ||
103 | * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory. | ||
104 | * | ||
105 | * Allocate a blank KVM GPA page directory (PGD) for representing guest physical | ||
106 | * to host physical page mappings. | ||
107 | * | ||
108 | * Returns: Pointer to new KVM GPA page directory. | ||
109 | * NULL on allocation failure. | ||
110 | */ | ||
111 | pgd_t *kvm_pgd_alloc(void) | ||
112 | { | ||
113 | pgd_t *ret; | ||
114 | |||
115 | ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER); | ||
116 | if (ret) | ||
117 | kvm_pgd_init(ret); | ||
118 | |||
119 | return ret; | ||
120 | } | ||
121 | |||
122 | /** | ||
123 | * kvm_mips_walk_pgd() - Walk page table with optional allocation. | ||
124 | * @pgd: Page directory pointer. | ||
125 | * @addr: Address to index page table using. | ||
126 | * @cache: MMU page cache to allocate new page tables from, or NULL. | ||
127 | * | ||
128 | * Walk the page tables pointed to by @pgd to find the PTE corresponding to the | ||
129 | * address @addr. If page tables don't exist for @addr, they will be created | ||
130 | * from the MMU cache if @cache is not NULL. | ||
131 | * | ||
132 | * Returns: Pointer to pte_t corresponding to @addr. | ||
133 | * NULL if a page table doesn't exist for @addr and !@cache. | ||
134 | * NULL if a page table allocation failed. | ||
135 | */ | ||
136 | static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, | ||
137 | unsigned long addr) | ||
138 | { | ||
139 | pud_t *pud; | ||
140 | pmd_t *pmd; | ||
141 | |||
142 | pgd += pgd_index(addr); | ||
143 | if (pgd_none(*pgd)) { | ||
144 | /* Not used on MIPS yet */ | ||
145 | BUG(); | ||
146 | return NULL; | ||
147 | } | ||
148 | pud = pud_offset(pgd, addr); | ||
149 | if (pud_none(*pud)) { | ||
150 | pmd_t *new_pmd; | ||
151 | |||
152 | if (!cache) | ||
153 | return NULL; | ||
154 | new_pmd = mmu_memory_cache_alloc(cache); | ||
155 | pmd_init((unsigned long)new_pmd, | ||
156 | (unsigned long)invalid_pte_table); | ||
157 | pud_populate(NULL, pud, new_pmd); | ||
158 | } | ||
159 | pmd = pmd_offset(pud, addr); | ||
160 | if (pmd_none(*pmd)) { | ||
161 | pte_t *new_pte; | ||
162 | |||
163 | if (!cache) | ||
164 | return NULL; | ||
165 | new_pte = mmu_memory_cache_alloc(cache); | ||
166 | clear_page(new_pte); | ||
167 | pmd_populate_kernel(NULL, pmd, new_pte); | ||
168 | } | ||
169 | return pte_offset(pmd, addr); | ||
170 | } | ||
171 | |||
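kvm_mips_walk_pgd() is a conventional top-down page-table walk that lazily allocates missing intermediate levels from the pre-topped-up cache, or bails out when called with a NULL cache. Stripped of the MIPS pgd/pud/pmd plumbing, the shape of the algorithm is roughly the following two-level toy (calloc() stands in for the memory cache; the 2 MB / 4 KB split is only an example):

    #include <stdlib.h>

    #define TOY_DIRS 512
    #define TOY_PTES 512

    struct toy_pte { unsigned long val; };
    struct toy_pgd { struct toy_pte *pte_page[TOY_DIRS]; };

    /* Return the PTE slot for addr, creating the missing level if alloc is set
     * (a NULL cache in the kernel version means "don't create"). */
    static struct toy_pte *toy_walk(struct toy_pgd *pgd, unsigned long addr,
                                    int alloc)
    {
            unsigned long dir_idx = (addr >> 21) % TOY_DIRS;  /* 512 * 4 KB = 2 MB */
            unsigned long pte_idx = (addr >> 12) % TOY_PTES;
            struct toy_pte *pte_page = pgd->pte_page[dir_idx];

            if (!pte_page) {
                    if (!alloc)
                            return NULL;
                    pte_page = calloc(TOY_PTES, sizeof(*pte_page));
                    if (!pte_page)
                            return NULL;
                    pgd->pte_page[dir_idx] = pte_page;
            }
            return &pte_page[pte_idx];
    }

    int main(void)
    {
            static struct toy_pgd pgd;              /* zero-initialised: empty */
            struct toy_pte *pte;

            pte = toy_walk(&pgd, 0x12345000, 0);    /* NULL: nothing mapped yet */
            if (!pte)
                    pte = toy_walk(&pgd, 0x12345000, 1);  /* allocates a PTE page */
            if (pte)
                    pte->val = 1;                   /* "install" a mapping */
            return 0;
    }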
172 | /* Caller must hold kvm->mm_lock */ | ||
173 | static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm, | ||
174 | struct kvm_mmu_memory_cache *cache, | ||
175 | unsigned long addr) | ||
176 | { | ||
177 | return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr); | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}. | ||
182 | * Flush a range of guest physical address space from the VM's GPA page tables. | ||
183 | */ | ||
184 | |||
185 | static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa, | ||
186 | unsigned long end_gpa) | ||
187 | { | ||
188 | int i_min = __pte_offset(start_gpa); | ||
189 | int i_max = __pte_offset(end_gpa); | ||
190 | bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); | ||
191 | int i; | ||
192 | |||
193 | for (i = i_min; i <= i_max; ++i) { | ||
194 | if (!pte_present(pte[i])) | ||
195 | continue; | ||
196 | |||
197 | set_pte(pte + i, __pte(0)); | ||
198 | } | ||
199 | return safe_to_remove; | ||
200 | } | ||
201 | |||
202 | static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, | ||
203 | unsigned long end_gpa) | ||
204 | { | ||
205 | pte_t *pte; | ||
206 | unsigned long end = ~0ul; | ||
207 | int i_min = __pmd_offset(start_gpa); | ||
208 | int i_max = __pmd_offset(end_gpa); | ||
209 | bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); | ||
210 | int i; | ||
211 | |||
212 | for (i = i_min; i <= i_max; ++i, start_gpa = 0) { | ||
213 | if (!pmd_present(pmd[i])) | ||
214 | continue; | ||
215 | |||
216 | pte = pte_offset(pmd + i, 0); | ||
217 | if (i == i_max) | ||
218 | end = end_gpa; | ||
219 | |||
220 | if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) { | ||
221 | pmd_clear(pmd + i); | ||
222 | pte_free_kernel(NULL, pte); | ||
223 | } else { | ||
224 | safe_to_remove = false; | ||
225 | } | ||
226 | } | ||
227 | return safe_to_remove; | ||
228 | } | ||
229 | |||
230 | static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, | ||
231 | unsigned long end_gpa) | ||
232 | { | ||
233 | pmd_t *pmd; | ||
234 | unsigned long end = ~0ul; | ||
235 | int i_min = __pud_offset(start_gpa); | ||
236 | int i_max = __pud_offset(end_gpa); | ||
237 | bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); | ||
238 | int i; | ||
239 | |||
240 | for (i = i_min; i <= i_max; ++i, start_gpa = 0) { | ||
241 | if (!pud_present(pud[i])) | ||
242 | continue; | ||
243 | |||
244 | pmd = pmd_offset(pud + i, 0); | ||
245 | if (i == i_max) | ||
246 | end = end_gpa; | ||
247 | |||
248 | if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) { | ||
249 | pud_clear(pud + i); | ||
250 | pmd_free(NULL, pmd); | ||
251 | } else { | ||
252 | safe_to_remove = false; | ||
253 | } | ||
254 | } | ||
255 | return safe_to_remove; | ||
256 | } | ||
257 | |||
258 | static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, | ||
259 | unsigned long end_gpa) | ||
260 | { | ||
261 | pud_t *pud; | ||
262 | unsigned long end = ~0ul; | ||
263 | int i_min = pgd_index(start_gpa); | ||
264 | int i_max = pgd_index(end_gpa); | ||
265 | bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); | ||
266 | int i; | ||
267 | |||
268 | for (i = i_min; i <= i_max; ++i, start_gpa = 0) { | ||
269 | if (!pgd_present(pgd[i])) | ||
270 | continue; | ||
271 | |||
272 | pud = pud_offset(pgd + i, 0); | ||
273 | if (i == i_max) | ||
274 | end = end_gpa; | ||
275 | |||
276 | if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) { | ||
277 | pgd_clear(pgd + i); | ||
278 | pud_free(NULL, pud); | ||
279 | } else { | ||
280 | safe_to_remove = false; | ||
281 | } | ||
282 | } | ||
283 | return safe_to_remove; | ||
284 | } | ||
285 | |||
286 | /** | ||
287 | * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses. | ||
288 | * @kvm: KVM pointer. | ||
289 | * @start_gfn: Guest frame number of first page in GPA range to flush. | ||
290 | * @end_gfn: Guest frame number of last page in GPA range to flush. | ||
291 | * | ||
292 | * Flushes a range of GPA mappings from the GPA page tables. | ||
293 | * | ||
294 | * The caller must hold the @kvm->mmu_lock spinlock. | ||
295 | * | ||
296 | * Returns: Whether it's safe to remove the top-level page directory because | ||
297 | * all lower levels have been removed. | ||
298 | */ | ||
299 | bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) | ||
300 | { | ||
301 | return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd, | ||
302 | start_gfn << PAGE_SHIFT, | ||
303 | end_gfn << PAGE_SHIFT); | ||
304 | } | ||
305 | |||
306 | #define BUILD_PTE_RANGE_OP(name, op) \ | ||
307 | static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start, \ | ||
308 | unsigned long end) \ | ||
309 | { \ | ||
310 | int ret = 0; \ | ||
311 | int i_min = __pte_offset(start); \ | ||
312 | int i_max = __pte_offset(end); \ | ||
313 | int i; \ | ||
314 | pte_t old, new; \ | ||
315 | \ | ||
316 | for (i = i_min; i <= i_max; ++i) { \ | ||
317 | if (!pte_present(pte[i])) \ | ||
318 | continue; \ | ||
319 | \ | ||
320 | old = pte[i]; \ | ||
321 | new = op(old); \ | ||
322 | if (pte_val(new) == pte_val(old)) \ | ||
323 | continue; \ | ||
324 | set_pte(pte + i, new); \ | ||
325 | ret = 1; \ | ||
326 | } \ | ||
327 | return ret; \ | ||
328 | } \ | ||
329 | \ | ||
330 | /* returns true if anything was done */ \ | ||
331 | static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \ | ||
332 | unsigned long end) \ | ||
333 | { \ | ||
334 | int ret = 0; \ | ||
335 | pte_t *pte; \ | ||
336 | unsigned long cur_end = ~0ul; \ | ||
337 | int i_min = __pmd_offset(start); \ | ||
338 | int i_max = __pmd_offset(end); \ | ||
339 | int i; \ | ||
340 | \ | ||
341 | for (i = i_min; i <= i_max; ++i, start = 0) { \ | ||
342 | if (!pmd_present(pmd[i])) \ | ||
343 | continue; \ | ||
344 | \ | ||
345 | pte = pte_offset(pmd + i, 0); \ | ||
346 | if (i == i_max) \ | ||
347 | cur_end = end; \ | ||
348 | \ | ||
349 | ret |= kvm_mips_##name##_pte(pte, start, cur_end); \ | ||
350 | } \ | ||
351 | return ret; \ | ||
352 | } \ | ||
353 | \ | ||
354 | static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \ | ||
355 | unsigned long end) \ | ||
356 | { \ | ||
357 | int ret = 0; \ | ||
358 | pmd_t *pmd; \ | ||
359 | unsigned long cur_end = ~0ul; \ | ||
360 | int i_min = __pud_offset(start); \ | ||
361 | int i_max = __pud_offset(end); \ | ||
362 | int i; \ | ||
363 | \ | ||
364 | for (i = i_min; i <= i_max; ++i, start = 0) { \ | ||
365 | if (!pud_present(pud[i])) \ | ||
366 | continue; \ | ||
367 | \ | ||
368 | pmd = pmd_offset(pud + i, 0); \ | ||
369 | if (i == i_max) \ | ||
370 | cur_end = end; \ | ||
371 | \ | ||
372 | ret |= kvm_mips_##name##_pmd(pmd, start, cur_end); \ | ||
373 | } \ | ||
374 | return ret; \ | ||
375 | } \ | ||
376 | \ | ||
377 | static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ | ||
378 | unsigned long end) \ | ||
379 | { \ | ||
380 | int ret = 0; \ | ||
381 | pud_t *pud; \ | ||
382 | unsigned long cur_end = ~0ul; \ | ||
383 | int i_min = pgd_index(start); \ | ||
384 | int i_max = pgd_index(end); \ | ||
385 | int i; \ | ||
386 | \ | ||
387 | for (i = i_min; i <= i_max; ++i, start = 0) { \ | ||
388 | if (!pgd_present(pgd[i])) \ | ||
389 | continue; \ | ||
390 | \ | ||
391 | pud = pud_offset(pgd + i, 0); \ | ||
392 | if (i == i_max) \ | ||
393 | cur_end = end; \ | ||
394 | \ | ||
395 | ret |= kvm_mips_##name##_pud(pud, start, cur_end); \ | ||
396 | } \ | ||
397 | return ret; \ | ||
398 | } | ||
399 | |||
400 | /* | ||
401 | * kvm_mips_mkclean_gpa_pt. | ||
402 | * Mark a range of guest physical address space clean (writes fault) in the VM's | ||
403 | * GPA page table to allow dirty page tracking. | ||
404 | */ | ||
36 | 405 | ||
37 | if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE) | 406 | BUILD_PTE_RANGE_OP(mkclean, pte_mkclean) |
407 | |||
408 | /** | ||
409 | * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean. | ||
410 | * @kvm: KVM pointer. | ||
411 | * @start_gfn: Guest frame number of first page in GPA range to flush. | ||
412 | * @end_gfn: Guest frame number of last page in GPA range to flush. | ||
413 | * | ||
414 | * Make a range of GPA mappings clean so that guest writes will fault and | ||
415 | * trigger dirty page logging. | ||
416 | * | ||
417 | * The caller must hold the @kvm->mmu_lock spinlock. | ||
418 | * | ||
419 | * Returns: Whether any GPA mappings were modified, which would require | ||
420 | * derived mappings (GVA page tables & TLB entries) to be | ||
421 | * invalidated. | ||
422 | */ | ||
423 | int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) | ||
424 | { | ||
425 | return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd, | ||
426 | start_gfn << PAGE_SHIFT, | ||
427 | end_gfn << PAGE_SHIFT); | ||
428 | } | ||
429 | |||
430 | /** | ||
431 | * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages | ||
432 | * @kvm: The KVM pointer | ||
433 | * @slot: The memory slot associated with mask | ||
434 | * @gfn_offset: The gfn offset in memory slot | ||
435 | * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory | ||
436 | * slot to be write protected | ||
437 | * | ||
438 | * Walks the bits set in @mask and write protects the associated PTEs. The | ||
439 | * caller must hold @kvm->mmu_lock. | ||
440 | */ | ||
441 | void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, | ||
442 | struct kvm_memory_slot *slot, | ||
443 | gfn_t gfn_offset, unsigned long mask) | ||
444 | { | ||
445 | gfn_t base_gfn = slot->base_gfn + gfn_offset; | ||
446 | gfn_t start = base_gfn + __ffs(mask); | ||
447 | gfn_t end = base_gfn + __fls(mask); | ||
448 | |||
449 | kvm_mips_mkclean_gpa_pt(kvm, start, end); | ||
450 | } | ||
451 | |||
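kvm_arch_mmu_enable_log_dirty_pt_masked() only needs the first and last set bits of @mask, because kvm_mips_mkclean_gpa_pt() cleans the whole inclusive gfn range in a single page-table walk. The bit arithmetic, shown with GCC builtins in place of the kernel's __ffs()/__fls() (assumes a 64-bit unsigned long; values are illustrative):

    #include <assert.h>

    /* Given the base gfn of a 64-page block and its dirty mask, compute the
     * inclusive gfn range that needs write protecting. */
    static void mask_to_range(unsigned long base_gfn, unsigned long mask,
                              unsigned long *start, unsigned long *end)
    {
            assert(mask != 0);
            *start = base_gfn + __builtin_ctzl(mask);         /* ~ __ffs(mask) */
            *end   = base_gfn + (63 - __builtin_clzl(mask));  /* ~ __fls(mask) */
    }

    int main(void)
    {
            unsigned long start, end;

            /* pages 3 and 17 of the block were dirtied */
            mask_to_range(0x1000, (1UL << 3) | (1UL << 17), &start, &end);
            assert(start == 0x1003 && end == 0x1011);
            return 0;
    }

The range may cover pages whose mask bits are clear; making an already-clean page clean again is harmless, so trading precision for a single walk is fine.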
452 | /* | ||
453 | * kvm_mips_mkold_gpa_pt. | ||
454 | * Mark a range of guest physical address space old (all accesses fault) in the | ||
455 | * VM's GPA page table to allow detection of commonly used pages. | ||
456 | */ | ||
457 | |||
458 | BUILD_PTE_RANGE_OP(mkold, pte_mkold) | ||
459 | |||
460 | static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn, | ||
461 | gfn_t end_gfn) | ||
462 | { | ||
463 | return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd, | ||
464 | start_gfn << PAGE_SHIFT, | ||
465 | end_gfn << PAGE_SHIFT); | ||
466 | } | ||
467 | |||
468 | static int handle_hva_to_gpa(struct kvm *kvm, | ||
469 | unsigned long start, | ||
470 | unsigned long end, | ||
471 | int (*handler)(struct kvm *kvm, gfn_t gfn, | ||
472 | gpa_t gfn_end, | ||
473 | struct kvm_memory_slot *memslot, | ||
474 | void *data), | ||
475 | void *data) | ||
476 | { | ||
477 | struct kvm_memslots *slots; | ||
478 | struct kvm_memory_slot *memslot; | ||
479 | int ret = 0; | ||
480 | |||
481 | slots = kvm_memslots(kvm); | ||
482 | |||
483 | /* we only care about the pages that the guest sees */ | ||
484 | kvm_for_each_memslot(memslot, slots) { | ||
485 | unsigned long hva_start, hva_end; | ||
486 | gfn_t gfn, gfn_end; | ||
487 | |||
488 | hva_start = max(start, memslot->userspace_addr); | ||
489 | hva_end = min(end, memslot->userspace_addr + | ||
490 | (memslot->npages << PAGE_SHIFT)); | ||
491 | if (hva_start >= hva_end) | ||
492 | continue; | ||
493 | |||
494 | /* | ||
495 | * {gfn(page) | page intersects with [hva_start, hva_end)} = | ||
496 | * {gfn_start, gfn_start+1, ..., gfn_end-1}. | ||
497 | */ | ||
498 | gfn = hva_to_gfn_memslot(hva_start, memslot); | ||
499 | gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); | ||
500 | |||
501 | ret |= handler(kvm, gfn, gfn_end, memslot, data); | ||
502 | } | ||
503 | |||
504 | return ret; | ||
505 | } | ||
506 | |||
507 | |||
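handle_hva_to_gpa() clips the incoming HVA range against each memslot and converts the overlap into a gfn range before invoking the per-range handler. The clipping is plain interval intersection plus a shift by the slot's base; a minimal stand-alone sketch (the struct is a stand-in whose field names mirror the kernel's, and the example values are made up):

    #include <stdio.h>

    #define TOY_PAGE_SHIFT 12
    #define TOY_PAGE_SIZE  (1UL << TOY_PAGE_SHIFT)

    struct toy_memslot {
            unsigned long base_gfn;        /* first guest frame of the slot */
            unsigned long npages;
            unsigned long userspace_addr;  /* HVA the slot is mapped at */
    };

    /* Compute the gfn range of 'slot' touched by HVAs [start, end); returns 0
     * when the slot does not intersect the range at all. */
    static int hva_range_to_gfns(const struct toy_memslot *slot,
                                 unsigned long start, unsigned long end,
                                 unsigned long *gfn, unsigned long *gfn_end)
    {
            unsigned long slot_end = slot->userspace_addr +
                                     (slot->npages << TOY_PAGE_SHIFT);
            unsigned long hva_start = start > slot->userspace_addr ?
                                      start : slot->userspace_addr;
            unsigned long hva_end = end < slot_end ? end : slot_end;

            if (hva_start >= hva_end)
                    return 0;

            *gfn = slot->base_gfn +
                   ((hva_start - slot->userspace_addr) >> TOY_PAGE_SHIFT);
            *gfn_end = slot->base_gfn +
                   ((hva_end + TOY_PAGE_SIZE - 1 - slot->userspace_addr)
                    >> TOY_PAGE_SHIFT);
            return 1;
    }

    int main(void)
    {
            struct toy_memslot slot = {
                    .base_gfn = 0x100, .npages = 16,
                    .userspace_addr = 0x70000000,
            };
            unsigned long gfn, gfn_end;

            if (hva_range_to_gfns(&slot, 0x70002000, 0x70003000, &gfn, &gfn_end))
                    printf("gfns %#lx..%#lx\n", gfn, gfn_end);  /* 0x102..0x103 */
            return 0;
    }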
508 | static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, | ||
509 | struct kvm_memory_slot *memslot, void *data) | ||
510 | { | ||
511 | kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end); | ||
512 | return 1; | ||
513 | } | ||
514 | |||
515 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
516 | { | ||
517 | unsigned long end = hva + PAGE_SIZE; | ||
518 | |||
519 | handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL); | ||
520 | |||
521 | kvm_mips_callbacks->flush_shadow_all(kvm); | ||
522 | return 0; | ||
523 | } | ||
524 | |||
525 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | ||
526 | { | ||
527 | handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); | ||
528 | |||
529 | kvm_mips_callbacks->flush_shadow_all(kvm); | ||
530 | return 0; | ||
531 | } | ||
532 | |||
533 | static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, | ||
534 | struct kvm_memory_slot *memslot, void *data) | ||
535 | { | ||
536 | gpa_t gpa = gfn << PAGE_SHIFT; | ||
537 | pte_t hva_pte = *(pte_t *)data; | ||
538 | pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); | ||
539 | pte_t old_pte; | ||
540 | |||
541 | if (!gpa_pte) | ||
542 | return 0; | ||
543 | |||
544 | /* Mapping may need adjusting depending on memslot flags */ | ||
545 | old_pte = *gpa_pte; | ||
546 | if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) | ||
547 | hva_pte = pte_mkclean(hva_pte); | ||
548 | else if (memslot->flags & KVM_MEM_READONLY) | ||
549 | hva_pte = pte_wrprotect(hva_pte); | ||
550 | |||
551 | set_pte(gpa_pte, hva_pte); | ||
552 | |||
553 | /* Replacing an absent or old page doesn't need flushes */ | ||
554 | if (!pte_present(old_pte) || !pte_young(old_pte)) | ||
38 | return 0; | 555 | return 0; |
39 | 556 | ||
557 | /* Pages swapped, aged, moved, or cleaned require flushes */ | ||
558 | return !pte_present(hva_pte) || | ||
559 | !pte_young(hva_pte) || | ||
560 | pte_pfn(old_pte) != pte_pfn(hva_pte) || | ||
561 | (pte_dirty(old_pte) && !pte_dirty(hva_pte)); | ||
562 | } | ||
563 | |||
564 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) | ||
565 | { | ||
566 | unsigned long end = hva + PAGE_SIZE; | ||
567 | int ret; | ||
568 | |||
569 | ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte); | ||
570 | if (ret) | ||
571 | kvm_mips_callbacks->flush_shadow_all(kvm); | ||
572 | } | ||
573 | |||
574 | static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, | ||
575 | struct kvm_memory_slot *memslot, void *data) | ||
576 | { | ||
577 | return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end); | ||
578 | } | ||
579 | |||
580 | static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, | ||
581 | struct kvm_memory_slot *memslot, void *data) | ||
582 | { | ||
583 | gpa_t gpa = gfn << PAGE_SHIFT; | ||
584 | pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); | ||
585 | |||
586 | if (!gpa_pte) | ||
587 | return 0; | ||
588 | return pte_young(*gpa_pte); | ||
589 | } | ||
590 | |||
591 | int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) | ||
592 | { | ||
593 | return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); | ||
594 | } | ||
595 | |||
596 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | ||
597 | { | ||
598 | return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); | ||
599 | } | ||
600 | |||
601 | /** | ||
602 | * _kvm_mips_map_page_fast() - Fast path GPA fault handler. | ||
603 | * @vcpu: VCPU pointer. | ||
604 | * @gpa: Guest physical address of fault. | ||
605 | * @write_fault: Whether the fault was due to a write. | ||
606 | * @out_entry: New PTE for @gpa (written on success unless NULL). | ||
607 | * @out_buddy: New PTE for @gpa's buddy (written on success unless | ||
608 | * NULL). | ||
609 | * | ||
610 | * Perform fast path GPA fault handling, doing all that can be done without | ||
611 | * calling into KVM. This handles marking old pages young (for idle page | ||
612 | * tracking), and dirtying of clean pages (for dirty page logging). | ||
613 | * | ||
614 | * Returns: 0 on success, in which case we can update derived mappings and | ||
615 | * resume guest execution. | ||
616 | * -EFAULT on failure due to absent GPA mapping or write to | ||
617 | * read-only page, in which case KVM must be consulted. | ||
618 | */ | ||
619 | static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, | ||
620 | bool write_fault, | ||
621 | pte_t *out_entry, pte_t *out_buddy) | ||
622 | { | ||
623 | struct kvm *kvm = vcpu->kvm; | ||
624 | gfn_t gfn = gpa >> PAGE_SHIFT; | ||
625 | pte_t *ptep; | ||
626 | kvm_pfn_t pfn = 0; /* silence bogus GCC warning */ | ||
627 | bool pfn_valid = false; | ||
628 | int ret = 0; | ||
629 | |||
630 | spin_lock(&kvm->mmu_lock); | ||
631 | |||
632 | /* Fast path - just check GPA page table for an existing entry */ | ||
633 | ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa); | ||
634 | if (!ptep || !pte_present(*ptep)) { | ||
635 | ret = -EFAULT; | ||
636 | goto out; | ||
637 | } | ||
638 | |||
639 | /* Track access to pages marked old */ | ||
640 | if (!pte_young(*ptep)) { | ||
641 | set_pte(ptep, pte_mkyoung(*ptep)); | ||
642 | pfn = pte_pfn(*ptep); | ||
643 | pfn_valid = true; | ||
644 | /* call kvm_set_pfn_accessed() after unlock */ | ||
645 | } | ||
646 | if (write_fault && !pte_dirty(*ptep)) { | ||
647 | if (!pte_write(*ptep)) { | ||
648 | ret = -EFAULT; | ||
649 | goto out; | ||
650 | } | ||
651 | |||
652 | /* Track dirtying of writeable pages */ | ||
653 | set_pte(ptep, pte_mkdirty(*ptep)); | ||
654 | pfn = pte_pfn(*ptep); | ||
655 | mark_page_dirty(kvm, gfn); | ||
656 | kvm_set_pfn_dirty(pfn); | ||
657 | } | ||
658 | |||
659 | if (out_entry) | ||
660 | *out_entry = *ptep; | ||
661 | if (out_buddy) | ||
662 | *out_buddy = *ptep_buddy(ptep); | ||
663 | |||
664 | out: | ||
665 | spin_unlock(&kvm->mmu_lock); | ||
666 | if (pfn_valid) | ||
667 | kvm_set_pfn_accessed(pfn); | ||
668 | return ret; | ||
669 | } | ||
670 | |||
671 | /** | ||
672 | * kvm_mips_map_page() - Map a guest physical page. | ||
673 | * @vcpu: VCPU pointer. | ||
674 | * @gpa: Guest physical address of fault. | ||
675 | * @write_fault: Whether the fault was due to a write. | ||
676 | * @out_entry: New PTE for @gpa (written on success unless NULL). | ||
677 | * @out_buddy: New PTE for @gpa's buddy (written on success unless | ||
678 | * NULL). | ||
679 | * | ||
680 | * Handle GPA faults by creating a new GPA mapping (or updating an existing | ||
681 | * one). | ||
682 | * | ||
683 | * This takes care of marking pages young or dirty (idle/dirty page tracking), | ||
684 | * asking KVM for the corresponding PFN, and creating a mapping in the GPA page | ||
685 | * tables. Derived mappings (GVA page tables and TLBs) must be handled by the | ||
686 | * caller. | ||
687 | * | ||
688 | * Returns: 0 on success, in which case the caller may use the @out_entry | ||
689 | * and @out_buddy PTEs to update derived mappings and resume guest | ||
690 | * execution. | ||
691 | * -EFAULT if there is no memory region at @gpa or a write was | ||
692 | * attempted to a read-only memory region. This is usually handled | ||
693 | * as an MMIO access. | ||
694 | */ | ||
695 | static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, | ||
696 | bool write_fault, | ||
697 | pte_t *out_entry, pte_t *out_buddy) | ||
698 | { | ||
699 | struct kvm *kvm = vcpu->kvm; | ||
700 | struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; | ||
701 | gfn_t gfn = gpa >> PAGE_SHIFT; | ||
702 | int srcu_idx, err; | ||
703 | kvm_pfn_t pfn; | ||
704 | pte_t *ptep, entry, old_pte; | ||
705 | bool writeable; | ||
706 | unsigned long prot_bits; | ||
707 | unsigned long mmu_seq; | ||
708 | |||
709 | /* Try the fast path to handle old / clean pages */ | ||
40 | srcu_idx = srcu_read_lock(&kvm->srcu); | 710 | srcu_idx = srcu_read_lock(&kvm->srcu); |
41 | pfn = gfn_to_pfn(kvm, gfn); | 711 | err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry, |
712 | out_buddy); | ||
713 | if (!err) | ||
714 | goto out; | ||
42 | 715 | ||
716 | /* We need a minimum of cached pages ready for page table creation */ | ||
717 | err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, | ||
718 | KVM_NR_MEM_OBJS); | ||
719 | if (err) | ||
720 | goto out; | ||
721 | |||
722 | retry: | ||
723 | /* | ||
724 | * Used to check for invalidations in progress, of the pfn that is | ||
725 | * returned by gfn_to_pfn_prot() below. | ||
726 | */ | ||
727 | mmu_seq = kvm->mmu_notifier_seq; | ||
728 | /* | ||
729 | * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in | ||
730 | * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't | ||
731 | * risk the page we get a reference to getting unmapped before we have a | ||
732 | * chance to grab the mmu_lock without mmu_notifier_retry() noticing. | ||
733 | * | ||
734 | * This smp_rmb() pairs with the effective smp_wmb() of the combination | ||
735 | * of the pte_unmap_unlock() after the PTE is zapped, and the | ||
736 | * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before | ||
737 | * mmu_notifier_seq is incremented. | ||
738 | */ | ||
739 | smp_rmb(); | ||
740 | |||
741 | /* Slow path - ask KVM core whether we can access this GPA */ | ||
742 | pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable); | ||
43 | if (is_error_noslot_pfn(pfn)) { | 743 | if (is_error_noslot_pfn(pfn)) { |
44 | kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn); | ||
45 | err = -EFAULT; | 744 | err = -EFAULT; |
46 | goto out; | 745 | goto out; |
47 | } | 746 | } |
48 | 747 | ||
49 | kvm->arch.guest_pmap[gfn] = pfn; | 748 | spin_lock(&kvm->mmu_lock); |
749 | /* Check if an invalidation has taken place since we got pfn */ | ||
750 | if (mmu_notifier_retry(kvm, mmu_seq)) { | ||
751 | /* | ||
752 | * This can happen when mappings are changed asynchronously, but | ||
753 | * also synchronously if a COW is triggered by | ||
754 | * gfn_to_pfn_prot(). | ||
755 | */ | ||
756 | spin_unlock(&kvm->mmu_lock); | ||
757 | kvm_release_pfn_clean(pfn); | ||
758 | goto retry; | ||
759 | } | ||
760 | |||
761 | /* Ensure page tables are allocated */ | ||
762 | ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa); | ||
763 | |||
764 | /* Set up the PTE */ | ||
765 | prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default; | ||
766 | if (writeable) { | ||
767 | prot_bits |= _PAGE_WRITE; | ||
768 | if (write_fault) { | ||
769 | prot_bits |= __WRITEABLE; | ||
770 | mark_page_dirty(kvm, gfn); | ||
771 | kvm_set_pfn_dirty(pfn); | ||
772 | } | ||
773 | } | ||
774 | entry = pfn_pte(pfn, __pgprot(prot_bits)); | ||
775 | |||
776 | /* Write the PTE */ | ||
777 | old_pte = *ptep; | ||
778 | set_pte(ptep, entry); | ||
779 | |||
780 | err = 0; | ||
781 | if (out_entry) | ||
782 | *out_entry = *ptep; | ||
783 | if (out_buddy) | ||
784 | *out_buddy = *ptep_buddy(ptep); | ||
785 | |||
786 | spin_unlock(&kvm->mmu_lock); | ||
787 | kvm_release_pfn_clean(pfn); | ||
788 | kvm_set_pfn_accessed(pfn); | ||
50 | out: | 789 | out: |
51 | srcu_read_unlock(&kvm->srcu, srcu_idx); | 790 | srcu_read_unlock(&kvm->srcu, srcu_idx); |
52 | return err; | 791 | return err; |
53 | } | 792 | } |
54 | 793 | ||
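The mmu_seq sampling and mmu_notifier_retry() check in kvm_mips_map_page() are the generic KVM defence against racing MMU-notifier invalidations: record the sequence count before pinning the page (which may sleep or trigger COW), then, once mmu_lock is held, start over if the count has moved. A compilable skeleton of that control flow with C11 atomics and a pthread mutex standing in for the kernel primitives (the helpers are stubs, not KVM functions):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_ulong mmu_seq;             /* bumped by the invalidation side */
    static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;

    static bool pin_page(unsigned long gfn)   { (void)gfn; return true; } /* stub */
    static void unpin_page(unsigned long gfn) { (void)gfn; }              /* stub */
    static void install_mapping(unsigned long gfn) { (void)gfn; }         /* stub */

    static void map_with_retry(unsigned long gfn)
    {
            unsigned long seq;

    retry:
            /* Sample the count, then fence so the page lookup below cannot be
             * reordered before the sample (the kernel uses smp_rmb() here). */
            seq = atomic_load_explicit(&mmu_seq, memory_order_relaxed);
            atomic_thread_fence(memory_order_acquire);

            if (!pin_page(gfn))              /* may sleep, may trigger COW */
                    return;

            pthread_mutex_lock(&mmu_lock);
            if (atomic_load_explicit(&mmu_seq, memory_order_relaxed) != seq) {
                    /* An invalidation ran while we were unlocked: retry. */
                    pthread_mutex_unlock(&mmu_lock);
                    unpin_page(gfn);
                    goto retry;
            }
            install_mapping(gfn);            /* safe: no invalidation raced us */
            pthread_mutex_unlock(&mmu_lock);
            unpin_page(gfn);
    }

    int main(void)
    {
            map_with_retry(0x1234);
            return 0;
    }

The real mmu_notifier_retry() additionally checks for an invalidation that is still in progress, not just a completed one, but the retry loop is the same idea.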
55 | /* Translate guest KSEG0 addresses to Host PA */ | 794 | static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu, |
56 | unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, | 795 | unsigned long addr) |
57 | unsigned long gva) | ||
58 | { | 796 | { |
59 | gfn_t gfn; | 797 | struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; |
60 | unsigned long offset = gva & ~PAGE_MASK; | 798 | pgd_t *pgdp; |
61 | struct kvm *kvm = vcpu->kvm; | 799 | int ret; |
800 | |||
801 | /* We need a minimum of cached pages ready for page table creation */ | ||
802 | ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, | ||
803 | KVM_NR_MEM_OBJS); | ||
804 | if (ret) | ||
805 | return NULL; | ||
806 | |||
807 | if (KVM_GUEST_KERNEL_MODE(vcpu)) | ||
808 | pgdp = vcpu->arch.guest_kernel_mm.pgd; | ||
809 | else | ||
810 | pgdp = vcpu->arch.guest_user_mm.pgd; | ||
811 | |||
812 | return kvm_mips_walk_pgd(pgdp, memcache, addr); | ||
813 | } | ||
62 | 814 | ||
63 | if (KVM_GUEST_KSEGX(gva) != KVM_GUEST_KSEG0) { | 815 | void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr, |
64 | kvm_err("%s/%p: Invalid gva: %#lx\n", __func__, | 816 | bool user) |
65 | __builtin_return_address(0), gva); | 817 | { |
66 | return KVM_INVALID_PAGE; | 818 | pgd_t *pgdp; |
819 | pte_t *ptep; | ||
820 | |||
821 | addr &= PAGE_MASK << 1; | ||
822 | |||
823 | pgdp = vcpu->arch.guest_kernel_mm.pgd; | ||
824 | ptep = kvm_mips_walk_pgd(pgdp, NULL, addr); | ||
825 | if (ptep) { | ||
826 | ptep[0] = pfn_pte(0, __pgprot(0)); | ||
827 | ptep[1] = pfn_pte(0, __pgprot(0)); | ||
828 | } | ||
829 | |||
830 | if (user) { | ||
831 | pgdp = vcpu->arch.guest_user_mm.pgd; | ||
832 | ptep = kvm_mips_walk_pgd(pgdp, NULL, addr); | ||
833 | if (ptep) { | ||
834 | ptep[0] = pfn_pte(0, __pgprot(0)); | ||
835 | ptep[1] = pfn_pte(0, __pgprot(0)); | ||
836 | } | ||
67 | } | 837 | } |
838 | } | ||
68 | 839 | ||
69 | gfn = (KVM_GUEST_CPHYSADDR(gva) >> PAGE_SHIFT); | 840 | /* |
841 | * kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}. | ||
842 | * Flush a range of guest virtual address space from the VM's GVA page tables. | ||
843 | */ | ||
70 | 844 | ||
71 | if (gfn >= kvm->arch.guest_pmap_npages) { | 845 | static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva, |
72 | kvm_err("%s: Invalid gfn: %#llx, GVA: %#lx\n", __func__, gfn, | 846 | unsigned long end_gva) |
73 | gva); | 847 | { |
74 | return KVM_INVALID_PAGE; | 848 | int i_min = __pte_offset(start_gva); |
849 | int i_max = __pte_offset(end_gva); | ||
850 | bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); | ||
851 | int i; | ||
852 | |||
853 | /* | ||
854 | * There's no freeing to do, so there's no point clearing individual | ||
855 | * entries unless only part of the last level page table needs flushing. | ||
856 | */ | ||
857 | if (safe_to_remove) | ||
858 | return true; | ||
859 | |||
860 | for (i = i_min; i <= i_max; ++i) { | ||
861 | if (!pte_present(pte[i])) | ||
862 | continue; | ||
863 | |||
864 | set_pte(pte + i, __pte(0)); | ||
75 | } | 865 | } |
866 | return false; | ||
867 | } | ||
76 | 868 | ||
77 | if (kvm_mips_map_page(vcpu->kvm, gfn) < 0) | 869 | static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva, |
78 | return KVM_INVALID_ADDR; | 870 | unsigned long end_gva) |
871 | { | ||
872 | pte_t *pte; | ||
873 | unsigned long end = ~0ul; | ||
874 | int i_min = __pmd_offset(start_gva); | ||
875 | int i_max = __pmd_offset(end_gva); | ||
876 | bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); | ||
877 | int i; | ||
878 | |||
879 | for (i = i_min; i <= i_max; ++i, start_gva = 0) { | ||
880 | if (!pmd_present(pmd[i])) | ||
881 | continue; | ||
882 | |||
883 | pte = pte_offset(pmd + i, 0); | ||
884 | if (i == i_max) | ||
885 | end = end_gva; | ||
886 | |||
887 | if (kvm_mips_flush_gva_pte(pte, start_gva, end)) { | ||
888 | pmd_clear(pmd + i); | ||
889 | pte_free_kernel(NULL, pte); | ||
890 | } else { | ||
891 | safe_to_remove = false; | ||
892 | } | ||
893 | } | ||
894 | return safe_to_remove; | ||
895 | } | ||
79 | 896 | ||
80 | return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset; | 897 | static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva, |
898 | unsigned long end_gva) | ||
899 | { | ||
900 | pmd_t *pmd; | ||
901 | unsigned long end = ~0ul; | ||
902 | int i_min = __pud_offset(start_gva); | ||
903 | int i_max = __pud_offset(end_gva); | ||
904 | bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); | ||
905 | int i; | ||
906 | |||
907 | for (i = i_min; i <= i_max; ++i, start_gva = 0) { | ||
908 | if (!pud_present(pud[i])) | ||
909 | continue; | ||
910 | |||
911 | pmd = pmd_offset(pud + i, 0); | ||
912 | if (i == i_max) | ||
913 | end = end_gva; | ||
914 | |||
915 | if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) { | ||
916 | pud_clear(pud + i); | ||
917 | pmd_free(NULL, pmd); | ||
918 | } else { | ||
919 | safe_to_remove = false; | ||
920 | } | ||
921 | } | ||
922 | return safe_to_remove; | ||
923 | } | ||
924 | |||
925 | static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva, | ||
926 | unsigned long end_gva) | ||
927 | { | ||
928 | pud_t *pud; | ||
929 | unsigned long end = ~0ul; | ||
930 | int i_min = pgd_index(start_gva); | ||
931 | int i_max = pgd_index(end_gva); | ||
932 | bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); | ||
933 | int i; | ||
934 | |||
935 | for (i = i_min; i <= i_max; ++i, start_gva = 0) { | ||
936 | if (!pgd_present(pgd[i])) | ||
937 | continue; | ||
938 | |||
939 | pud = pud_offset(pgd + i, 0); | ||
940 | if (i == i_max) | ||
941 | end = end_gva; | ||
942 | |||
943 | if (kvm_mips_flush_gva_pud(pud, start_gva, end)) { | ||
944 | pgd_clear(pgd + i); | ||
945 | pud_free(NULL, pud); | ||
946 | } else { | ||
947 | safe_to_remove = false; | ||
948 | } | ||
949 | } | ||
950 | return safe_to_remove; | ||
951 | } | ||
952 | |||
953 | void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags) | ||
954 | { | ||
955 | if (flags & KMF_GPA) { | ||
956 | /* all of guest virtual address space could be affected */ | ||
957 | if (flags & KMF_KERN) | ||
958 | /* useg, kseg0, seg2/3 */ | ||
959 | kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff); | ||
960 | else | ||
961 | /* useg */ | ||
962 | kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff); | ||
963 | } else { | ||
964 | /* useg */ | ||
965 | kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff); | ||
966 | |||
967 | /* kseg2/3 */ | ||
968 | if (flags & KMF_KERN) | ||
969 | kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff); | ||
970 | } | ||
971 | } | ||
972 | |||
973 | static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte) | ||
974 | { | ||
975 | /* | ||
976 | * Don't leak writeable but clean entries from GPA page tables. We don't | ||
977 | * want the normal Linux tlbmod handler to handle dirtying when KVM | ||
978 | * accesses guest memory. | ||
979 | */ | ||
980 | if (!pte_dirty(pte)) | ||
981 | pte = pte_wrprotect(pte); | ||
982 | |||
983 | return pte; | ||
984 | } | ||
985 | |||
986 | static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo) | ||
987 | { | ||
988 | /* Guest EntryLo overrides host EntryLo */ | ||
989 | if (!(entrylo & ENTRYLO_D)) | ||
990 | pte = pte_mkclean(pte); | ||
991 | |||
992 | return kvm_mips_gpa_pte_to_gva_unmapped(pte); | ||
81 | } | 993 | } |
82 | 994 | ||
83 | /* XXXKYMA: Must be called with interrupts disabled */ | 995 | /* XXXKYMA: Must be called with interrupts disabled */ |
84 | int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, | 996 | int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, |
85 | struct kvm_vcpu *vcpu) | 997 | struct kvm_vcpu *vcpu, |
998 | bool write_fault) | ||
86 | { | 999 | { |
87 | gfn_t gfn; | 1000 | unsigned long gpa; |
88 | kvm_pfn_t pfn0, pfn1; | 1001 | pte_t pte_gpa[2], *ptep_gva; |
89 | unsigned long vaddr = 0; | 1002 | int idx; |
90 | unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; | ||
91 | struct kvm *kvm = vcpu->kvm; | ||
92 | const int flush_dcache_mask = 0; | ||
93 | int ret; | ||
94 | 1003 | ||
95 | if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { | 1004 | if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { |
96 | kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); | 1005 | kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); |
@@ -98,49 +1007,39 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, | |||
98 | return -1; | 1007 | return -1; |
99 | } | 1008 | } |
100 | 1009 | ||
101 | gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT); | 1010 | /* Get the GPA page table entry */ |
102 | if ((gfn | 1) >= kvm->arch.guest_pmap_npages) { | 1011 | gpa = KVM_GUEST_CPHYSADDR(badvaddr); |
103 | kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__, | 1012 | idx = (badvaddr >> PAGE_SHIFT) & 1; |
104 | gfn, badvaddr); | 1013 | if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx], |
105 | kvm_mips_dump_host_tlbs(); | 1014 | &pte_gpa[!idx]) < 0) |
106 | return -1; | 1015 | return -1; |
107 | } | ||
108 | vaddr = badvaddr & (PAGE_MASK << 1); | ||
109 | 1016 | ||
110 | if (kvm_mips_map_page(vcpu->kvm, gfn) < 0) | 1017 | /* Get the GVA page table entry */ |
1018 | ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE); | ||
1019 | if (!ptep_gva) { | ||
1020 | kvm_err("No ptep for gva %lx\n", badvaddr); | ||
111 | return -1; | 1021 | return -1; |
1022 | } | ||
112 | 1023 | ||
113 | if (kvm_mips_map_page(vcpu->kvm, gfn ^ 0x1) < 0) | 1024 | /* Copy a pair of entries from GPA page table to GVA page table */ |
114 | return -1; | 1025 | ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]); |
115 | 1026 | ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]); | |
116 | pfn0 = kvm->arch.guest_pmap[gfn & ~0x1]; | ||
117 | pfn1 = kvm->arch.guest_pmap[gfn | 0x1]; | ||
118 | |||
119 | entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | | ||
120 | ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | | ||
121 | ENTRYLO_D | ENTRYLO_V; | ||
122 | entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | | ||
123 | ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | | ||
124 | ENTRYLO_D | ENTRYLO_V; | ||
125 | |||
126 | preempt_disable(); | ||
127 | entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu)); | ||
128 | ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, | ||
129 | flush_dcache_mask); | ||
130 | preempt_enable(); | ||
131 | 1027 | ||
132 | return ret; | 1028 | /* Invalidate this entry in the TLB, guest kernel ASID only */ |
1029 | kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); | ||
1030 | return 0; | ||
133 | } | 1031 | } |
134 | 1032 | ||
135 | int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, | 1033 | int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, |
136 | struct kvm_mips_tlb *tlb) | 1034 | struct kvm_mips_tlb *tlb, |
1035 | unsigned long gva, | ||
1036 | bool write_fault) | ||
137 | { | 1037 | { |
138 | unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; | ||
139 | struct kvm *kvm = vcpu->kvm; | 1038 | struct kvm *kvm = vcpu->kvm; |
140 | kvm_pfn_t pfn0, pfn1; | ||
141 | gfn_t gfn0, gfn1; | ||
142 | long tlb_lo[2]; | 1039 | long tlb_lo[2]; |
143 | int ret; | 1040 | pte_t pte_gpa[2], *ptep_buddy, *ptep_gva; |
1041 | unsigned int idx = TLB_LO_IDX(*tlb, gva); | ||
1042 | bool kernel = KVM_GUEST_KERNEL_MODE(vcpu); | ||
144 | 1043 | ||
145 | tlb_lo[0] = tlb->tlb_lo[0]; | 1044 | tlb_lo[0] = tlb->tlb_lo[0]; |
146 | tlb_lo[1] = tlb->tlb_lo[1]; | 1045 | tlb_lo[1] = tlb->tlb_lo[1]; |
@@ -149,70 +1048,64 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, | |||
149 | * The commpage address must not be mapped to anything else if the guest | 1048 | * The commpage address must not be mapped to anything else if the guest |
150 | * TLB contains entries nearby, or commpage accesses will break. | 1049 | * TLB contains entries nearby, or commpage accesses will break. |
151 | */ | 1050 | */ |
152 | if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) & | 1051 | if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1))) |
153 | VPN2_MASK & (PAGE_MASK << 1))) | 1052 | tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0; |
154 | tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0; | ||
155 | |||
156 | gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT; | ||
157 | gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT; | ||
158 | if (gfn0 >= kvm->arch.guest_pmap_npages || | ||
159 | gfn1 >= kvm->arch.guest_pmap_npages) { | ||
160 | kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n", | ||
161 | __func__, gfn0, gfn1, tlb->tlb_hi); | ||
162 | kvm_mips_dump_guest_tlbs(vcpu); | ||
163 | return -1; | ||
164 | } | ||
165 | 1053 | ||
166 | if (kvm_mips_map_page(kvm, gfn0) < 0) | 1054 | /* Get the GPA page table entry */ |
1055 | if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]), | ||
1056 | write_fault, &pte_gpa[idx], NULL) < 0) | ||
167 | return -1; | 1057 | return -1; |
168 | 1058 | ||
169 | if (kvm_mips_map_page(kvm, gfn1) < 0) | 1059 | /* And its GVA buddy's GPA page table entry if it also exists */ |
1060 | pte_gpa[!idx] = pfn_pte(0, __pgprot(0)); | ||
1061 | if (tlb_lo[!idx] & ENTRYLO_V) { | ||
1062 | spin_lock(&kvm->mmu_lock); | ||
1063 | ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL, | ||
1064 | mips3_tlbpfn_to_paddr(tlb_lo[!idx])); | ||
1065 | if (ptep_buddy) | ||
1066 | pte_gpa[!idx] = *ptep_buddy; | ||
1067 | spin_unlock(&kvm->mmu_lock); | ||
1068 | } | ||
1069 | |||
1070 | /* Get the GVA page table entry pair */ | ||
1071 | ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE); | ||
1072 | if (!ptep_gva) { | ||
1073 | kvm_err("No ptep for gva %lx\n", gva); | ||
170 | return -1; | 1074 | return -1; |
1075 | } | ||
171 | 1076 | ||
172 | pfn0 = kvm->arch.guest_pmap[gfn0]; | 1077 | /* Copy a pair of entries from GPA page table to GVA page table */ |
173 | pfn1 = kvm->arch.guest_pmap[gfn1]; | 1078 | ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]); |
1079 | ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]); | ||
174 | 1080 | ||
175 | /* Get attributes from the Guest TLB */ | 1081 | /* Invalidate this entry in the TLB, current guest mode ASID only */ |
176 | entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | | 1082 | kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel); |
177 | ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | | ||
178 | (tlb_lo[0] & ENTRYLO_D) | | ||
179 | (tlb_lo[0] & ENTRYLO_V); | ||
180 | entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | | ||
181 | ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | | ||
182 | (tlb_lo[1] & ENTRYLO_D) | | ||
183 | (tlb_lo[1] & ENTRYLO_V); | ||
184 | 1083 | ||
185 | kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, | 1084 | kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, |
186 | tlb->tlb_lo[0], tlb->tlb_lo[1]); | 1085 | tlb->tlb_lo[0], tlb->tlb_lo[1]); |
187 | 1086 | ||
188 | preempt_disable(); | 1087 | return 0; |
189 | entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? | ||
190 | kvm_mips_get_kernel_asid(vcpu) : | ||
191 | kvm_mips_get_user_asid(vcpu)); | ||
192 | ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, | ||
193 | tlb->tlb_mask); | ||
194 | preempt_enable(); | ||
195 | |||
196 | return ret; | ||
197 | } | 1088 | } |
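The rewritten kvm_mips_handle_mapped_seg_tlb_fault() above no longer writes host TLB entries directly: it copies the even/odd pair of page table entries backing the guest TLB line into the GVA page tables and then invalidates the stale host entry for the current guest mode. Because a MIPS TLB line maps two consecutive pages, picking which EntryLo half covers a given address comes down to one bit of the virtual page number; the diff uses the TLB_LO_IDX() helper for this, while the sketch below hard-codes 4 KiB pages and is only a user-space model.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages for this sketch */

/* Which EntryLo half (0 = even page, 1 = odd page) of a TLB line covers @va. */
static unsigned int tlb_lo_idx(uint64_t va)
{
	return (va >> PAGE_SHIFT) & 1;
}

int main(void)
{
	uint64_t gva = 0x7fff5000;
	unsigned int idx = tlb_lo_idx(gva);

	/* The "buddy" mapped by the same TLB line is simply the other half. */
	printf("gva %#llx uses entrylo%u, buddy is entrylo%u\n",
	       (unsigned long long)gva, idx, !idx);
	return 0;
}

This is also why the handler fills in the buddy's PTE when the other EntryLo half is valid: the refill handler loads both halves of the pair together.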
198 | 1089 | ||
199 | void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, | 1090 | int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, |
200 | struct kvm_vcpu *vcpu) | 1091 | struct kvm_vcpu *vcpu) |
201 | { | 1092 | { |
202 | unsigned long asid = asid_cache(cpu); | 1093 | kvm_pfn_t pfn; |
203 | 1094 | pte_t *ptep; | |
204 | asid += cpu_asid_inc(); | ||
205 | if (!(asid & cpu_asid_mask(&cpu_data[cpu]))) { | ||
206 | if (cpu_has_vtag_icache) | ||
207 | flush_icache_all(); | ||
208 | |||
209 | kvm_local_flush_tlb_all(); /* start new asid cycle */ | ||
210 | 1095 | ||
211 | if (!asid) /* fix version if needed */ | 1096 | ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr); |
212 | asid = asid_first_version(cpu); | 1097 | if (!ptep) { |
1098 | kvm_err("No ptep for commpage %lx\n", badvaddr); | ||
1099 | return -1; | ||
213 | } | 1100 | } |
214 | 1101 | ||
215 | cpu_context(cpu, mm) = asid_cache(cpu) = asid; | 1102 | pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage)); |
1103 | /* Also set valid and dirty, so refill handler doesn't have to */ | ||
1104 | *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED))); | ||
1105 | |||
1106 | /* Invalidate this entry in the TLB, guest kernel ASID only */ | ||
1107 | kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); | ||
1108 | return 0; | ||
216 | } | 1109 | } |
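kvm_mips_handle_commpage_tlb_fault() now installs a GVA page table entry for the commpage instead of writing a wired host TLB entry, and it pre-sets the accessed and dirty bits so the refill path never has to fault again just to mark them. A small standalone model of that flag manipulation follows; the PTE_* bit values are invented for illustration (the real _PAGE_* layout is configuration dependent).

#include <stdio.h>
#include <stdint.h>

/* Hypothetical flag bits; the real _PAGE_* layout varies per configuration. */
#define PTE_VALID    (1u << 0)
#define PTE_DIRTY    (1u << 1)
#define PTE_ACCESSED (1u << 2)

typedef uint32_t pte_val_t;

static pte_val_t pte_mkdirty(pte_val_t pte) { return pte | PTE_DIRTY; }
static pte_val_t pte_mkyoung(pte_val_t pte) { return pte | PTE_ACCESSED; }

int main(void)
{
	pte_val_t pte = PTE_VALID;	/* freshly created commpage mapping */

	/* Pre-set accessed and dirty so the fast refill path never faults again. */
	pte = pte_mkyoung(pte_mkdirty(pte));

	printf("pte flags: %#x\n", pte);
	return 0;
}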
217 | 1110 | ||
218 | /** | 1111 | /** |
@@ -235,42 +1128,13 @@ static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) | |||
235 | /* Restore ASID once we are scheduled back after preemption */ | 1128 | /* Restore ASID once we are scheduled back after preemption */ |
236 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1129 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
237 | { | 1130 | { |
238 | unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]); | ||
239 | unsigned long flags; | 1131 | unsigned long flags; |
240 | int newasid = 0; | ||
241 | 1132 | ||
242 | kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); | 1133 | kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); |
243 | 1134 | ||
244 | /* Allocate new kernel and user ASIDs if needed */ | ||
245 | |||
246 | local_irq_save(flags); | 1135 | local_irq_save(flags); |
247 | 1136 | ||
248 | if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) & | 1137 | vcpu->cpu = cpu; |
249 | asid_version_mask(cpu)) { | ||
250 | kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu); | ||
251 | vcpu->arch.guest_kernel_asid[cpu] = | ||
252 | vcpu->arch.guest_kernel_mm.context.asid[cpu]; | ||
253 | newasid++; | ||
254 | |||
255 | kvm_debug("[%d]: cpu_context: %#lx\n", cpu, | ||
256 | cpu_context(cpu, current->mm)); | ||
257 | kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n", | ||
258 | cpu, vcpu->arch.guest_kernel_asid[cpu]); | ||
259 | } | ||
260 | |||
261 | if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) & | ||
262 | asid_version_mask(cpu)) { | ||
263 | kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu); | ||
264 | vcpu->arch.guest_user_asid[cpu] = | ||
265 | vcpu->arch.guest_user_mm.context.asid[cpu]; | ||
266 | newasid++; | ||
267 | |||
268 | kvm_debug("[%d]: cpu_context: %#lx\n", cpu, | ||
269 | cpu_context(cpu, current->mm)); | ||
270 | kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu, | ||
271 | vcpu->arch.guest_user_asid[cpu]); | ||
272 | } | ||
273 | |||
274 | if (vcpu->arch.last_sched_cpu != cpu) { | 1138 | if (vcpu->arch.last_sched_cpu != cpu) { |
275 | kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", | 1139 | kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", |
276 | vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); | 1140 | vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); |
@@ -282,42 +1146,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
282 | kvm_mips_migrate_count(vcpu); | 1146 | kvm_mips_migrate_count(vcpu); |
283 | } | 1147 | } |
284 | 1148 | ||
285 | if (!newasid) { | ||
286 | /* | ||
287 | * If we preempted while the guest was executing, then reload | ||
288 | * the pre-empted ASID | ||
289 | */ | ||
290 | if (current->flags & PF_VCPU) { | ||
291 | write_c0_entryhi(vcpu->arch. | ||
292 | preempt_entryhi & asid_mask); | ||
293 | ehb(); | ||
294 | } | ||
295 | } else { | ||
296 | /* New ASIDs were allocated for the VM */ | ||
297 | |||
298 | /* | ||
299 | * Were we in guest context? If so then the pre-empted ASID is | ||
300 | * no longer valid, we need to set it to what it should be based | ||
301 | * on the mode of the Guest (Kernel/User) | ||
302 | */ | ||
303 | if (current->flags & PF_VCPU) { | ||
304 | if (KVM_GUEST_KERNEL_MODE(vcpu)) | ||
305 | write_c0_entryhi(vcpu->arch. | ||
306 | guest_kernel_asid[cpu] & | ||
307 | asid_mask); | ||
308 | else | ||
309 | write_c0_entryhi(vcpu->arch. | ||
310 | guest_user_asid[cpu] & | ||
311 | asid_mask); | ||
312 | ehb(); | ||
313 | } | ||
314 | } | ||
315 | |||
316 | /* restore guest state to registers */ | 1149 | /* restore guest state to registers */ |
317 | kvm_mips_callbacks->vcpu_set_regs(vcpu); | 1150 | kvm_mips_callbacks->vcpu_load(vcpu, cpu); |
318 | 1151 | ||
319 | local_irq_restore(flags); | 1152 | local_irq_restore(flags); |
320 | |||
321 | } | 1153 | } |
322 | 1154 | ||
323 | /* ASID can change if another task is scheduled during preemption */ | 1155 | /* ASID can change if another task is scheduled during preemption */ |
@@ -329,75 +1161,90 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
329 | local_irq_save(flags); | 1161 | local_irq_save(flags); |
330 | 1162 | ||
331 | cpu = smp_processor_id(); | 1163 | cpu = smp_processor_id(); |
332 | |||
333 | vcpu->arch.preempt_entryhi = read_c0_entryhi(); | ||
334 | vcpu->arch.last_sched_cpu = cpu; | 1164 | vcpu->arch.last_sched_cpu = cpu; |
1165 | vcpu->cpu = -1; | ||
335 | 1166 | ||
336 | /* save guest state in registers */ | 1167 | /* save guest state in registers */ |
337 | kvm_mips_callbacks->vcpu_get_regs(vcpu); | 1168 | kvm_mips_callbacks->vcpu_put(vcpu, cpu); |
338 | |||
339 | if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & | ||
340 | asid_version_mask(cpu))) { | ||
341 | kvm_debug("%s: Dropping MMU Context: %#lx\n", __func__, | ||
342 | cpu_context(cpu, current->mm)); | ||
343 | drop_mmu_context(current->mm, cpu); | ||
344 | } | ||
345 | write_c0_entryhi(cpu_asid(cpu, current->mm)); | ||
346 | ehb(); | ||
347 | 1169 | ||
348 | local_irq_restore(flags); | 1170 | local_irq_restore(flags); |
349 | } | 1171 | } |
350 | 1172 | ||
351 | u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu) | 1173 | /** |
1174 | * kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault. | ||
1175 | * @vcpu: Virtual CPU. | ||
1176 | * @gva: Guest virtual address to be accessed. | ||
1177 | * @write: True if write attempted (must be dirtied and made writable). | ||
1178 | * | ||
1179 | * Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and | ||
1180 | * dirtying the page if @write so that guest instructions can be modified. | ||
1181 | * | ||
1182 | * Returns: KVM_MIPS_MAPPED on success. | ||
1183 | * KVM_MIPS_GVA if bad guest virtual address. | ||
1184 | * KVM_MIPS_GPA if bad guest physical address. | ||
1185 | * KVM_MIPS_TLB if guest TLB not present. | ||
1186 | * KVM_MIPS_TLBINV if guest TLB present but not valid. | ||
1187 | * KVM_MIPS_TLBMOD if guest TLB read only. | ||
1188 | */ | ||
1189 | enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, | ||
1190 | unsigned long gva, | ||
1191 | bool write) | ||
352 | { | 1192 | { |
353 | struct mips_coproc *cop0 = vcpu->arch.cop0; | 1193 | struct mips_coproc *cop0 = vcpu->arch.cop0; |
354 | unsigned long paddr, flags, vpn2, asid; | 1194 | struct kvm_mips_tlb *tlb; |
355 | unsigned long va = (unsigned long)opc; | ||
356 | void *vaddr; | ||
357 | u32 inst; | ||
358 | int index; | 1195 | int index; |
359 | 1196 | ||
360 | if (KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0 || | 1197 | if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) { |
361 | KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { | 1198 | if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0) |
362 | local_irq_save(flags); | 1199 | return KVM_MIPS_GPA; |
363 | index = kvm_mips_host_tlb_lookup(vcpu, va); | 1200 | } else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) || |
364 | if (index >= 0) { | 1201 | KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) { |
365 | inst = *(opc); | 1202 | /* Address should be in the guest TLB */ |
366 | } else { | 1203 | index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) | |
367 | vpn2 = va & VPN2_MASK; | 1204 | (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID)); |
368 | asid = kvm_read_c0_guest_entryhi(cop0) & | 1205 | if (index < 0) |
369 | KVM_ENTRYHI_ASID; | 1206 | return KVM_MIPS_TLB; |
370 | index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid); | 1207 | tlb = &vcpu->arch.guest_tlb[index]; |
371 | if (index < 0) { | 1208 | |
372 | kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n", | 1209 | /* Entry should be valid, and dirty for writes */ |
373 | __func__, opc, vcpu, read_c0_entryhi()); | 1210 | if (!TLB_IS_VALID(*tlb, gva)) |
374 | kvm_mips_dump_host_tlbs(); | 1211 | return KVM_MIPS_TLBINV; |
375 | kvm_mips_dump_guest_tlbs(vcpu); | 1212 | if (write && !TLB_IS_DIRTY(*tlb, gva)) |
376 | local_irq_restore(flags); | 1213 | return KVM_MIPS_TLBMOD; |
377 | return KVM_INVALID_INST; | 1214 | |
378 | } | 1215 | if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write)) |
379 | if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, | 1216 | return KVM_MIPS_GPA; |
380 | &vcpu->arch.guest_tlb[index])) { | ||
381 | kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n", | ||
382 | __func__, opc, index, vcpu, | ||
383 | read_c0_entryhi()); | ||
384 | kvm_mips_dump_guest_tlbs(vcpu); | ||
385 | local_irq_restore(flags); | ||
386 | return KVM_INVALID_INST; | ||
387 | } | ||
388 | inst = *(opc); | ||
389 | } | ||
390 | local_irq_restore(flags); | ||
391 | } else if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { | ||
392 | paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, va); | ||
393 | vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr))); | ||
394 | vaddr += paddr & ~PAGE_MASK; | ||
395 | inst = *(u32 *)vaddr; | ||
396 | kunmap_atomic(vaddr); | ||
397 | } else { | 1217 | } else { |
398 | kvm_err("%s: illegal address: %p\n", __func__, opc); | 1218 | return KVM_MIPS_GVA; |
399 | return KVM_INVALID_INST; | ||
400 | } | 1219 | } |
401 | 1220 | ||
402 | return inst; | 1221 | return KVM_MIPS_MAPPED; |
1222 | } | ||
1223 | |||
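kvm_trap_emul_gva_fault() above reduces a guest virtual access to one of the enum kvm_mips_fault_result values listed in its kernel-doc block. The ladder of checks can be summarised as the following standalone sketch, with the segment and TLB state flattened into booleans; all names here are hypothetical, not kernel symbols.

#include <stdio.h>
#include <stdbool.h>

enum fault_result { MAPPED, BAD_GVA, BAD_GPA, TLB_MISS, TLB_INVALID, TLB_MOD };

/* Hypothetical, flattened view of the checks the handler performs. */
static enum fault_result classify(bool in_kseg0, bool in_mapped_seg,
				  bool tlb_hit, bool tlb_valid,
				  bool tlb_dirty, bool write, bool map_ok)
{
	if (in_kseg0)
		return map_ok ? MAPPED : BAD_GPA;
	if (!in_mapped_seg)
		return BAD_GVA;
	if (!tlb_hit)
		return TLB_MISS;
	if (!tlb_valid)
		return TLB_INVALID;
	if (write && !tlb_dirty)
		return TLB_MOD;
	return map_ok ? MAPPED : BAD_GPA;
}

int main(void)
{
	/* A write hitting a clean guest TLB entry must be reported as TLBMOD. */
	printf("%d\n", classify(false, true, true, true, false, true, true) == TLB_MOD);
	return 0;
}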
1224 | int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) | ||
1225 | { | ||
1226 | int err; | ||
1227 | |||
1228 | retry: | ||
1229 | kvm_trap_emul_gva_lockless_begin(vcpu); | ||
1230 | err = get_user(*out, opc); | ||
1231 | kvm_trap_emul_gva_lockless_end(vcpu); | ||
1232 | |||
1233 | if (unlikely(err)) { | ||
1234 | /* | ||
1235 | * Try to handle the fault, maybe we just raced with a GVA | ||
1236 | * invalidation. | ||
1237 | */ | ||
1238 | err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc, | ||
1239 | false); | ||
1240 | if (unlikely(err)) { | ||
1241 | kvm_err("%s: illegal address: %p\n", | ||
1242 | __func__, opc); | ||
1243 | return -EFAULT; | ||
1244 | } | ||
1245 | |||
1246 | /* Hopefully it'll work now */ | ||
1247 | goto retry; | ||
1248 | } | ||
1249 | return 0; | ||
403 | } | 1250 | } |
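The new kvm_get_inst() is an optimistic-read-and-retry loop: fetch the word through the GVA space with get_user() inside the lockless section, and if that faults (for example because a remote GVA invalidation raced with the read) attempt a one-shot repair via kvm_trap_emul_gva_fault() and try again. A self-contained model of the pattern, with try_read() and fix_mapping() standing in for the real helpers:

#include <stdio.h>

/* Hypothetical stand-ins for get_user() and the GVA fault fixup. */
static int faults_remaining = 1;

static int try_read(unsigned int *out)
{
	if (faults_remaining) {		/* simulate one transient fault */
		faults_remaining--;
		return -1;
	}
	*out = 0x03e00008;		/* jr ra */
	return 0;
}

static int fix_mapping(void)
{
	return 0;			/* pretend the fixup always succeeds */
}

static int get_inst(unsigned int *out)
{
retry:
	if (try_read(out)) {
		if (fix_mapping())
			return -1;	/* genuinely bad address */
		goto retry;		/* mapping repaired, read again */
	}
	return 0;
}

int main(void)
{
	unsigned int inst;

	if (!get_inst(&inst))
		printf("fetched %#x after a retry\n", inst);
	return 0;
}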
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 254377d8e0b9..2819eb793345 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -33,28 +33,20 @@ | |||
33 | #define KVM_GUEST_PC_TLB 0 | 33 | #define KVM_GUEST_PC_TLB 0 |
34 | #define KVM_GUEST_SP_TLB 1 | 34 | #define KVM_GUEST_SP_TLB 1 |
35 | 35 | ||
36 | atomic_t kvm_mips_instance; | ||
37 | EXPORT_SYMBOL_GPL(kvm_mips_instance); | ||
38 | |||
39 | static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) | 36 | static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) |
40 | { | 37 | { |
38 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | ||
41 | int cpu = smp_processor_id(); | 39 | int cpu = smp_processor_id(); |
42 | 40 | ||
43 | return vcpu->arch.guest_kernel_asid[cpu] & | 41 | return cpu_asid(cpu, kern_mm); |
44 | cpu_asid_mask(&cpu_data[cpu]); | ||
45 | } | 42 | } |
46 | 43 | ||
47 | static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) | 44 | static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) |
48 | { | 45 | { |
46 | struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; | ||
49 | int cpu = smp_processor_id(); | 47 | int cpu = smp_processor_id(); |
50 | 48 | ||
51 | return vcpu->arch.guest_user_asid[cpu] & | 49 | return cpu_asid(cpu, user_mm); |
52 | cpu_asid_mask(&cpu_data[cpu]); | ||
53 | } | ||
54 | |||
55 | inline u32 kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu) | ||
56 | { | ||
57 | return vcpu->kvm->arch.commpage_tlb; | ||
58 | } | 50 | } |
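With the guest kernel/user ASIDs no longer cached in per-VCPU arrays, kvm_mips_get_kernel_asid() and kvm_mips_get_user_asid() simply read the low bits of the guest mm's per-CPU context word via cpu_asid(). A sketch of that split between generation (upper bits) and hardware ASID (low bits), assuming an 8-bit ASID field:

#include <stdio.h>

#define ASID_MASK 0xffUL	/* assume an 8-bit hardware ASID field */

/* cpu_asid() analogue: the hardware ASID is the low bits of the context. */
static unsigned long cpu_asid(unsigned long context)
{
	return context & ASID_MASK;
}

/* The remaining bits form the generation ("ASID version") of the context. */
static unsigned long asid_version(unsigned long context)
{
	return context & ~ASID_MASK;
}

int main(void)
{
	unsigned long context = 0x300UL | 0x42;	/* generation 3, ASID 0x42 */

	printf("asid=%#lx version=%#lx\n", cpu_asid(context), asid_version(context));
	return 0;
}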
59 | 51 | ||
60 | /* Structure defining a TLB entry data set. */ | 52 |
@@ -104,109 +96,6 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) | |||
104 | } | 96 | } |
105 | EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs); | 97 | EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs); |
106 | 98 | ||
107 | /* XXXKYMA: Must be called with interrupts disabled */ | ||
108 | /* set flush_dcache_mask == 0 if no dcache flush required */ | ||
109 | int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, | ||
110 | unsigned long entrylo0, unsigned long entrylo1, | ||
111 | int flush_dcache_mask) | ||
112 | { | ||
113 | unsigned long flags; | ||
114 | unsigned long old_entryhi; | ||
115 | int idx; | ||
116 | |||
117 | local_irq_save(flags); | ||
118 | |||
119 | old_entryhi = read_c0_entryhi(); | ||
120 | write_c0_entryhi(entryhi); | ||
121 | mtc0_tlbw_hazard(); | ||
122 | |||
123 | tlb_probe(); | ||
124 | tlb_probe_hazard(); | ||
125 | idx = read_c0_index(); | ||
126 | |||
127 | if (idx > current_cpu_data.tlbsize) { | ||
128 | kvm_err("%s: Invalid Index: %d\n", __func__, idx); | ||
129 | kvm_mips_dump_host_tlbs(); | ||
130 | local_irq_restore(flags); | ||
131 | return -1; | ||
132 | } | ||
133 | |||
134 | write_c0_entrylo0(entrylo0); | ||
135 | write_c0_entrylo1(entrylo1); | ||
136 | mtc0_tlbw_hazard(); | ||
137 | |||
138 | if (idx < 0) | ||
139 | tlb_write_random(); | ||
140 | else | ||
141 | tlb_write_indexed(); | ||
142 | tlbw_use_hazard(); | ||
143 | |||
144 | kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n", | ||
145 | vcpu->arch.pc, idx, read_c0_entryhi(), | ||
146 | read_c0_entrylo0(), read_c0_entrylo1()); | ||
147 | |||
148 | /* Flush D-cache */ | ||
149 | if (flush_dcache_mask) { | ||
150 | if (entrylo0 & ENTRYLO_V) { | ||
151 | ++vcpu->stat.flush_dcache_exits; | ||
152 | flush_data_cache_page((entryhi & VPN2_MASK) & | ||
153 | ~flush_dcache_mask); | ||
154 | } | ||
155 | if (entrylo1 & ENTRYLO_V) { | ||
156 | ++vcpu->stat.flush_dcache_exits; | ||
157 | flush_data_cache_page(((entryhi & VPN2_MASK) & | ||
158 | ~flush_dcache_mask) | | ||
159 | (0x1 << PAGE_SHIFT)); | ||
160 | } | ||
161 | } | ||
162 | |||
163 | /* Restore old ASID */ | ||
164 | write_c0_entryhi(old_entryhi); | ||
165 | mtc0_tlbw_hazard(); | ||
166 | local_irq_restore(flags); | ||
167 | return 0; | ||
168 | } | ||
169 | EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_write); | ||
170 | |||
171 | int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, | ||
172 | struct kvm_vcpu *vcpu) | ||
173 | { | ||
174 | kvm_pfn_t pfn; | ||
175 | unsigned long flags, old_entryhi = 0, vaddr = 0; | ||
176 | unsigned long entrylo[2] = { 0, 0 }; | ||
177 | unsigned int pair_idx; | ||
178 | |||
179 | pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage)); | ||
180 | pair_idx = (badvaddr >> PAGE_SHIFT) & 1; | ||
181 | entrylo[pair_idx] = mips3_paddr_to_tlbpfn(pfn << PAGE_SHIFT) | | ||
182 | ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | | ||
183 | ENTRYLO_D | ENTRYLO_V; | ||
184 | |||
185 | local_irq_save(flags); | ||
186 | |||
187 | old_entryhi = read_c0_entryhi(); | ||
188 | vaddr = badvaddr & (PAGE_MASK << 1); | ||
189 | write_c0_entryhi(vaddr | kvm_mips_get_kernel_asid(vcpu)); | ||
190 | write_c0_entrylo0(entrylo[0]); | ||
191 | write_c0_entrylo1(entrylo[1]); | ||
192 | write_c0_index(kvm_mips_get_commpage_asid(vcpu)); | ||
193 | mtc0_tlbw_hazard(); | ||
194 | tlb_write_indexed(); | ||
195 | tlbw_use_hazard(); | ||
196 | |||
197 | kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", | ||
198 | vcpu->arch.pc, read_c0_index(), read_c0_entryhi(), | ||
199 | read_c0_entrylo0(), read_c0_entrylo1()); | ||
200 | |||
201 | /* Restore old ASID */ | ||
202 | write_c0_entryhi(old_entryhi); | ||
203 | mtc0_tlbw_hazard(); | ||
204 | local_irq_restore(flags); | ||
205 | |||
206 | return 0; | ||
207 | } | ||
208 | EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault); | ||
209 | |||
210 | int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) | 99 | int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) |
211 | { | 100 | { |
212 | int i; | 101 | int i; |
@@ -228,51 +117,11 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) | |||
228 | } | 117 | } |
229 | EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup); | 118 | EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup); |
230 | 119 | ||
231 | int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) | 120 | static int _kvm_mips_host_tlb_inv(unsigned long entryhi) |
232 | { | ||
233 | unsigned long old_entryhi, flags; | ||
234 | int idx; | ||
235 | |||
236 | local_irq_save(flags); | ||
237 | |||
238 | old_entryhi = read_c0_entryhi(); | ||
239 | |||
240 | if (KVM_GUEST_KERNEL_MODE(vcpu)) | ||
241 | write_c0_entryhi((vaddr & VPN2_MASK) | | ||
242 | kvm_mips_get_kernel_asid(vcpu)); | ||
243 | else { | ||
244 | write_c0_entryhi((vaddr & VPN2_MASK) | | ||
245 | kvm_mips_get_user_asid(vcpu)); | ||
246 | } | ||
247 | |||
248 | mtc0_tlbw_hazard(); | ||
249 | |||
250 | tlb_probe(); | ||
251 | tlb_probe_hazard(); | ||
252 | idx = read_c0_index(); | ||
253 | |||
254 | /* Restore old ASID */ | ||
255 | write_c0_entryhi(old_entryhi); | ||
256 | mtc0_tlbw_hazard(); | ||
257 | |||
258 | local_irq_restore(flags); | ||
259 | |||
260 | kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx); | ||
261 | |||
262 | return idx; | ||
263 | } | ||
264 | EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_lookup); | ||
265 | |||
266 | int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) | ||
267 | { | 121 | { |
268 | int idx; | 122 | int idx; |
269 | unsigned long flags, old_entryhi; | ||
270 | |||
271 | local_irq_save(flags); | ||
272 | |||
273 | old_entryhi = read_c0_entryhi(); | ||
274 | 123 | ||
275 | write_c0_entryhi((va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu)); | 124 | write_c0_entryhi(entryhi); |
276 | mtc0_tlbw_hazard(); | 125 | mtc0_tlbw_hazard(); |
277 | 126 | ||
278 | tlb_probe(); | 127 | tlb_probe(); |
@@ -282,7 +131,7 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) | |||
282 | if (idx >= current_cpu_data.tlbsize) | 131 | if (idx >= current_cpu_data.tlbsize) |
283 | BUG(); | 132 | BUG(); |
284 | 133 | ||
285 | if (idx > 0) { | 134 | if (idx >= 0) { |
286 | write_c0_entryhi(UNIQUE_ENTRYHI(idx)); | 135 | write_c0_entryhi(UNIQUE_ENTRYHI(idx)); |
287 | write_c0_entrylo0(0); | 136 | write_c0_entrylo0(0); |
288 | write_c0_entrylo1(0); | 137 | write_c0_entrylo1(0); |
@@ -292,93 +141,75 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) | |||
292 | tlbw_use_hazard(); | 141 | tlbw_use_hazard(); |
293 | } | 142 | } |
294 | 143 | ||
295 | write_c0_entryhi(old_entryhi); | 144 | return idx; |
296 | mtc0_tlbw_hazard(); | ||
297 | |||
298 | local_irq_restore(flags); | ||
299 | |||
300 | if (idx > 0) | ||
301 | kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__, | ||
302 | (va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu), idx); | ||
303 | |||
304 | return 0; | ||
305 | } | 145 | } |
306 | EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); | ||
307 | 146 | ||
308 | void kvm_mips_flush_host_tlb(int skip_kseg0) | 147 | int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, |
148 | bool user, bool kernel) | ||
309 | { | 149 | { |
310 | unsigned long flags; | 150 | int idx_user, idx_kernel; |
311 | unsigned long old_entryhi, entryhi; | 151 | unsigned long flags, old_entryhi; |
312 | unsigned long old_pagemask; | ||
313 | int entry = 0; | ||
314 | int maxentry = current_cpu_data.tlbsize; | ||
315 | 152 | ||
316 | local_irq_save(flags); | 153 | local_irq_save(flags); |
317 | 154 | ||
318 | old_entryhi = read_c0_entryhi(); | 155 | old_entryhi = read_c0_entryhi(); |
319 | old_pagemask = read_c0_pagemask(); | ||
320 | |||
321 | /* Blast 'em all away. */ | ||
322 | for (entry = 0; entry < maxentry; entry++) { | ||
323 | write_c0_index(entry); | ||
324 | |||
325 | if (skip_kseg0) { | ||
326 | mtc0_tlbr_hazard(); | ||
327 | tlb_read(); | ||
328 | tlb_read_hazard(); | ||
329 | |||
330 | entryhi = read_c0_entryhi(); | ||
331 | 156 | ||
332 | /* Don't blow away guest kernel entries */ | 157 | if (user) |
333 | if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0) | 158 | idx_user = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | |
334 | continue; | 159 | kvm_mips_get_user_asid(vcpu)); |
335 | 160 | if (kernel) | |
336 | write_c0_pagemask(old_pagemask); | 161 | idx_kernel = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | |
337 | } | 162 | kvm_mips_get_kernel_asid(vcpu)); |
338 | |||
339 | /* Make sure all entries differ. */ | ||
340 | write_c0_entryhi(UNIQUE_ENTRYHI(entry)); | ||
341 | write_c0_entrylo0(0); | ||
342 | write_c0_entrylo1(0); | ||
343 | mtc0_tlbw_hazard(); | ||
344 | |||
345 | tlb_write_indexed(); | ||
346 | tlbw_use_hazard(); | ||
347 | } | ||
348 | 163 | ||
349 | write_c0_entryhi(old_entryhi); | 164 | write_c0_entryhi(old_entryhi); |
350 | write_c0_pagemask(old_pagemask); | ||
351 | mtc0_tlbw_hazard(); | 165 | mtc0_tlbw_hazard(); |
352 | 166 | ||
353 | local_irq_restore(flags); | 167 | local_irq_restore(flags); |
168 | |||
169 | if (user && idx_user >= 0) | ||
170 | kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n", | ||
171 | __func__, (va & VPN2_MASK) | | ||
172 | kvm_mips_get_user_asid(vcpu), idx_user); | ||
173 | if (kernel && idx_kernel >= 0) | ||
174 | kvm_debug("%s: Invalidated guest kernel entryhi %#lx @ idx %d\n", | ||
175 | __func__, (va & VPN2_MASK) | | ||
176 | kvm_mips_get_kernel_asid(vcpu), idx_kernel); | ||
177 | |||
178 | return 0; | ||
354 | } | 179 | } |
355 | EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb); | 180 | EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); |
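kvm_mips_host_tlb_inv() now probes for up to two host TLB entries, one per requested guest ASID (user and/or kernel), and wipes any match by rewriting it with a unique EntryHi that can never be hit again, which is what the UNIQUE_ENTRYHI() store in _kvm_mips_host_tlb_inv() achieves on real hardware. A toy software-TLB model of that probe-and-clear step, assuming 4 KiB pages and an 8-bit ASID:

#include <stdio.h>
#include <stdint.h>

#define TLB_SIZE  16
#define VPN2_MASK (~0x1fffULL)	/* 4 KiB pages: VPN2 covers an 8 KiB pair */
#define ASID_MASK 0xffULL

static uint64_t tlb_hi[TLB_SIZE];	/* toy host TLB: EntryHi only */

/* Probe for VPN2|ASID and invalidate the match; return its index or -1. */
static int host_tlb_inv(uint64_t va, uint64_t asid)
{
	uint64_t entryhi = (va & VPN2_MASK) | (asid & ASID_MASK);
	int i;

	for (i = 0; i < TLB_SIZE; i++) {
		if (tlb_hi[i] == entryhi) {
			/* Unique per-index value that can never match a lookup. */
			tlb_hi[i] = ~0ULL - i;
			return i;
		}
	}
	return -1;
}

int main(void)
{
	tlb_hi[3] = (0x7fff4000 & VPN2_MASK) | 0x42;

	printf("kernel-ASID probe: idx %d\n", host_tlb_inv(0x7fff4000, 0x17));
	printf("user-ASID probe:   idx %d\n", host_tlb_inv(0x7fff4000, 0x42));
	return 0;
}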
356 | 181 | ||
357 | void kvm_local_flush_tlb_all(void) | 182 | /** |
183 | * kvm_mips_suspend_mm() - Suspend the active mm. | ||
184 | * @cpu The CPU we're running on. | ||
185 | * | ||
186 | * Suspend the active_mm, ready for a switch to a KVM guest virtual address | ||
187 | * space. This is left active for the duration of guest context, including time | ||
188 | * with interrupts enabled, so we need to be careful not to confuse e.g. cache | ||
189 | * management IPIs. | ||
190 | * | ||
191 | * kvm_mips_resume_mm() should be called before context switching to a different | ||
192 | * process so we don't need to worry about reference counting. | ||
193 | * | ||
194 | * This needs to be in static kernel code to avoid exporting init_mm. | ||
195 | */ | ||
196 | void kvm_mips_suspend_mm(int cpu) | ||
358 | { | 197 | { |
359 | unsigned long flags; | 198 | cpumask_clear_cpu(cpu, mm_cpumask(current->active_mm)); |
360 | unsigned long old_ctx; | 199 | current->active_mm = &init_mm; |
361 | int entry = 0; | 200 | } |
362 | 201 | EXPORT_SYMBOL_GPL(kvm_mips_suspend_mm); | |
363 | local_irq_save(flags); | ||
364 | /* Save old context and create impossible VPN2 value */ | ||
365 | old_ctx = read_c0_entryhi(); | ||
366 | write_c0_entrylo0(0); | ||
367 | write_c0_entrylo1(0); | ||
368 | |||
369 | /* Blast 'em all away. */ | ||
370 | while (entry < current_cpu_data.tlbsize) { | ||
371 | /* Make sure all entries differ. */ | ||
372 | write_c0_entryhi(UNIQUE_ENTRYHI(entry)); | ||
373 | write_c0_index(entry); | ||
374 | mtc0_tlbw_hazard(); | ||
375 | tlb_write_indexed(); | ||
376 | tlbw_use_hazard(); | ||
377 | entry++; | ||
378 | } | ||
379 | write_c0_entryhi(old_ctx); | ||
380 | mtc0_tlbw_hazard(); | ||
381 | 202 | ||
382 | local_irq_restore(flags); | 203 | /** |
204 | * kvm_mips_resume_mm() - Resume the current process mm. | ||
205 | * @cpu The CPU we're running on. | ||
206 | * | ||
207 | * Resume the mm of the current process, after a switch back from a KVM guest | ||
208 | * virtual address space (see kvm_mips_suspend_mm()). | ||
209 | */ | ||
210 | void kvm_mips_resume_mm(int cpu) | ||
211 | { | ||
212 | cpumask_set_cpu(cpu, mm_cpumask(current->mm)); | ||
213 | current->active_mm = current->mm; | ||
383 | } | 214 | } |
384 | EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all); | 215 | EXPORT_SYMBOL_GPL(kvm_mips_resume_mm); |
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index 3b20441f2beb..b1fa53b252ea 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
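(nothing inserted here)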
@@ -11,9 +11,11 @@ | |||
11 | 11 | ||
12 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
13 | #include <linux/err.h> | 13 | #include <linux/err.h> |
14 | #include <linux/vmalloc.h> | ||
15 | |||
16 | #include <linux/kvm_host.h> | 14 | #include <linux/kvm_host.h> |
15 | #include <linux/uaccess.h> | ||
16 | #include <linux/vmalloc.h> | ||
17 | #include <asm/mmu_context.h> | ||
18 | #include <asm/pgalloc.h> | ||
17 | 19 | ||
18 | #include "interrupt.h" | 20 | #include "interrupt.h" |
19 | 21 | ||
@@ -21,9 +23,12 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) | |||
21 | { | 23 | { |
22 | gpa_t gpa; | 24 | gpa_t gpa; |
23 | gva_t kseg = KSEGX(gva); | 25 | gva_t kseg = KSEGX(gva); |
26 | gva_t gkseg = KVM_GUEST_KSEGX(gva); | ||
24 | 27 | ||
25 | if ((kseg == CKSEG0) || (kseg == CKSEG1)) | 28 | if ((kseg == CKSEG0) || (kseg == CKSEG1)) |
26 | gpa = CPHYSADDR(gva); | 29 | gpa = CPHYSADDR(gva); |
30 | else if (gkseg == KVM_GUEST_KSEG0) | ||
31 | gpa = KVM_GUEST_CPHYSADDR(gva); | ||
27 | else { | 32 | else { |
28 | kvm_err("%s: cannot find GPA for GVA: %#lx\n", __func__, gva); | 33 | kvm_err("%s: cannot find GPA for GVA: %#lx\n", __func__, gva); |
29 | kvm_mips_dump_host_tlbs(); | 34 | kvm_mips_dump_host_tlbs(); |
@@ -83,48 +88,134 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) | |||
83 | return ret; | 88 | return ret; |
84 | } | 89 | } |
85 | 90 | ||
91 | static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_run *run, | ||
92 | struct kvm_vcpu *vcpu) | ||
93 | { | ||
94 | enum emulation_result er; | ||
95 | union mips_instruction inst; | ||
96 | int err; | ||
97 | |||
98 | /* A code fetch fault doesn't count as an MMIO */ | ||
99 | if (kvm_is_ifetch_fault(&vcpu->arch)) { | ||
100 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
101 | return RESUME_HOST; | ||
102 | } | ||
103 | |||
104 | /* Fetch the instruction. */ | ||
105 | if (cause & CAUSEF_BD) | ||
106 | opc += 1; | ||
107 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | ||
108 | if (err) { | ||
109 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
110 | return RESUME_HOST; | ||
111 | } | ||
112 | |||
113 | /* Emulate the load */ | ||
114 | er = kvm_mips_emulate_load(inst, cause, run, vcpu); | ||
115 | if (er == EMULATE_FAIL) { | ||
116 | kvm_err("Emulate load from MMIO space failed\n"); | ||
117 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
118 | } else { | ||
119 | run->exit_reason = KVM_EXIT_MMIO; | ||
120 | } | ||
121 | return RESUME_HOST; | ||
122 | } | ||
123 | |||
124 | static int kvm_mips_bad_store(u32 cause, u32 *opc, struct kvm_run *run, | ||
125 | struct kvm_vcpu *vcpu) | ||
126 | { | ||
127 | enum emulation_result er; | ||
128 | union mips_instruction inst; | ||
129 | int err; | ||
130 | |||
131 | /* Fetch the instruction. */ | ||
132 | if (cause & CAUSEF_BD) | ||
133 | opc += 1; | ||
134 | err = kvm_get_badinstr(opc, vcpu, &inst.word); | ||
135 | if (err) { | ||
136 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
137 | return RESUME_HOST; | ||
138 | } | ||
139 | |||
140 | /* Emulate the store */ | ||
141 | er = kvm_mips_emulate_store(inst, cause, run, vcpu); | ||
142 | if (er == EMULATE_FAIL) { | ||
143 | kvm_err("Emulate store to MMIO space failed\n"); | ||
144 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
145 | } else { | ||
146 | run->exit_reason = KVM_EXIT_MMIO; | ||
147 | } | ||
148 | return RESUME_HOST; | ||
149 | } | ||
150 | |||
151 | static int kvm_mips_bad_access(u32 cause, u32 *opc, struct kvm_run *run, | ||
152 | struct kvm_vcpu *vcpu, bool store) | ||
153 | { | ||
154 | if (store) | ||
155 | return kvm_mips_bad_store(cause, opc, run, vcpu); | ||
156 | else | ||
157 | return kvm_mips_bad_load(cause, opc, run, vcpu); | ||
158 | } | ||
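All three MMIO helpers above fetch the faulting instruction from the guest EPC, and when Cause.BD indicates the exception hit a branch delay slot they advance the fetch address by one word, since EPC then points at the branch rather than the delay-slot instruction. A small model of that address adjustment (CAUSEF_BD is bit 31 of Cause on MIPS32):

#include <stdio.h>
#include <stdint.h>

#define CAUSEF_BD (1u << 31)	/* Cause.BD: exception in a branch delay slot */

/* Address of the instruction that actually faulted, given EPC and Cause. */
static uint32_t *faulting_insn(uint32_t *epc, uint32_t cause)
{
	return (cause & CAUSEF_BD) ? epc + 1 : epc;
}

int main(void)
{
	uint32_t code[2] = { 0x10000001 /* b . + 8 */, 0xac820000 /* sw v0,0(a0) */ };

	/* With BD set, the store in the delay slot is the faulting instruction. */
	printf("delta: %ld words\n",
	       (long)(faulting_insn(code, CAUSEF_BD) - code));
	return 0;
}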
159 | |||
86 | static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) | 160 | static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) |
87 | { | 161 | { |
162 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
88 | struct kvm_run *run = vcpu->run; | 163 | struct kvm_run *run = vcpu->run; |
89 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; | 164 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; |
90 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; | 165 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; |
91 | u32 cause = vcpu->arch.host_cp0_cause; | 166 | u32 cause = vcpu->arch.host_cp0_cause; |
92 | enum emulation_result er = EMULATE_DONE; | 167 | struct kvm_mips_tlb *tlb; |
93 | int ret = RESUME_GUEST; | 168 | unsigned long entryhi; |
169 | int index; | ||
94 | 170 | ||
95 | if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 | 171 | if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 |
96 | || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { | 172 | || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { |
97 | kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#x, PC: %p, BadVaddr: %#lx\n", | 173 | /* |
98 | cause, opc, badvaddr); | 174 | * First find the mapping in the guest TLB. If the failure to |
99 | er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu); | 175 | * write was due to the guest TLB, it should be up to the guest |
176 | * to handle it. | ||
177 | */ | ||
178 | entryhi = (badvaddr & VPN2_MASK) | | ||
179 | (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); | ||
180 | index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); | ||
100 | 181 | ||
101 | if (er == EMULATE_DONE) | 182 | /* |
102 | ret = RESUME_GUEST; | 183 | * These should never happen. |
103 | else { | 184 | * They would indicate stale host TLB entries. |
185 | */ | ||
186 | if (unlikely(index < 0)) { | ||
104 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 187 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
105 | ret = RESUME_HOST; | 188 | return RESUME_HOST; |
106 | } | 189 | } |
107 | } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { | 190 | tlb = vcpu->arch.guest_tlb + index; |
191 | if (unlikely(!TLB_IS_VALID(*tlb, badvaddr))) { | ||
192 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
193 | return RESUME_HOST; | ||
194 | } | ||
195 | |||
108 | /* | 196 | /* |
109 | * XXXKYMA: The guest kernel does not expect to get this fault | 197 | * Guest entry not dirty? That would explain the TLB modified |
110 | * when we are not using HIGHMEM. Need to address this in a | 198 | * exception. Relay that on to the guest so it can handle it. |
111 | * HIGHMEM kernel | ||
112 | */ | 199 | */ |
113 | kvm_err("TLB MOD fault not handled, cause %#x, PC: %p, BadVaddr: %#lx\n", | 200 | if (!TLB_IS_DIRTY(*tlb, badvaddr)) { |
114 | cause, opc, badvaddr); | 201 | kvm_mips_emulate_tlbmod(cause, opc, run, vcpu); |
115 | kvm_mips_dump_host_tlbs(); | 202 | return RESUME_GUEST; |
116 | kvm_arch_vcpu_dump_regs(vcpu); | 203 | } |
117 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 204 | |
118 | ret = RESUME_HOST; | 205 | if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, badvaddr, |
206 | true)) | ||
207 | /* Not writable, needs handling as MMIO */ | ||
208 | return kvm_mips_bad_store(cause, opc, run, vcpu); | ||
209 | return RESUME_GUEST; | ||
210 | } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { | ||
211 | if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, true) < 0) | ||
212 | /* Not writable, needs handling as MMIO */ | ||
213 | return kvm_mips_bad_store(cause, opc, run, vcpu); | ||
214 | return RESUME_GUEST; | ||
119 | } else { | 215 | } else { |
120 | kvm_err("Illegal TLB Mod fault address , cause %#x, PC: %p, BadVaddr: %#lx\n", | 216 | /* host kernel addresses are all handled as MMIO */ |
121 | cause, opc, badvaddr); | 217 | return kvm_mips_bad_store(cause, opc, run, vcpu); |
122 | kvm_mips_dump_host_tlbs(); | ||
123 | kvm_arch_vcpu_dump_regs(vcpu); | ||
124 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
125 | ret = RESUME_HOST; | ||
126 | } | 218 | } |
127 | return ret; | ||
128 | } | 219 | } |
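The reworked TLB-modified handler first rebuilds the guest TLB lookup key from BadVAddr's VPN2 and the ASID in the guest's EntryHi; only if a valid entry is found does it choose between forwarding a TLBMOD exception (clean entry) and refaulting the mapping (dirty entry). A standalone sketch of that lookup key and scan, ignoring global entries for brevity:

#include <stdio.h>
#include <stdint.h>

#define VPN2_MASK (~0x1fffULL)	/* assume 4 KiB pages */
#define ASID_MASK 0xffULL

struct guest_tlb { uint64_t hi; };	/* EntryHi only, for the model */

/*
 * Build the lookup key the handler uses: VPN2 from BadVAddr, ASID from the
 * guest's EntryHi, then scan the software-managed guest TLB.
 */
static int guest_tlb_lookup(const struct guest_tlb *tlb, int n,
			    uint64_t badvaddr, uint64_t guest_entryhi)
{
	uint64_t key = (badvaddr & VPN2_MASK) | (guest_entryhi & ASID_MASK);
	int i;

	for (i = 0; i < n; i++)
		if (tlb[i].hi == key)
			return i;
	return -1;
}

int main(void)
{
	struct guest_tlb tlb[4] = { { 0 }, { (0x00400000 & VPN2_MASK) | 0x05 } };

	printf("index: %d\n", guest_tlb_lookup(tlb, 4, 0x00401230, 0x05));
	return 0;
}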
129 | 220 | ||
130 | static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) | 221 | static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) |
@@ -157,7 +248,7 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) | |||
157 | * into the shadow host TLB | 248 | * into the shadow host TLB |
158 | */ | 249 | */ |
159 | 250 | ||
160 | er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); | 251 | er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu, store); |
161 | if (er == EMULATE_DONE) | 252 | if (er == EMULATE_DONE) |
162 | ret = RESUME_GUEST; | 253 | ret = RESUME_GUEST; |
163 | else { | 254 | else { |
@@ -169,29 +260,15 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) | |||
169 | * All KSEG0 faults are handled by KVM, as the guest kernel does | 260 | * All KSEG0 faults are handled by KVM, as the guest kernel does |
170 | * not expect to ever get them | 261 | * not expect to ever get them |
171 | */ | 262 | */ |
172 | if (kvm_mips_handle_kseg0_tlb_fault | 263 | if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, store) < 0) |
173 | (vcpu->arch.host_cp0_badvaddr, vcpu) < 0) { | 264 | ret = kvm_mips_bad_access(cause, opc, run, vcpu, store); |
174 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
175 | ret = RESUME_HOST; | ||
176 | } | ||
177 | } else if (KVM_GUEST_KERNEL_MODE(vcpu) | 265 | } else if (KVM_GUEST_KERNEL_MODE(vcpu) |
178 | && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { | 266 | && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { |
179 | /* | 267 | /* |
180 | * With EVA we may get a TLB exception instead of an address | 268 | * With EVA we may get a TLB exception instead of an address |
181 | * error when the guest performs MMIO to KSeg1 addresses. | 269 | * error when the guest performs MMIO to KSeg1 addresses. |
182 | */ | 270 | */ |
183 | kvm_debug("Emulate %s MMIO space\n", | 271 | ret = kvm_mips_bad_access(cause, opc, run, vcpu, store); |
184 | store ? "Store to" : "Load from"); | ||
185 | er = kvm_mips_emulate_inst(cause, opc, run, vcpu); | ||
186 | if (er == EMULATE_FAIL) { | ||
187 | kvm_err("Emulate %s MMIO space failed\n", | ||
188 | store ? "Store to" : "Load from"); | ||
189 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
190 | ret = RESUME_HOST; | ||
191 | } else { | ||
192 | run->exit_reason = KVM_EXIT_MMIO; | ||
193 | ret = RESUME_HOST; | ||
194 | } | ||
195 | } else { | 272 | } else { |
196 | kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n", | 273 | kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n", |
197 | store ? "ST" : "LD", cause, opc, badvaddr); | 274 | store ? "ST" : "LD", cause, opc, badvaddr); |
@@ -219,21 +296,11 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) | |||
219 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; | 296 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; |
220 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; | 297 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; |
221 | u32 cause = vcpu->arch.host_cp0_cause; | 298 | u32 cause = vcpu->arch.host_cp0_cause; |
222 | enum emulation_result er = EMULATE_DONE; | ||
223 | int ret = RESUME_GUEST; | 299 | int ret = RESUME_GUEST; |
224 | 300 | ||
225 | if (KVM_GUEST_KERNEL_MODE(vcpu) | 301 | if (KVM_GUEST_KERNEL_MODE(vcpu) |
226 | && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { | 302 | && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { |
227 | kvm_debug("Emulate Store to MMIO space\n"); | 303 | ret = kvm_mips_bad_store(cause, opc, run, vcpu); |
228 | er = kvm_mips_emulate_inst(cause, opc, run, vcpu); | ||
229 | if (er == EMULATE_FAIL) { | ||
230 | kvm_err("Emulate Store to MMIO space failed\n"); | ||
231 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
232 | ret = RESUME_HOST; | ||
233 | } else { | ||
234 | run->exit_reason = KVM_EXIT_MMIO; | ||
235 | ret = RESUME_HOST; | ||
236 | } | ||
237 | } else { | 304 | } else { |
238 | kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n", | 305 | kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n", |
239 | cause, opc, badvaddr); | 306 | cause, opc, badvaddr); |
@@ -249,26 +316,15 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) | |||
249 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; | 316 | u32 __user *opc = (u32 __user *) vcpu->arch.pc; |
250 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; | 317 | unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; |
251 | u32 cause = vcpu->arch.host_cp0_cause; | 318 | u32 cause = vcpu->arch.host_cp0_cause; |
252 | enum emulation_result er = EMULATE_DONE; | ||
253 | int ret = RESUME_GUEST; | 319 | int ret = RESUME_GUEST; |
254 | 320 | ||
255 | if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) { | 321 | if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) { |
256 | kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr); | 322 | ret = kvm_mips_bad_load(cause, opc, run, vcpu); |
257 | er = kvm_mips_emulate_inst(cause, opc, run, vcpu); | ||
258 | if (er == EMULATE_FAIL) { | ||
259 | kvm_err("Emulate Load from MMIO space failed\n"); | ||
260 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
261 | ret = RESUME_HOST; | ||
262 | } else { | ||
263 | run->exit_reason = KVM_EXIT_MMIO; | ||
264 | ret = RESUME_HOST; | ||
265 | } | ||
266 | } else { | 323 | } else { |
267 | kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n", | 324 | kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n", |
268 | cause, opc, badvaddr); | 325 | cause, opc, badvaddr); |
269 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 326 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
270 | ret = RESUME_HOST; | 327 | ret = RESUME_HOST; |
271 | er = EMULATE_FAIL; | ||
272 | } | 328 | } |
273 | return ret; | 329 | return ret; |
274 | } | 330 | } |
@@ -428,16 +484,75 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) | |||
428 | return ret; | 484 | return ret; |
429 | } | 485 | } |
430 | 486 | ||
431 | static int kvm_trap_emul_vm_init(struct kvm *kvm) | 487 | static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) |
432 | { | 488 | { |
489 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | ||
490 | struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; | ||
491 | |||
492 | /* | ||
493 | * Allocate GVA -> HPA page tables. | ||
494 | * MIPS doesn't use the mm_struct pointer argument. | ||
495 | */ | ||
496 | kern_mm->pgd = pgd_alloc(kern_mm); | ||
497 | if (!kern_mm->pgd) | ||
498 | return -ENOMEM; | ||
499 | |||
500 | user_mm->pgd = pgd_alloc(user_mm); | ||
501 | if (!user_mm->pgd) { | ||
502 | pgd_free(kern_mm, kern_mm->pgd); | ||
503 | return -ENOMEM; | ||
504 | } | ||
505 | |||
433 | return 0; | 506 | return 0; |
434 | } | 507 | } |
435 | 508 | ||
436 | static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) | 509 | static void kvm_mips_emul_free_gva_pt(pgd_t *pgd) |
437 | { | 510 | { |
438 | vcpu->arch.kscratch_enabled = 0xfc; | 511 | /* Don't free host kernel page tables copied from init_mm.pgd */ |
512 | const unsigned long end = 0x80000000; | ||
513 | unsigned long pgd_va, pud_va, pmd_va; | ||
514 | pud_t *pud; | ||
515 | pmd_t *pmd; | ||
516 | pte_t *pte; | ||
517 | int i, j, k; | ||
518 | |||
519 | for (i = 0; i < USER_PTRS_PER_PGD; i++) { | ||
520 | if (pgd_none(pgd[i])) | ||
521 | continue; | ||
522 | |||
523 | pgd_va = (unsigned long)i << PGDIR_SHIFT; | ||
524 | if (pgd_va >= end) | ||
525 | break; | ||
526 | pud = pud_offset(pgd + i, 0); | ||
527 | for (j = 0; j < PTRS_PER_PUD; j++) { | ||
528 | if (pud_none(pud[j])) | ||
529 | continue; | ||
530 | |||
531 | pud_va = pgd_va | ((unsigned long)j << PUD_SHIFT); | ||
532 | if (pud_va >= end) | ||
533 | break; | ||
534 | pmd = pmd_offset(pud + j, 0); | ||
535 | for (k = 0; k < PTRS_PER_PMD; k++) { | ||
536 | if (pmd_none(pmd[k])) | ||
537 | continue; | ||
538 | |||
539 | pmd_va = pud_va | (k << PMD_SHIFT); | ||
540 | if (pmd_va >= end) | ||
541 | break; | ||
542 | pte = pte_offset(pmd + k, 0); | ||
543 | pte_free_kernel(NULL, pte); | ||
544 | } | ||
545 | pmd_free(NULL, pmd); | ||
546 | } | ||
547 | pud_free(NULL, pud); | ||
548 | } | ||
549 | pgd_free(NULL, pgd); | ||
550 | } | ||
439 | 551 | ||
440 | return 0; | 552 | static void kvm_trap_emul_vcpu_uninit(struct kvm_vcpu *vcpu) |
553 | { | ||
554 | kvm_mips_emul_free_gva_pt(vcpu->arch.guest_kernel_mm.pgd); | ||
555 | kvm_mips_emul_free_gva_pt(vcpu->arch.guest_user_mm.pgd); | ||
441 | } | 556 | } |
442 | 557 | ||
443 | static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) | 558 | static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) |
@@ -499,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) | |||
499 | /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ | 614 | /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ |
500 | kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); | 615 | kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); |
501 | 616 | ||
617 | /* Status */ | ||
618 | kvm_write_c0_guest_status(cop0, ST0_BEV | ST0_ERL); | ||
619 | |||
502 | /* | 620 | /* |
503 | * Setup IntCtl defaults, compatibility mode for timer interrupts (HW5) | 621 | * Setup IntCtl defaults, compatibility mode for timer interrupts (HW5) |
504 | */ | 622 | */ |
@@ -508,17 +626,76 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) | |||
508 | kvm_write_c0_guest_ebase(cop0, KVM_GUEST_KSEG0 | | 626 | kvm_write_c0_guest_ebase(cop0, KVM_GUEST_KSEG0 | |
509 | (vcpu_id & MIPS_EBASE_CPUNUM)); | 627 | (vcpu_id & MIPS_EBASE_CPUNUM)); |
510 | 628 | ||
629 | /* Put PC at guest reset vector */ | ||
630 | vcpu->arch.pc = KVM_GUEST_CKSEG1ADDR(0x1fc00000); | ||
631 | |||
511 | return 0; | 632 | return 0; |
512 | } | 633 | } |
513 | 634 | ||
635 | static void kvm_trap_emul_flush_shadow_all(struct kvm *kvm) | ||
636 | { | ||
637 | /* Flush GVA page tables and invalidate GVA ASIDs on all VCPUs */ | ||
638 | kvm_flush_remote_tlbs(kvm); | ||
639 | } | ||
640 | |||
641 | static void kvm_trap_emul_flush_shadow_memslot(struct kvm *kvm, | ||
642 | const struct kvm_memory_slot *slot) | ||
643 | { | ||
644 | kvm_trap_emul_flush_shadow_all(kvm); | ||
645 | } | ||
646 | |||
647 | static u64 kvm_trap_emul_get_one_regs[] = { | ||
648 | KVM_REG_MIPS_CP0_INDEX, | ||
649 | KVM_REG_MIPS_CP0_ENTRYLO0, | ||
650 | KVM_REG_MIPS_CP0_ENTRYLO1, | ||
651 | KVM_REG_MIPS_CP0_CONTEXT, | ||
652 | KVM_REG_MIPS_CP0_USERLOCAL, | ||
653 | KVM_REG_MIPS_CP0_PAGEMASK, | ||
654 | KVM_REG_MIPS_CP0_WIRED, | ||
655 | KVM_REG_MIPS_CP0_HWRENA, | ||
656 | KVM_REG_MIPS_CP0_BADVADDR, | ||
657 | KVM_REG_MIPS_CP0_COUNT, | ||
658 | KVM_REG_MIPS_CP0_ENTRYHI, | ||
659 | KVM_REG_MIPS_CP0_COMPARE, | ||
660 | KVM_REG_MIPS_CP0_STATUS, | ||
661 | KVM_REG_MIPS_CP0_INTCTL, | ||
662 | KVM_REG_MIPS_CP0_CAUSE, | ||
663 | KVM_REG_MIPS_CP0_EPC, | ||
664 | KVM_REG_MIPS_CP0_PRID, | ||
665 | KVM_REG_MIPS_CP0_EBASE, | ||
666 | KVM_REG_MIPS_CP0_CONFIG, | ||
667 | KVM_REG_MIPS_CP0_CONFIG1, | ||
668 | KVM_REG_MIPS_CP0_CONFIG2, | ||
669 | KVM_REG_MIPS_CP0_CONFIG3, | ||
670 | KVM_REG_MIPS_CP0_CONFIG4, | ||
671 | KVM_REG_MIPS_CP0_CONFIG5, | ||
672 | KVM_REG_MIPS_CP0_CONFIG7, | ||
673 | KVM_REG_MIPS_CP0_ERROREPC, | ||
674 | KVM_REG_MIPS_CP0_KSCRATCH1, | ||
675 | KVM_REG_MIPS_CP0_KSCRATCH2, | ||
676 | KVM_REG_MIPS_CP0_KSCRATCH3, | ||
677 | KVM_REG_MIPS_CP0_KSCRATCH4, | ||
678 | KVM_REG_MIPS_CP0_KSCRATCH5, | ||
679 | KVM_REG_MIPS_CP0_KSCRATCH6, | ||
680 | |||
681 | KVM_REG_MIPS_COUNT_CTL, | ||
682 | KVM_REG_MIPS_COUNT_RESUME, | ||
683 | KVM_REG_MIPS_COUNT_HZ, | ||
684 | }; | ||
685 | |||
514 | static unsigned long kvm_trap_emul_num_regs(struct kvm_vcpu *vcpu) | 686 | static unsigned long kvm_trap_emul_num_regs(struct kvm_vcpu *vcpu) |
515 | { | 687 | { |
516 | return 0; | 688 | return ARRAY_SIZE(kvm_trap_emul_get_one_regs); |
517 | } | 689 | } |
518 | 690 | ||
519 | static int kvm_trap_emul_copy_reg_indices(struct kvm_vcpu *vcpu, | 691 | static int kvm_trap_emul_copy_reg_indices(struct kvm_vcpu *vcpu, |
520 | u64 __user *indices) | 692 | u64 __user *indices) |
521 | { | 693 | { |
694 | if (copy_to_user(indices, kvm_trap_emul_get_one_regs, | ||
695 | sizeof(kvm_trap_emul_get_one_regs))) | ||
696 | return -EFAULT; | ||
697 | indices += ARRAY_SIZE(kvm_trap_emul_get_one_regs); | ||
698 | |||
522 | return 0; | 699 | return 0; |
523 | } | 700 | } |
524 | 701 | ||
@@ -526,7 +703,81 @@ static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu, | |||
526 | const struct kvm_one_reg *reg, | 703 | const struct kvm_one_reg *reg, |
527 | s64 *v) | 704 | s64 *v) |
528 | { | 705 | { |
706 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
707 | |||
529 | switch (reg->id) { | 708 | switch (reg->id) { |
709 | case KVM_REG_MIPS_CP0_INDEX: | ||
710 | *v = (long)kvm_read_c0_guest_index(cop0); | ||
711 | break; | ||
712 | case KVM_REG_MIPS_CP0_ENTRYLO0: | ||
713 | *v = kvm_read_c0_guest_entrylo0(cop0); | ||
714 | break; | ||
715 | case KVM_REG_MIPS_CP0_ENTRYLO1: | ||
716 | *v = kvm_read_c0_guest_entrylo1(cop0); | ||
717 | break; | ||
718 | case KVM_REG_MIPS_CP0_CONTEXT: | ||
719 | *v = (long)kvm_read_c0_guest_context(cop0); | ||
720 | break; | ||
721 | case KVM_REG_MIPS_CP0_USERLOCAL: | ||
722 | *v = (long)kvm_read_c0_guest_userlocal(cop0); | ||
723 | break; | ||
724 | case KVM_REG_MIPS_CP0_PAGEMASK: | ||
725 | *v = (long)kvm_read_c0_guest_pagemask(cop0); | ||
726 | break; | ||
727 | case KVM_REG_MIPS_CP0_WIRED: | ||
728 | *v = (long)kvm_read_c0_guest_wired(cop0); | ||
729 | break; | ||
730 | case KVM_REG_MIPS_CP0_HWRENA: | ||
731 | *v = (long)kvm_read_c0_guest_hwrena(cop0); | ||
732 | break; | ||
733 | case KVM_REG_MIPS_CP0_BADVADDR: | ||
734 | *v = (long)kvm_read_c0_guest_badvaddr(cop0); | ||
735 | break; | ||
736 | case KVM_REG_MIPS_CP0_ENTRYHI: | ||
737 | *v = (long)kvm_read_c0_guest_entryhi(cop0); | ||
738 | break; | ||
739 | case KVM_REG_MIPS_CP0_COMPARE: | ||
740 | *v = (long)kvm_read_c0_guest_compare(cop0); | ||
741 | break; | ||
742 | case KVM_REG_MIPS_CP0_STATUS: | ||
743 | *v = (long)kvm_read_c0_guest_status(cop0); | ||
744 | break; | ||
745 | case KVM_REG_MIPS_CP0_INTCTL: | ||
746 | *v = (long)kvm_read_c0_guest_intctl(cop0); | ||
747 | break; | ||
748 | case KVM_REG_MIPS_CP0_CAUSE: | ||
749 | *v = (long)kvm_read_c0_guest_cause(cop0); | ||
750 | break; | ||
751 | case KVM_REG_MIPS_CP0_EPC: | ||
752 | *v = (long)kvm_read_c0_guest_epc(cop0); | ||
753 | break; | ||
754 | case KVM_REG_MIPS_CP0_PRID: | ||
755 | *v = (long)kvm_read_c0_guest_prid(cop0); | ||
756 | break; | ||
757 | case KVM_REG_MIPS_CP0_EBASE: | ||
758 | *v = (long)kvm_read_c0_guest_ebase(cop0); | ||
759 | break; | ||
760 | case KVM_REG_MIPS_CP0_CONFIG: | ||
761 | *v = (long)kvm_read_c0_guest_config(cop0); | ||
762 | break; | ||
763 | case KVM_REG_MIPS_CP0_CONFIG1: | ||
764 | *v = (long)kvm_read_c0_guest_config1(cop0); | ||
765 | break; | ||
766 | case KVM_REG_MIPS_CP0_CONFIG2: | ||
767 | *v = (long)kvm_read_c0_guest_config2(cop0); | ||
768 | break; | ||
769 | case KVM_REG_MIPS_CP0_CONFIG3: | ||
770 | *v = (long)kvm_read_c0_guest_config3(cop0); | ||
771 | break; | ||
772 | case KVM_REG_MIPS_CP0_CONFIG4: | ||
773 | *v = (long)kvm_read_c0_guest_config4(cop0); | ||
774 | break; | ||
775 | case KVM_REG_MIPS_CP0_CONFIG5: | ||
776 | *v = (long)kvm_read_c0_guest_config5(cop0); | ||
777 | break; | ||
778 | case KVM_REG_MIPS_CP0_CONFIG7: | ||
779 | *v = (long)kvm_read_c0_guest_config7(cop0); | ||
780 | break; | ||
530 | case KVM_REG_MIPS_CP0_COUNT: | 781 | case KVM_REG_MIPS_CP0_COUNT: |
531 | *v = kvm_mips_read_count(vcpu); | 782 | *v = kvm_mips_read_count(vcpu); |
532 | break; | 783 | break; |
@@ -539,6 +790,27 @@ static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu, | |||
539 | case KVM_REG_MIPS_COUNT_HZ: | 790 | case KVM_REG_MIPS_COUNT_HZ: |
540 | *v = vcpu->arch.count_hz; | 791 | *v = vcpu->arch.count_hz; |
541 | break; | 792 | break; |
793 | case KVM_REG_MIPS_CP0_ERROREPC: | ||
794 | *v = (long)kvm_read_c0_guest_errorepc(cop0); | ||
795 | break; | ||
796 | case KVM_REG_MIPS_CP0_KSCRATCH1: | ||
797 | *v = (long)kvm_read_c0_guest_kscratch1(cop0); | ||
798 | break; | ||
799 | case KVM_REG_MIPS_CP0_KSCRATCH2: | ||
800 | *v = (long)kvm_read_c0_guest_kscratch2(cop0); | ||
801 | break; | ||
802 | case KVM_REG_MIPS_CP0_KSCRATCH3: | ||
803 | *v = (long)kvm_read_c0_guest_kscratch3(cop0); | ||
804 | break; | ||
805 | case KVM_REG_MIPS_CP0_KSCRATCH4: | ||
806 | *v = (long)kvm_read_c0_guest_kscratch4(cop0); | ||
807 | break; | ||
808 | case KVM_REG_MIPS_CP0_KSCRATCH5: | ||
809 | *v = (long)kvm_read_c0_guest_kscratch5(cop0); | ||
810 | break; | ||
811 | case KVM_REG_MIPS_CP0_KSCRATCH6: | ||
812 | *v = (long)kvm_read_c0_guest_kscratch6(cop0); | ||
813 | break; | ||
542 | default: | 814 | default: |
543 | return -EINVAL; | 815 | return -EINVAL; |
544 | } | 816 | } |
@@ -554,6 +826,56 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, | |||
554 | unsigned int cur, change; | 826 | unsigned int cur, change; |
555 | 827 | ||
556 | switch (reg->id) { | 828 | switch (reg->id) { |
829 | case KVM_REG_MIPS_CP0_INDEX: | ||
830 | kvm_write_c0_guest_index(cop0, v); | ||
831 | break; | ||
832 | case KVM_REG_MIPS_CP0_ENTRYLO0: | ||
833 | kvm_write_c0_guest_entrylo0(cop0, v); | ||
834 | break; | ||
835 | case KVM_REG_MIPS_CP0_ENTRYLO1: | ||
836 | kvm_write_c0_guest_entrylo1(cop0, v); | ||
837 | break; | ||
838 | case KVM_REG_MIPS_CP0_CONTEXT: | ||
839 | kvm_write_c0_guest_context(cop0, v); | ||
840 | break; | ||
841 | case KVM_REG_MIPS_CP0_USERLOCAL: | ||
842 | kvm_write_c0_guest_userlocal(cop0, v); | ||
843 | break; | ||
844 | case KVM_REG_MIPS_CP0_PAGEMASK: | ||
845 | kvm_write_c0_guest_pagemask(cop0, v); | ||
846 | break; | ||
847 | case KVM_REG_MIPS_CP0_WIRED: | ||
848 | kvm_write_c0_guest_wired(cop0, v); | ||
849 | break; | ||
850 | case KVM_REG_MIPS_CP0_HWRENA: | ||
851 | kvm_write_c0_guest_hwrena(cop0, v); | ||
852 | break; | ||
853 | case KVM_REG_MIPS_CP0_BADVADDR: | ||
854 | kvm_write_c0_guest_badvaddr(cop0, v); | ||
855 | break; | ||
856 | case KVM_REG_MIPS_CP0_ENTRYHI: | ||
857 | kvm_write_c0_guest_entryhi(cop0, v); | ||
858 | break; | ||
859 | case KVM_REG_MIPS_CP0_STATUS: | ||
860 | kvm_write_c0_guest_status(cop0, v); | ||
861 | break; | ||
862 | case KVM_REG_MIPS_CP0_INTCTL: | ||
863 | /* No VInt, so no VS, read-only for now */ | ||
864 | break; | ||
865 | case KVM_REG_MIPS_CP0_EPC: | ||
866 | kvm_write_c0_guest_epc(cop0, v); | ||
867 | break; | ||
868 | case KVM_REG_MIPS_CP0_PRID: | ||
869 | kvm_write_c0_guest_prid(cop0, v); | ||
870 | break; | ||
871 | case KVM_REG_MIPS_CP0_EBASE: | ||
872 | /* | ||
873 | * Allow core number to be written, but the exception base must | ||
874 | * remain in guest KSeg0. | ||
875 | */ | ||
876 | kvm_change_c0_guest_ebase(cop0, 0x1ffff000 | MIPS_EBASE_CPUNUM, | ||
877 | v); | ||
878 | break; | ||
557 | case KVM_REG_MIPS_CP0_COUNT: | 879 | case KVM_REG_MIPS_CP0_COUNT: |
558 | kvm_mips_write_count(vcpu, v); | 880 | kvm_mips_write_count(vcpu, v); |
559 | break; | 881 | break; |
@@ -618,6 +940,9 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, | |||
618 | kvm_write_c0_guest_config5(cop0, v); | 940 | kvm_write_c0_guest_config5(cop0, v); |
619 | } | 941 | } |
620 | break; | 942 | break; |
943 | case KVM_REG_MIPS_CP0_CONFIG7: | ||
944 | /* writes ignored */ | ||
945 | break; | ||
621 | case KVM_REG_MIPS_COUNT_CTL: | 946 | case KVM_REG_MIPS_COUNT_CTL: |
622 | ret = kvm_mips_set_count_ctl(vcpu, v); | 947 | ret = kvm_mips_set_count_ctl(vcpu, v); |
623 | break; | 948 | break; |
@@ -627,24 +952,269 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, | |||
627 | case KVM_REG_MIPS_COUNT_HZ: | 952 | case KVM_REG_MIPS_COUNT_HZ: |
628 | ret = kvm_mips_set_count_hz(vcpu, v); | 953 | ret = kvm_mips_set_count_hz(vcpu, v); |
629 | break; | 954 | break; |
955 | case KVM_REG_MIPS_CP0_ERROREPC: | ||
956 | kvm_write_c0_guest_errorepc(cop0, v); | ||
957 | break; | ||
958 | case KVM_REG_MIPS_CP0_KSCRATCH1: | ||
959 | kvm_write_c0_guest_kscratch1(cop0, v); | ||
960 | break; | ||
961 | case KVM_REG_MIPS_CP0_KSCRATCH2: | ||
962 | kvm_write_c0_guest_kscratch2(cop0, v); | ||
963 | break; | ||
964 | case KVM_REG_MIPS_CP0_KSCRATCH3: | ||
965 | kvm_write_c0_guest_kscratch3(cop0, v); | ||
966 | break; | ||
967 | case KVM_REG_MIPS_CP0_KSCRATCH4: | ||
968 | kvm_write_c0_guest_kscratch4(cop0, v); | ||
969 | break; | ||
970 | case KVM_REG_MIPS_CP0_KSCRATCH5: | ||
971 | kvm_write_c0_guest_kscratch5(cop0, v); | ||
972 | break; | ||
973 | case KVM_REG_MIPS_CP0_KSCRATCH6: | ||
974 | kvm_write_c0_guest_kscratch6(cop0, v); | ||
975 | break; | ||
630 | default: | 976 | default: |
631 | return -EINVAL; | 977 | return -EINVAL; |
632 | } | 978 | } |
633 | return ret; | 979 | return ret; |
634 | } | 980 | } |
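Most of the newly writable CP0 registers are stored verbatim, but EBASE is updated through kvm_change_c0_guest_ebase() under a mask (0x1ffff000 plus the CPU-number field) so the exception base cannot be moved out of guest KSeg0. A generic sketch of that read-modify-write-under-mask idiom; the field layout chosen in main() is only assumed for illustration.

#include <stdio.h>
#include <stdint.h>

/* Update only the bits selected by @mask, leaving the rest untouched. */
static uint32_t change_masked(uint32_t reg, uint32_t mask, uint32_t val)
{
	return (reg & ~mask) | (val & mask);
}

int main(void)
{
	/* Assumed layout: writable exception-base bits plus a CPU-number field. */
	uint32_t mask = 0x1ffff000 | 0x3ff;
	uint32_t ebase = 0x40000000;		/* guest KSeg0 base, CPU 0 */

	/* Userspace asks for an out-of-range base; only the masked bits change. */
	ebase = change_masked(ebase, mask, 0x8002a001);

	printf("ebase = %#x\n", ebase);
	return 0;
}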
635 | 981 | ||
636 | static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu) | 982 | static int kvm_trap_emul_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
637 | { | 983 | { |
638 | kvm_lose_fpu(vcpu); | 984 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; |
985 | struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; | ||
986 | struct mm_struct *mm; | ||
987 | |||
988 | /* | ||
989 | * Were we in guest context? If so, restore the appropriate ASID based | ||
990 | * on the mode of the Guest (Kernel/User). | ||
991 | */ | ||
992 | if (current->flags & PF_VCPU) { | ||
993 | mm = KVM_GUEST_KERNEL_MODE(vcpu) ? kern_mm : user_mm; | ||
994 | if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) & | ||
995 | asid_version_mask(cpu)) | ||
996 | get_new_mmu_context(mm, cpu); | ||
997 | write_c0_entryhi(cpu_asid(cpu, mm)); | ||
998 | TLBMISS_HANDLER_SETUP_PGD(mm->pgd); | ||
999 | kvm_mips_suspend_mm(cpu); | ||
1000 | ehb(); | ||
1001 | } | ||
639 | 1002 | ||
640 | return 0; | 1003 | return 0; |
641 | } | 1004 | } |
642 | 1005 | ||
643 | static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu) | 1006 | static int kvm_trap_emul_vcpu_put(struct kvm_vcpu *vcpu, int cpu) |
644 | { | 1007 | { |
1008 | kvm_lose_fpu(vcpu); | ||
1009 | |||
1010 | if (current->flags & PF_VCPU) { | ||
1011 | /* Restore normal Linux process memory map */ | ||
1012 | if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & | ||
1013 | asid_version_mask(cpu))) | ||
1014 | get_new_mmu_context(current->mm, cpu); | ||
1015 | write_c0_entryhi(cpu_asid(cpu, current->mm)); | ||
1016 | TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd); | ||
1017 | kvm_mips_resume_mm(cpu); | ||
1018 | ehb(); | ||
1019 | } | ||
1020 | |||
645 | return 0; | 1021 | return 0; |
646 | } | 1022 | } |
647 | 1023 | ||
1024 | static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu, | ||
1025 | bool reload_asid) | ||
1026 | { | ||
1027 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | ||
1028 | struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; | ||
1029 | struct mm_struct *mm; | ||
1030 | int i; | ||
1031 | |||
1032 | if (likely(!vcpu->requests)) | ||
1033 | return; | ||
1034 | |||
1035 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { | ||
1036 | /* | ||
1037 | * Both kernel & user GVA mappings must be invalidated. The | ||
1038 | * caller is just about to check whether the ASID is stale | ||
1039 | * anyway so no need to reload it here. | ||
1040 | */ | ||
1041 | kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_GPA | KMF_KERN); | ||
1042 | kvm_mips_flush_gva_pt(user_mm->pgd, KMF_GPA | KMF_USER); | ||
1043 | for_each_possible_cpu(i) { | ||
1044 | cpu_context(i, kern_mm) = 0; | ||
1045 | cpu_context(i, user_mm) = 0; | ||
1046 | } | ||
1047 | |||
1048 | /* Generate new ASID for current mode */ | ||
1049 | if (reload_asid) { | ||
1050 | mm = KVM_GUEST_KERNEL_MODE(vcpu) ? kern_mm : user_mm; | ||
1051 | get_new_mmu_context(mm, cpu); | ||
1052 | htw_stop(); | ||
1053 | write_c0_entryhi(cpu_asid(cpu, mm)); | ||
1054 | TLBMISS_HANDLER_SETUP_PGD(mm->pgd); | ||
1055 | htw_start(); | ||
1056 | } | ||
1057 | } | ||
1058 | } | ||
1059 | |||
1060 | /** | ||
1061 | * kvm_trap_emul_gva_lockless_begin() - Begin lockless access to GVA space. | ||
1062 | * @vcpu: VCPU pointer. | ||
1063 | * | ||
1064 | * Call before a GVA space access outside of guest mode, to ensure that | ||
1065 | * asynchronous TLB flush requests are handled or delayed until completion of | ||
1066 | * the GVA access (as indicated by a matching kvm_trap_emul_gva_lockless_end()). | ||
1067 | * | ||
1068 | * Should be called with IRQs already enabled. | ||
1069 | */ | ||
1070 | void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu) | ||
1071 | { | ||
1072 | /* We re-enable IRQs in kvm_trap_emul_gva_lockless_end() */ | ||
1073 | WARN_ON_ONCE(irqs_disabled()); | ||
1074 | |||
1075 | /* | ||
1076 | * The caller is about to access the GVA space, so we set the mode to | ||
1077 | * force TLB flush requests to send an IPI, and also disable IRQs to | ||
1078 | * delay IPI handling until kvm_trap_emul_gva_lockless_end(). | ||
1079 | */ | ||
1080 | local_irq_disable(); | ||
1081 | |||
1082 | /* | ||
1083 | * Make sure the read of VCPU requests is not reordered ahead of the | ||
1084 | * write to vcpu->mode, or we could miss a TLB flush request while | ||
1085 | * the requester sees the VCPU as outside of guest mode and not needing | ||
1086 | * an IPI. | ||
1087 | */ | ||
1088 | smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); | ||
1089 | |||
1090 | /* | ||
1091 | * If a TLB flush has been requested (potentially while | ||
1092 | * OUTSIDE_GUEST_MODE and assumed immediately effective), perform it | ||
1093 | * before accessing the GVA space, and be sure to reload the ASID if | ||
1094 | * necessary as it'll be immediately used. | ||
1095 | * | ||
1096 | * TLB flush requests after this check will trigger an IPI due to the | ||
1097 | * mode change above, which will be delayed while IRQs stay disabled. | ||
1098 | */ | ||
1099 | kvm_trap_emul_check_requests(vcpu, smp_processor_id(), true); | ||
1100 | } | ||
1101 | |||
1102 | /** | ||
1103 | * kvm_trap_emul_gva_lockless_end() - End lockless access to GVA space. | ||
1104 | * @vcpu: VCPU pointer. | ||
1105 | * | ||
1106 | * Called after a GVA space access outside of guest mode. Should have a matching | ||
1107 | * call to kvm_trap_emul_gva_lockless_begin(). | ||
1108 | */ | ||
1109 | void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu) | ||
1110 | { | ||
1111 | /* | ||
1112 | * Make sure the write to vcpu->mode is not reordered in front of GVA | ||
1113 | * accesses, or a TLB flush requester may not think it necessary to send | ||
1114 | * an IPI. | ||
1115 | */ | ||
1116 | smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); | ||
1117 | |||
1118 | /* | ||
1119 | * Now that the access to GVA space is complete, it's safe for pending | ||
1120 | * TLB flush request IPIs to be handled (which indicates completion). | ||
1121 | */ | ||
1122 | local_irq_enable(); | ||
1123 | } | ||
1124 | |||
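The begin/end pair above forms a small critical section around any host-side access to guest virtual addresses. Below is a minimal sketch of how a caller might use it, assuming the surrounding trap_emul.c context; kvm_mips_guest_read_word() is a hypothetical helper for illustration, not a function from this patch.

/*
 * Hypothetical caller, for illustration only: read one word from guest
 * virtual address space under the lockless begin/end protocol above.
 */
static int kvm_mips_guest_read_word(struct kvm_vcpu *vcpu,
                                    unsigned long gva, u32 *out)
{
        int err;

        /* Called with IRQs enabled, outside of guest mode */
        kvm_trap_emul_gva_lockless_begin(vcpu);
        /* GVA space is mapped via the guest kernel/user mm, so user accessors work */
        err = get_user(*out, (u32 __user *)gva);
        kvm_trap_emul_gva_lockless_end(vcpu);

        return err;
}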
1125 | static void kvm_trap_emul_vcpu_reenter(struct kvm_run *run, | ||
1126 | struct kvm_vcpu *vcpu) | ||
1127 | { | ||
1128 | struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; | ||
1129 | struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; | ||
1130 | struct mm_struct *mm; | ||
1131 | struct mips_coproc *cop0 = vcpu->arch.cop0; | ||
1132 | int i, cpu = smp_processor_id(); | ||
1133 | unsigned int gasid; | ||
1134 | |||
1135 | /* | ||
1136 | * No need to reload the ASID; IRQs are already disabled so there's no rush, | ||
1137 | * and we'll check if we need to regenerate below anyway before | ||
1138 | * re-entering the guest. | ||
1139 | */ | ||
1140 | kvm_trap_emul_check_requests(vcpu, cpu, false); | ||
1141 | |||
1142 | if (KVM_GUEST_KERNEL_MODE(vcpu)) { | ||
1143 | mm = kern_mm; | ||
1144 | } else { | ||
1145 | mm = user_mm; | ||
1146 | |||
1147 | /* | ||
1148 | * Lazy host ASID regeneration / PT flush for guest user mode. | ||
1149 | * If the guest ASID has changed since the last guest usermode | ||
1150 | * execution, invalidate the stale TLB entries and flush GVA PT | ||
1151 | * entries too. | ||
1152 | */ | ||
1153 | gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID; | ||
1154 | if (gasid != vcpu->arch.last_user_gasid) { | ||
1155 | kvm_mips_flush_gva_pt(user_mm->pgd, KMF_USER); | ||
1156 | for_each_possible_cpu(i) | ||
1157 | cpu_context(i, user_mm) = 0; | ||
1158 | vcpu->arch.last_user_gasid = gasid; | ||
1159 | } | ||
1160 | } | ||
1161 | |||
1162 | /* | ||
1163 | * Check if ASID is stale. This may happen due to a TLB flush request or | ||
1164 | * a lazy user MM invalidation. | ||
1165 | */ | ||
1166 | if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) & | ||
1167 | asid_version_mask(cpu)) | ||
1168 | get_new_mmu_context(mm, cpu); | ||
1169 | } | ||
1170 | |||
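The staleness test at the end of kvm_trap_emul_vcpu_reenter() relies on the ASID generation ("version") counter that MIPS keeps above the hardware ASID bits. A standalone illustration of the same check, with assumed field widths (the 8-bit hardware ASID here is an assumption for the example, not taken from this patch):

#include <stdio.h>

#define ASID_MASK         0xffUL         /* assumed 8-bit hardware ASID */
#define ASID_VERSION_MASK (~ASID_MASK)   /* generation counter lives above it */

int main(void)
{
        unsigned long asid_cache = 0x300 | 0x05; /* CPU is on generation 3 */
        unsigned long mm_context = 0x200 | 0x05; /* mm last ran on generation 2 */

        /* Same shape as (cpu_context(cpu, mm) ^ asid_cache(cpu)) & asid_version_mask(cpu) */
        if ((mm_context ^ asid_cache) & ASID_VERSION_MASK)
                printf("stale: allocate a fresh ASID for this mm\n");
        else
                printf("still valid: reuse ASID %lu\n", mm_context & ASID_MASK);

        return 0;
}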
1171 | static int kvm_trap_emul_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) | ||
1172 | { | ||
1173 | int cpu = smp_processor_id(); | ||
1174 | int r; | ||
1175 | |||
1176 | /* Check if we have any exceptions/interrupts pending */ | ||
1177 | kvm_mips_deliver_interrupts(vcpu, | ||
1178 | kvm_read_c0_guest_cause(vcpu->arch.cop0)); | ||
1179 | |||
1180 | kvm_trap_emul_vcpu_reenter(run, vcpu); | ||
1181 | |||
1182 | /* | ||
1183 | * We use user accessors to access guest memory, but we don't want to | ||
1184 | * invoke Linux page faulting. | ||
1185 | */ | ||
1186 | pagefault_disable(); | ||
1187 | |||
1188 | /* Disable hardware page table walking while in guest */ | ||
1189 | htw_stop(); | ||
1190 | |||
1191 | /* | ||
1192 | * While in guest context we're in the guest's address space, not the | ||
1193 | * host process address space, so we need to be careful not to confuse | ||
1194 | * e.g. cache management IPIs. | ||
1195 | */ | ||
1196 | kvm_mips_suspend_mm(cpu); | ||
1197 | |||
1198 | r = vcpu->arch.vcpu_run(run, vcpu); | ||
1199 | |||
1200 | /* We may have migrated while handling guest exits */ | ||
1201 | cpu = smp_processor_id(); | ||
1202 | |||
1203 | /* Restore normal Linux process memory map */ | ||
1204 | if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & | ||
1205 | asid_version_mask(cpu))) | ||
1206 | get_new_mmu_context(current->mm, cpu); | ||
1207 | write_c0_entryhi(cpu_asid(cpu, current->mm)); | ||
1208 | TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd); | ||
1209 | kvm_mips_resume_mm(cpu); | ||
1210 | |||
1211 | htw_start(); | ||
1212 | |||
1213 | pagefault_enable(); | ||
1214 | |||
1215 | return r; | ||
1216 | } | ||
1217 | |||
648 | static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { | 1218 | static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { |
649 | /* exit handlers */ | 1219 | /* exit handlers */ |
650 | .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable, | 1220 | .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable, |
@@ -661,9 +1231,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { | |||
661 | .handle_fpe = kvm_trap_emul_handle_fpe, | 1231 | .handle_fpe = kvm_trap_emul_handle_fpe, |
662 | .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, | 1232 | .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, |
663 | 1233 | ||
664 | .vm_init = kvm_trap_emul_vm_init, | ||
665 | .vcpu_init = kvm_trap_emul_vcpu_init, | 1234 | .vcpu_init = kvm_trap_emul_vcpu_init, |
1235 | .vcpu_uninit = kvm_trap_emul_vcpu_uninit, | ||
666 | .vcpu_setup = kvm_trap_emul_vcpu_setup, | 1236 | .vcpu_setup = kvm_trap_emul_vcpu_setup, |
1237 | .flush_shadow_all = kvm_trap_emul_flush_shadow_all, | ||
1238 | .flush_shadow_memslot = kvm_trap_emul_flush_shadow_memslot, | ||
667 | .gva_to_gpa = kvm_trap_emul_gva_to_gpa_cb, | 1239 | .gva_to_gpa = kvm_trap_emul_gva_to_gpa_cb, |
668 | .queue_timer_int = kvm_mips_queue_timer_int_cb, | 1240 | .queue_timer_int = kvm_mips_queue_timer_int_cb, |
669 | .dequeue_timer_int = kvm_mips_dequeue_timer_int_cb, | 1241 | .dequeue_timer_int = kvm_mips_dequeue_timer_int_cb, |
@@ -675,8 +1247,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { | |||
675 | .copy_reg_indices = kvm_trap_emul_copy_reg_indices, | 1247 | .copy_reg_indices = kvm_trap_emul_copy_reg_indices, |
676 | .get_one_reg = kvm_trap_emul_get_one_reg, | 1248 | .get_one_reg = kvm_trap_emul_get_one_reg, |
677 | .set_one_reg = kvm_trap_emul_set_one_reg, | 1249 | .set_one_reg = kvm_trap_emul_set_one_reg, |
678 | .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs, | 1250 | .vcpu_load = kvm_trap_emul_vcpu_load, |
679 | .vcpu_set_regs = kvm_trap_emul_vcpu_set_regs, | 1251 | .vcpu_put = kvm_trap_emul_vcpu_put, |
1252 | .vcpu_run = kvm_trap_emul_vcpu_run, | ||
1253 | .vcpu_reenter = kvm_trap_emul_vcpu_reenter, | ||
680 | }; | 1254 | }; |
681 | 1255 | ||
682 | int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) | 1256 | int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) |
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 0db010cc4e65..d9b48f5bb606 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h | |||
@@ -22,6 +22,10 @@ | |||
22 | 22 | ||
23 | #include <asm/book3s/64/mmu-hash.h> | 23 | #include <asm/book3s/64/mmu-hash.h> |
24 | 24 | ||
25 | /* Power architecture requires the HPT to be at least 256kiB, at most 64TiB */ | ||
26 | #define PPC_MIN_HPT_ORDER 18 | ||
27 | #define PPC_MAX_HPT_ORDER 46 | ||
28 | |||
25 | #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE | 29 | #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE |
26 | static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) | 30 | static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) |
27 | { | 31 | { |
@@ -356,6 +360,18 @@ extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); | |||
356 | 360 | ||
357 | extern void kvmhv_rm_send_ipi(int cpu); | 361 | extern void kvmhv_rm_send_ipi(int cpu); |
358 | 362 | ||
363 | static inline unsigned long kvmppc_hpt_npte(struct kvm_hpt_info *hpt) | ||
364 | { | ||
365 | /* HPTEs are 2**4 bytes long */ | ||
366 | return 1UL << (hpt->order - 4); | ||
367 | } | ||
368 | |||
369 | static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt) | ||
370 | { | ||
371 | /* 128 (2**7) bytes in each HPTEG */ | ||
372 | return (1UL << (hpt->order - 7)) - 1; | ||
373 | } | ||
374 | |||
359 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ | 375 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ |
360 | 376 | ||
361 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ | 377 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ |
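The two helpers above derive everything from the HPT order: HPTEs are 2^4 bytes and HPTEGs are 2^7 bytes, so the minimum order of 18 (a 256 kiB table) gives 2^14 HPTEs in 2^11 groups, while the maximum order of 46 covers 64 TiB. A standalone sketch of the same arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned int order;

        for (order = 18; order <= 46; order += 7) {
                unsigned long long npte = 1ULL << (order - 4);       /* 16-byte HPTEs */
                unsigned long long mask = (1ULL << (order - 7)) - 1; /* 128-byte HPTEGs */

                printf("order %2u: %llu bytes, %llu HPTEs, hash mask 0x%llx\n",
                       order, 1ULL << order, npte, mask);
        }
        return 0;
}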
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index b2dbeac3f450..7bba8f415627 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -241,12 +241,24 @@ struct kvm_arch_memory_slot { | |||
241 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ | 241 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ |
242 | }; | 242 | }; |
243 | 243 | ||
244 | struct kvm_hpt_info { | ||
245 | /* Host virtual (linear mapping) address of guest HPT */ | ||
246 | unsigned long virt; | ||
247 | /* Array of reverse mapping entries for each guest HPTE */ | ||
248 | struct revmap_entry *rev; | ||
249 | /* Guest HPT size is 2**(order) bytes */ | ||
250 | u32 order; | ||
251 | /* 1 if HPT allocated with CMA, 0 otherwise */ | ||
252 | int cma; | ||
253 | }; | ||
254 | |||
255 | struct kvm_resize_hpt; | ||
256 | |||
244 | struct kvm_arch { | 257 | struct kvm_arch { |
245 | unsigned int lpid; | 258 | unsigned int lpid; |
246 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 259 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
247 | unsigned int tlb_sets; | 260 | unsigned int tlb_sets; |
248 | unsigned long hpt_virt; | 261 | struct kvm_hpt_info hpt; |
249 | struct revmap_entry *revmap; | ||
250 | atomic64_t mmio_update; | 262 | atomic64_t mmio_update; |
251 | unsigned int host_lpid; | 263 | unsigned int host_lpid; |
252 | unsigned long host_lpcr; | 264 | unsigned long host_lpcr; |
@@ -256,20 +268,17 @@ struct kvm_arch { | |||
256 | unsigned long lpcr; | 268 | unsigned long lpcr; |
257 | unsigned long vrma_slb_v; | 269 | unsigned long vrma_slb_v; |
258 | int hpte_setup_done; | 270 | int hpte_setup_done; |
259 | u32 hpt_order; | ||
260 | atomic_t vcpus_running; | 271 | atomic_t vcpus_running; |
261 | u32 online_vcores; | 272 | u32 online_vcores; |
262 | unsigned long hpt_npte; | ||
263 | unsigned long hpt_mask; | ||
264 | atomic_t hpte_mod_interest; | 273 | atomic_t hpte_mod_interest; |
265 | cpumask_t need_tlb_flush; | 274 | cpumask_t need_tlb_flush; |
266 | cpumask_t cpu_in_guest; | 275 | cpumask_t cpu_in_guest; |
267 | int hpt_cma_alloc; | ||
268 | u8 radix; | 276 | u8 radix; |
269 | pgd_t *pgtable; | 277 | pgd_t *pgtable; |
270 | u64 process_table; | 278 | u64 process_table; |
271 | struct dentry *debugfs_dir; | 279 | struct dentry *debugfs_dir; |
272 | struct dentry *htab_dentry; | 280 | struct dentry *htab_dentry; |
281 | struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ | ||
273 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ | 282 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ |
274 | #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE | 283 | #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE |
275 | struct mutex hpt_mutex; | 284 | struct mutex hpt_mutex; |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 48c760f89590..dd11c4c8c56a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -155,9 +155,10 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); | |||
155 | extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); | 155 | extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); |
156 | extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); | 156 | extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); |
157 | 157 | ||
158 | extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp); | 158 | extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order); |
159 | extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp); | 159 | extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info); |
160 | extern void kvmppc_free_hpt(struct kvm *kvm); | 160 | extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order); |
161 | extern void kvmppc_free_hpt(struct kvm_hpt_info *info); | ||
161 | extern long kvmppc_prepare_vrma(struct kvm *kvm, | 162 | extern long kvmppc_prepare_vrma(struct kvm *kvm, |
162 | struct kvm_userspace_memory_region *mem); | 163 | struct kvm_userspace_memory_region *mem); |
163 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, | 164 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, |
@@ -186,8 +187,8 @@ extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
186 | unsigned long tce_value, unsigned long npages); | 187 | unsigned long tce_value, unsigned long npages); |
187 | extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 188 | extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
188 | unsigned long ioba); | 189 | unsigned long ioba); |
189 | extern struct page *kvm_alloc_hpt(unsigned long nr_pages); | 190 | extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages); |
190 | extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); | 191 | extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages); |
191 | extern int kvmppc_core_init_vm(struct kvm *kvm); | 192 | extern int kvmppc_core_init_vm(struct kvm *kvm); |
192 | extern void kvmppc_core_destroy_vm(struct kvm *kvm); | 193 | extern void kvmppc_core_destroy_vm(struct kvm *kvm); |
193 | extern void kvmppc_core_free_memslot(struct kvm *kvm, | 194 | extern void kvmppc_core_free_memslot(struct kvm *kvm, |
@@ -214,6 +215,10 @@ extern void kvmppc_bookehv_exit(void); | |||
214 | extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); | 215 | extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); |
215 | 216 | ||
216 | extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); | 217 | extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); |
218 | extern long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, | ||
219 | struct kvm_ppc_resize_hpt *rhpt); | ||
220 | extern long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, | ||
221 | struct kvm_ppc_resize_hpt *rhpt); | ||
217 | 222 | ||
218 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); | 223 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); |
219 | 224 | ||
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index cc0908b6c2a0..4edbe4bb0e8b 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
@@ -633,5 +633,7 @@ struct kvm_ppc_rmmu_info { | |||
633 | #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) | 633 | #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) |
634 | #define KVM_XICS_MASKED (1ULL << 41) | 634 | #define KVM_XICS_MASKED (1ULL << 41) |
635 | #define KVM_XICS_PENDING (1ULL << 42) | 635 | #define KVM_XICS_PENDING (1ULL << 42) |
636 | #define KVM_XICS_PRESENTED (1ULL << 43) | ||
637 | #define KVM_XICS_QUEUED (1ULL << 44) | ||
636 | 638 | ||
637 | #endif /* __LINUX_KVM_POWERPC_H */ | 639 | #endif /* __LINUX_KVM_POWERPC_H */ |
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index a2eb6d354a57..1992676c7a94 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c | |||
@@ -224,7 +224,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
224 | ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary); | 224 | ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary); |
225 | 225 | ||
226 | if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { | 226 | if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { |
227 | printk(KERN_ERR "KVM: Can't copy data from 0x%lx!\n", ptegp); | 227 | printk_ratelimited(KERN_ERR |
228 | "KVM: Can't copy data from 0x%lx!\n", ptegp); | ||
228 | goto no_page_found; | 229 | goto no_page_found; |
229 | } | 230 | } |
230 | 231 | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index b9131aa1aedf..70153578131a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
@@ -265,7 +265,8 @@ do_second: | |||
265 | goto no_page_found; | 265 | goto no_page_found; |
266 | 266 | ||
267 | if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { | 267 | if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { |
268 | printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp); | 268 | printk_ratelimited(KERN_ERR |
269 | "KVM: Can't copy data from 0x%lx!\n", ptegp); | ||
269 | goto no_page_found; | 270 | goto no_page_found; |
270 | } | 271 | } |
271 | 272 | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 9df3d940acec..f3158fb16de3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -40,84 +40,101 @@ | |||
40 | 40 | ||
41 | #include "trace_hv.h" | 41 | #include "trace_hv.h" |
42 | 42 | ||
43 | /* Power architecture requires HPT is at least 256kB */ | 43 | //#define DEBUG_RESIZE_HPT 1 |
44 | #define PPC_MIN_HPT_ORDER 18 | 44 | |
45 | #ifdef DEBUG_RESIZE_HPT | ||
46 | #define resize_hpt_debug(resize, ...) \ | ||
47 | do { \ | ||
48 | printk(KERN_DEBUG "RESIZE HPT %p: ", resize); \ | ||
49 | printk(__VA_ARGS__); \ | ||
50 | } while (0) | ||
51 | #else | ||
52 | #define resize_hpt_debug(resize, ...) \ | ||
53 | do { } while (0) | ||
54 | #endif | ||
45 | 55 | ||
46 | static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, | 56 | static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, |
47 | long pte_index, unsigned long pteh, | 57 | long pte_index, unsigned long pteh, |
48 | unsigned long ptel, unsigned long *pte_idx_ret); | 58 | unsigned long ptel, unsigned long *pte_idx_ret); |
59 | |||
60 | struct kvm_resize_hpt { | ||
61 | /* These fields read-only after init */ | ||
62 | struct kvm *kvm; | ||
63 | struct work_struct work; | ||
64 | u32 order; | ||
65 | |||
66 | /* These fields protected by kvm->lock */ | ||
67 | int error; | ||
68 | bool prepare_done; | ||
69 | |||
70 | /* Private to the work thread, until prepare_done is true, | ||
71 | * then protected by kvm->lock */ | ||
72 | struct kvm_hpt_info hpt; | ||
73 | }; | ||
74 | |||
49 | static void kvmppc_rmap_reset(struct kvm *kvm); | 75 | static void kvmppc_rmap_reset(struct kvm *kvm); |
50 | 76 | ||
51 | long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) | 77 | int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order) |
52 | { | 78 | { |
53 | unsigned long hpt = 0; | 79 | unsigned long hpt = 0; |
54 | struct revmap_entry *rev; | 80 | int cma = 0; |
55 | struct page *page = NULL; | 81 | struct page *page = NULL; |
56 | long order = KVM_DEFAULT_HPT_ORDER; | 82 | struct revmap_entry *rev; |
83 | unsigned long npte; | ||
57 | 84 | ||
58 | if (htab_orderp) { | 85 | if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER)) |
59 | order = *htab_orderp; | 86 | return -EINVAL; |
60 | if (order < PPC_MIN_HPT_ORDER) | ||
61 | order = PPC_MIN_HPT_ORDER; | ||
62 | } | ||
63 | 87 | ||
64 | kvm->arch.hpt_cma_alloc = 0; | 88 | page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT)); |
65 | page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT)); | ||
66 | if (page) { | 89 | if (page) { |
67 | hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); | 90 | hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); |
68 | memset((void *)hpt, 0, (1ul << order)); | 91 | memset((void *)hpt, 0, (1ul << order)); |
69 | kvm->arch.hpt_cma_alloc = 1; | 92 | cma = 1; |
70 | } | 93 | } |
71 | 94 | ||
72 | /* Lastly try successively smaller sizes from the page allocator */ | 95 | if (!hpt) |
73 | /* Only do this if userspace didn't specify a size via ioctl */ | 96 | hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT |
74 | while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) { | 97 | |__GFP_NOWARN, order - PAGE_SHIFT); |
75 | hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| | ||
76 | __GFP_NOWARN, order - PAGE_SHIFT); | ||
77 | if (!hpt) | ||
78 | --order; | ||
79 | } | ||
80 | 98 | ||
81 | if (!hpt) | 99 | if (!hpt) |
82 | return -ENOMEM; | 100 | return -ENOMEM; |
83 | 101 | ||
84 | kvm->arch.hpt_virt = hpt; | ||
85 | kvm->arch.hpt_order = order; | ||
86 | /* HPTEs are 2**4 bytes long */ | 102 | /* HPTEs are 2**4 bytes long */ |
87 | kvm->arch.hpt_npte = 1ul << (order - 4); | 103 | npte = 1ul << (order - 4); |
88 | /* 128 (2**7) bytes in each HPTEG */ | ||
89 | kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; | ||
90 | |||
91 | atomic64_set(&kvm->arch.mmio_update, 0); | ||
92 | 104 | ||
93 | /* Allocate reverse map array */ | 105 | /* Allocate reverse map array */ |
94 | rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); | 106 | rev = vmalloc(sizeof(struct revmap_entry) * npte); |
95 | if (!rev) { | 107 | if (!rev) { |
96 | pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); | 108 | pr_err("kvmppc_allocate_hpt: Couldn't alloc reverse map array\n"); |
97 | goto out_freehpt; | 109 | if (cma) |
110 | kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT)); | ||
111 | else | ||
112 | free_pages(hpt, order - PAGE_SHIFT); | ||
113 | return -ENOMEM; | ||
98 | } | 114 | } |
99 | kvm->arch.revmap = rev; | ||
100 | kvm->arch.sdr1 = __pa(hpt) | (order - 18); | ||
101 | 115 | ||
102 | pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", | 116 | info->order = order; |
103 | hpt, order, kvm->arch.lpid); | 117 | info->virt = hpt; |
118 | info->cma = cma; | ||
119 | info->rev = rev; | ||
104 | 120 | ||
105 | if (htab_orderp) | ||
106 | *htab_orderp = order; | ||
107 | return 0; | 121 | return 0; |
122 | } | ||
108 | 123 | ||
109 | out_freehpt: | 124 | void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info) |
110 | if (kvm->arch.hpt_cma_alloc) | 125 | { |
111 | kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); | 126 | atomic64_set(&kvm->arch.mmio_update, 0); |
112 | else | 127 | kvm->arch.hpt = *info; |
113 | free_pages(hpt, order - PAGE_SHIFT); | 128 | kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18); |
114 | return -ENOMEM; | 129 | |
130 | pr_debug("KVM guest htab at %lx (order %ld), LPID %x\n", | ||
131 | info->virt, (long)info->order, kvm->arch.lpid); | ||
115 | } | 132 | } |
116 | 133 | ||
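kvmppc_set_hpt() encodes the new table into SDR1 as the HPT's real address OR'd with (order - 18), which is the HTABSIZE field of the hashed-MMU SDR1 register. A standalone illustration, using a made-up, size-aligned base address:

#include <stdio.h>

int main(void)
{
        unsigned long long hpt_phys = 0x40000000ULL; /* assumed base, aligned to the HPT size */
        unsigned int order = 30;                     /* a 1 GiB guest HPT */
        unsigned long long sdr1 = hpt_phys | (order - 18);

        printf("order %u -> HTABSIZE %u, SDR1 = 0x%llx\n", order, order - 18, sdr1);
        return 0;
}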
117 | long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) | 134 | long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) |
118 | { | 135 | { |
119 | long err = -EBUSY; | 136 | long err = -EBUSY; |
120 | long order; | 137 | struct kvm_hpt_info info; |
121 | 138 | ||
122 | if (kvm_is_radix(kvm)) | 139 | if (kvm_is_radix(kvm)) |
123 | return -EINVAL; | 140 | return -EINVAL; |
@@ -132,36 +149,44 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) | |||
132 | goto out; | 149 | goto out; |
133 | } | 150 | } |
134 | } | 151 | } |
135 | if (kvm->arch.hpt_virt) { | 152 | if (kvm->arch.hpt.order == order) { |
136 | order = kvm->arch.hpt_order; | 153 | /* We already have a suitable HPT */ |
154 | |||
137 | /* Set the entire HPT to 0, i.e. invalid HPTEs */ | 155 | /* Set the entire HPT to 0, i.e. invalid HPTEs */ |
138 | memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); | 156 | memset((void *)kvm->arch.hpt.virt, 0, 1ul << order); |
139 | /* | 157 | /* |
140 | * Reset all the reverse-mapping chains for all memslots | 158 | * Reset all the reverse-mapping chains for all memslots |
141 | */ | 159 | */ |
142 | kvmppc_rmap_reset(kvm); | 160 | kvmppc_rmap_reset(kvm); |
143 | /* Ensure that each vcpu will flush its TLB on next entry. */ | 161 | /* Ensure that each vcpu will flush its TLB on next entry. */ |
144 | cpumask_setall(&kvm->arch.need_tlb_flush); | 162 | cpumask_setall(&kvm->arch.need_tlb_flush); |
145 | *htab_orderp = order; | ||
146 | err = 0; | 163 | err = 0; |
147 | } else { | 164 | goto out; |
148 | err = kvmppc_alloc_hpt(kvm, htab_orderp); | ||
149 | order = *htab_orderp; | ||
150 | } | 165 | } |
151 | out: | 166 | |
167 | if (kvm->arch.hpt.virt) | ||
168 | kvmppc_free_hpt(&kvm->arch.hpt); | ||
169 | |||
170 | err = kvmppc_allocate_hpt(&info, order); | ||
171 | if (err < 0) | ||
172 | goto out; | ||
173 | kvmppc_set_hpt(kvm, &info); | ||
174 | |||
175 | out: | ||
152 | mutex_unlock(&kvm->lock); | 176 | mutex_unlock(&kvm->lock); |
153 | return err; | 177 | return err; |
154 | } | 178 | } |
155 | 179 | ||
156 | void kvmppc_free_hpt(struct kvm *kvm) | 180 | void kvmppc_free_hpt(struct kvm_hpt_info *info) |
157 | { | 181 | { |
158 | vfree(kvm->arch.revmap); | 182 | vfree(info->rev); |
159 | if (kvm->arch.hpt_cma_alloc) | 183 | if (info->cma) |
160 | kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), | 184 | kvm_free_hpt_cma(virt_to_page(info->virt), |
161 | 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); | 185 | 1 << (info->order - PAGE_SHIFT)); |
162 | else if (kvm->arch.hpt_virt) | 186 | else if (info->virt) |
163 | free_pages(kvm->arch.hpt_virt, | 187 | free_pages(info->virt, info->order - PAGE_SHIFT); |
164 | kvm->arch.hpt_order - PAGE_SHIFT); | 188 | info->virt = 0; |
189 | info->order = 0; | ||
165 | } | 190 | } |
166 | 191 | ||
167 | /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ | 192 | /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ |
@@ -196,8 +221,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, | |||
196 | if (npages > 1ul << (40 - porder)) | 221 | if (npages > 1ul << (40 - porder)) |
197 | npages = 1ul << (40 - porder); | 222 | npages = 1ul << (40 - porder); |
198 | /* Can't use more than 1 HPTE per HPTEG */ | 223 | /* Can't use more than 1 HPTE per HPTEG */ |
199 | if (npages > kvm->arch.hpt_mask + 1) | 224 | if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1) |
200 | npages = kvm->arch.hpt_mask + 1; | 225 | npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1; |
201 | 226 | ||
202 | hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | | 227 | hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | |
203 | HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); | 228 | HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); |
@@ -207,7 +232,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, | |||
207 | for (i = 0; i < npages; ++i) { | 232 | for (i = 0; i < npages; ++i) { |
208 | addr = i << porder; | 233 | addr = i << porder; |
209 | /* can't use hpt_hash since va > 64 bits */ | 234 | /* can't use hpt_hash since va > 64 bits */ |
210 | hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask; | 235 | hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) |
236 | & kvmppc_hpt_mask(&kvm->arch.hpt); | ||
211 | /* | 237 | /* |
212 | * We assume that the hash table is empty and no | 238 | * We assume that the hash table is empty and no |
213 | * vcpus are using it at this stage. Since we create | 239 | * vcpus are using it at this stage. Since we create |
@@ -340,11 +366,11 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
340 | preempt_enable(); | 366 | preempt_enable(); |
341 | return -ENOENT; | 367 | return -ENOENT; |
342 | } | 368 | } |
343 | hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); | 369 | hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); |
344 | v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; | 370 | v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; |
345 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | 371 | if (cpu_has_feature(CPU_FTR_ARCH_300)) |
346 | v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1])); | 372 | v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1])); |
347 | gr = kvm->arch.revmap[index].guest_rpte; | 373 | gr = kvm->arch.hpt.rev[index].guest_rpte; |
348 | 374 | ||
349 | unlock_hpte(hptep, orig_v); | 375 | unlock_hpte(hptep, orig_v); |
350 | preempt_enable(); | 376 | preempt_enable(); |
@@ -485,8 +511,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
485 | } | 511 | } |
486 | } | 512 | } |
487 | index = vcpu->arch.pgfault_index; | 513 | index = vcpu->arch.pgfault_index; |
488 | hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); | 514 | hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); |
489 | rev = &kvm->arch.revmap[index]; | 515 | rev = &kvm->arch.hpt.rev[index]; |
490 | preempt_disable(); | 516 | preempt_disable(); |
491 | while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) | 517 | while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) |
492 | cpu_relax(); | 518 | cpu_relax(); |
@@ -745,13 +771,53 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
745 | return kvm_handle_hva_range(kvm, hva, hva + 1, handler); | 771 | return kvm_handle_hva_range(kvm, hva, hva + 1, handler); |
746 | } | 772 | } |
747 | 773 | ||
774 | /* Must be called with both HPTE and rmap locked */ | ||
775 | static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i, | ||
776 | unsigned long *rmapp, unsigned long gfn) | ||
777 | { | ||
778 | __be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); | ||
779 | struct revmap_entry *rev = kvm->arch.hpt.rev; | ||
780 | unsigned long j, h; | ||
781 | unsigned long ptel, psize, rcbits; | ||
782 | |||
783 | j = rev[i].forw; | ||
784 | if (j == i) { | ||
785 | /* chain is now empty */ | ||
786 | *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); | ||
787 | } else { | ||
788 | /* remove i from chain */ | ||
789 | h = rev[i].back; | ||
790 | rev[h].forw = j; | ||
791 | rev[j].back = h; | ||
792 | rev[i].forw = rev[i].back = i; | ||
793 | *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; | ||
794 | } | ||
795 | |||
796 | /* Now check and modify the HPTE */ | ||
797 | ptel = rev[i].guest_rpte; | ||
798 | psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); | ||
799 | if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && | ||
800 | hpte_rpn(ptel, psize) == gfn) { | ||
801 | hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); | ||
802 | kvmppc_invalidate_hpte(kvm, hptep, i); | ||
803 | hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO); | ||
804 | /* Harvest R and C */ | ||
805 | rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); | ||
806 | *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; | ||
807 | if (rcbits & HPTE_R_C) | ||
808 | kvmppc_update_rmap_change(rmapp, psize); | ||
809 | if (rcbits & ~rev[i].guest_rpte) { | ||
810 | rev[i].guest_rpte = ptel | rcbits; | ||
811 | note_hpte_modification(kvm, &rev[i]); | ||
812 | } | ||
813 | } | ||
814 | } | ||
815 | |||
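kvmppc_unmap_hpte() splices an HPTE index out of the per-page reverse-map ring: the forw/back indices form a circular doubly-linked list, and an entry pointing at itself marks a single-element chain. A standalone sketch of the same unlink on a three-entry ring (the head handling is simplified relative to the real KVMPPC_RMAP_INDEX update):

#include <stdio.h>

struct rev { unsigned long forw, back; };

static void unlink_entry(struct rev *rev, unsigned long *head, unsigned long i)
{
        unsigned long j = rev[i].forw;

        if (j == i) {
                *head = ~0UL;                   /* chain is now empty */
        } else {
                unsigned long h = rev[i].back;  /* splice i out of the ring */

                rev[h].forw = j;
                rev[j].back = h;
                rev[i].forw = rev[i].back = i;
                if (*head == i)
                        *head = j;              /* roughly the KVMPPC_RMAP_INDEX update */
        }
}

int main(void)
{
        struct rev rev[3] = {
                { .forw = 1, .back = 2 },
                { .forw = 2, .back = 0 },
                { .forw = 0, .back = 1 },
        };
        unsigned long head = 0;

        unlink_entry(rev, &head, 0);
        printf("new head %lu, ring: %lu -> %lu -> back\n", head, rev[1].forw, rev[2].forw);
        return 0;
}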
748 | static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, | 816 | static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, |
749 | unsigned long gfn) | 817 | unsigned long gfn) |
750 | { | 818 | { |
751 | struct revmap_entry *rev = kvm->arch.revmap; | 819 | unsigned long i; |
752 | unsigned long h, i, j; | ||
753 | __be64 *hptep; | 820 | __be64 *hptep; |
754 | unsigned long ptel, psize, rcbits; | ||
755 | unsigned long *rmapp; | 821 | unsigned long *rmapp; |
756 | 822 | ||
757 | rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; | 823 | rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; |
@@ -768,7 +834,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
768 | * rmap chain lock. | 834 | * rmap chain lock. |
769 | */ | 835 | */ |
770 | i = *rmapp & KVMPPC_RMAP_INDEX; | 836 | i = *rmapp & KVMPPC_RMAP_INDEX; |
771 | hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); | 837 | hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); |
772 | if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { | 838 | if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { |
773 | /* unlock rmap before spinning on the HPTE lock */ | 839 | /* unlock rmap before spinning on the HPTE lock */ |
774 | unlock_rmap(rmapp); | 840 | unlock_rmap(rmapp); |
@@ -776,37 +842,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
776 | cpu_relax(); | 842 | cpu_relax(); |
777 | continue; | 843 | continue; |
778 | } | 844 | } |
779 | j = rev[i].forw; | ||
780 | if (j == i) { | ||
781 | /* chain is now empty */ | ||
782 | *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); | ||
783 | } else { | ||
784 | /* remove i from chain */ | ||
785 | h = rev[i].back; | ||
786 | rev[h].forw = j; | ||
787 | rev[j].back = h; | ||
788 | rev[i].forw = rev[i].back = i; | ||
789 | *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; | ||
790 | } | ||
791 | 845 | ||
792 | /* Now check and modify the HPTE */ | 846 | kvmppc_unmap_hpte(kvm, i, rmapp, gfn); |
793 | ptel = rev[i].guest_rpte; | ||
794 | psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); | ||
795 | if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && | ||
796 | hpte_rpn(ptel, psize) == gfn) { | ||
797 | hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); | ||
798 | kvmppc_invalidate_hpte(kvm, hptep, i); | ||
799 | hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO); | ||
800 | /* Harvest R and C */ | ||
801 | rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); | ||
802 | *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; | ||
803 | if (rcbits & HPTE_R_C) | ||
804 | kvmppc_update_rmap_change(rmapp, psize); | ||
805 | if (rcbits & ~rev[i].guest_rpte) { | ||
806 | rev[i].guest_rpte = ptel | rcbits; | ||
807 | note_hpte_modification(kvm, &rev[i]); | ||
808 | } | ||
809 | } | ||
810 | unlock_rmap(rmapp); | 847 | unlock_rmap(rmapp); |
811 | __unlock_hpte(hptep, be64_to_cpu(hptep[0])); | 848 | __unlock_hpte(hptep, be64_to_cpu(hptep[0])); |
812 | } | 849 | } |
@@ -860,7 +897,7 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm, | |||
860 | static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, | 897 | static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, |
861 | unsigned long gfn) | 898 | unsigned long gfn) |
862 | { | 899 | { |
863 | struct revmap_entry *rev = kvm->arch.revmap; | 900 | struct revmap_entry *rev = kvm->arch.hpt.rev; |
864 | unsigned long head, i, j; | 901 | unsigned long head, i, j; |
865 | __be64 *hptep; | 902 | __be64 *hptep; |
866 | int ret = 0; | 903 | int ret = 0; |
@@ -880,7 +917,7 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
880 | 917 | ||
881 | i = head = *rmapp & KVMPPC_RMAP_INDEX; | 918 | i = head = *rmapp & KVMPPC_RMAP_INDEX; |
882 | do { | 919 | do { |
883 | hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); | 920 | hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); |
884 | j = rev[i].forw; | 921 | j = rev[i].forw; |
885 | 922 | ||
886 | /* If this HPTE isn't referenced, ignore it */ | 923 | /* If this HPTE isn't referenced, ignore it */ |
@@ -923,7 +960,7 @@ int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) | |||
923 | static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, | 960 | static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, |
924 | unsigned long gfn) | 961 | unsigned long gfn) |
925 | { | 962 | { |
926 | struct revmap_entry *rev = kvm->arch.revmap; | 963 | struct revmap_entry *rev = kvm->arch.hpt.rev; |
927 | unsigned long head, i, j; | 964 | unsigned long head, i, j; |
928 | unsigned long *hp; | 965 | unsigned long *hp; |
929 | int ret = 1; | 966 | int ret = 1; |
@@ -940,7 +977,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
940 | if (*rmapp & KVMPPC_RMAP_PRESENT) { | 977 | if (*rmapp & KVMPPC_RMAP_PRESENT) { |
941 | i = head = *rmapp & KVMPPC_RMAP_INDEX; | 978 | i = head = *rmapp & KVMPPC_RMAP_INDEX; |
942 | do { | 979 | do { |
943 | hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); | 980 | hp = (unsigned long *)(kvm->arch.hpt.virt + (i << 4)); |
944 | j = rev[i].forw; | 981 | j = rev[i].forw; |
945 | if (be64_to_cpu(hp[1]) & HPTE_R_R) | 982 | if (be64_to_cpu(hp[1]) & HPTE_R_R) |
946 | goto out; | 983 | goto out; |
@@ -980,7 +1017,7 @@ static int vcpus_running(struct kvm *kvm) | |||
980 | */ | 1017 | */ |
981 | static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) | 1018 | static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) |
982 | { | 1019 | { |
983 | struct revmap_entry *rev = kvm->arch.revmap; | 1020 | struct revmap_entry *rev = kvm->arch.hpt.rev; |
984 | unsigned long head, i, j; | 1021 | unsigned long head, i, j; |
985 | unsigned long n; | 1022 | unsigned long n; |
986 | unsigned long v, r; | 1023 | unsigned long v, r; |
@@ -1005,7 +1042,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) | |||
1005 | i = head = *rmapp & KVMPPC_RMAP_INDEX; | 1042 | i = head = *rmapp & KVMPPC_RMAP_INDEX; |
1006 | do { | 1043 | do { |
1007 | unsigned long hptep1; | 1044 | unsigned long hptep1; |
1008 | hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); | 1045 | hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4)); |
1009 | j = rev[i].forw; | 1046 | j = rev[i].forw; |
1010 | 1047 | ||
1011 | /* | 1048 | /* |
@@ -1172,6 +1209,363 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, | |||
1172 | } | 1209 | } |
1173 | 1210 | ||
1174 | /* | 1211 | /* |
1212 | * HPT resizing | ||
1213 | */ | ||
1214 | static int resize_hpt_allocate(struct kvm_resize_hpt *resize) | ||
1215 | { | ||
1216 | int rc; | ||
1217 | |||
1218 | rc = kvmppc_allocate_hpt(&resize->hpt, resize->order); | ||
1219 | if (rc < 0) | ||
1220 | return rc; | ||
1221 | |||
1222 | resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n", | ||
1223 | resize->hpt.virt); | ||
1224 | |||
1225 | return 0; | ||
1226 | } | ||
1227 | |||
1228 | static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, | ||
1229 | unsigned long idx) | ||
1230 | { | ||
1231 | struct kvm *kvm = resize->kvm; | ||
1232 | struct kvm_hpt_info *old = &kvm->arch.hpt; | ||
1233 | struct kvm_hpt_info *new = &resize->hpt; | ||
1234 | unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1; | ||
1235 | unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1; | ||
1236 | __be64 *hptep, *new_hptep; | ||
1237 | unsigned long vpte, rpte, guest_rpte; | ||
1238 | int ret; | ||
1239 | struct revmap_entry *rev; | ||
1240 | unsigned long apsize, psize, avpn, pteg, hash; | ||
1241 | unsigned long new_idx, new_pteg, replace_vpte; | ||
1242 | |||
1243 | hptep = (__be64 *)(old->virt + (idx << 4)); | ||
1244 | |||
1245 | /* Guest is stopped, so new HPTEs can't be added or faulted | ||
1246 | * in, only unmapped or altered by host actions. So, it's | ||
1247 | * safe to check this before we take the HPTE lock */ | ||
1248 | vpte = be64_to_cpu(hptep[0]); | ||
1249 | if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT)) | ||
1250 | return 0; /* nothing to do */ | ||
1251 | |||
1252 | while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) | ||
1253 | cpu_relax(); | ||
1254 | |||
1255 | vpte = be64_to_cpu(hptep[0]); | ||
1256 | |||
1257 | ret = 0; | ||
1258 | if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT)) | ||
1259 | /* Nothing to do */ | ||
1260 | goto out; | ||
1261 | |||
1262 | /* Unmap */ | ||
1263 | rev = &old->rev[idx]; | ||
1264 | guest_rpte = rev->guest_rpte; | ||
1265 | |||
1266 | ret = -EIO; | ||
1267 | apsize = hpte_page_size(vpte, guest_rpte); | ||
1268 | if (!apsize) | ||
1269 | goto out; | ||
1270 | |||
1271 | if (vpte & HPTE_V_VALID) { | ||
1272 | unsigned long gfn = hpte_rpn(guest_rpte, apsize); | ||
1273 | int srcu_idx = srcu_read_lock(&kvm->srcu); | ||
1274 | struct kvm_memory_slot *memslot = | ||
1275 | __gfn_to_memslot(kvm_memslots(kvm), gfn); | ||
1276 | |||
1277 | if (memslot) { | ||
1278 | unsigned long *rmapp; | ||
1279 | rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; | ||
1280 | |||
1281 | lock_rmap(rmapp); | ||
1282 | kvmppc_unmap_hpte(kvm, idx, rmapp, gfn); | ||
1283 | unlock_rmap(rmapp); | ||
1284 | } | ||
1285 | |||
1286 | srcu_read_unlock(&kvm->srcu, srcu_idx); | ||
1287 | } | ||
1288 | |||
1289 | /* Reload PTE after unmap */ | ||
1290 | vpte = be64_to_cpu(hptep[0]); | ||
1291 | |||
1292 | BUG_ON(vpte & HPTE_V_VALID); | ||
1293 | BUG_ON(!(vpte & HPTE_V_ABSENT)); | ||
1294 | |||
1295 | ret = 0; | ||
1296 | if (!(vpte & HPTE_V_BOLTED)) | ||
1297 | goto out; | ||
1298 | |||
1299 | rpte = be64_to_cpu(hptep[1]); | ||
1300 | psize = hpte_base_page_size(vpte, rpte); | ||
1301 | avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); | ||
1302 | pteg = idx / HPTES_PER_GROUP; | ||
1303 | if (vpte & HPTE_V_SECONDARY) | ||
1304 | pteg = ~pteg; | ||
1305 | |||
1306 | if (!(vpte & HPTE_V_1TB_SEG)) { | ||
1307 | unsigned long offset, vsid; | ||
1308 | |||
1309 | /* We only have 28 - 23 bits of offset in avpn */ | ||
1310 | offset = (avpn & 0x1f) << 23; | ||
1311 | vsid = avpn >> 5; | ||
1312 | /* We can find more bits from the pteg value */ | ||
1313 | if (psize < (1ULL << 23)) | ||
1314 | offset |= ((vsid ^ pteg) & old_hash_mask) * psize; | ||
1315 | |||
1316 | hash = vsid ^ (offset / psize); | ||
1317 | } else { | ||
1318 | unsigned long offset, vsid; | ||
1319 | |||
1320 | /* We only have 40 - 23 bits of seg_off in avpn */ | ||
1321 | offset = (avpn & 0x1ffff) << 23; | ||
1322 | vsid = avpn >> 17; | ||
1323 | if (psize < (1ULL << 23)) | ||
1324 | offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; | ||
1325 | |||
1326 | hash = vsid ^ (vsid << 25) ^ (offset / psize); | ||
1327 | } | ||
1328 | |||
1329 | new_pteg = hash & new_hash_mask; | ||
1330 | if (vpte & HPTE_V_SECONDARY) { | ||
1331 | BUG_ON(~pteg != (hash & old_hash_mask)); | ||
1332 | new_pteg = ~new_pteg; | ||
1333 | } else { | ||
1334 | BUG_ON(pteg != (hash & old_hash_mask)); | ||
1335 | } | ||
1336 | |||
1337 | new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP); | ||
1338 | new_hptep = (__be64 *)(new->virt + (new_idx << 4)); | ||
1339 | |||
1340 | replace_vpte = be64_to_cpu(new_hptep[0]); | ||
1341 | |||
1342 | if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { | ||
1343 | BUG_ON(new->order >= old->order); | ||
1344 | |||
1345 | if (replace_vpte & HPTE_V_BOLTED) { | ||
1346 | if (vpte & HPTE_V_BOLTED) | ||
1347 | /* Bolted collision, nothing we can do */ | ||
1348 | ret = -ENOSPC; | ||
1349 | /* Discard the new HPTE */ | ||
1350 | goto out; | ||
1351 | } | ||
1352 | |||
1353 | /* Discard the previous HPTE */ | ||
1354 | } | ||
1355 | |||
1356 | new_hptep[1] = cpu_to_be64(rpte); | ||
1357 | new->rev[new_idx].guest_rpte = guest_rpte; | ||
1358 | /* No need for a barrier, since new HPT isn't active */ | ||
1359 | new_hptep[0] = cpu_to_be64(vpte); | ||
1360 | unlock_hpte(new_hptep, vpte); | ||
1361 | |||
1362 | out: | ||
1363 | unlock_hpte(hptep, vpte); | ||
1364 | return ret; | ||
1365 | } | ||
1366 | |||
1367 | static int resize_hpt_rehash(struct kvm_resize_hpt *resize) | ||
1368 | { | ||
1369 | struct kvm *kvm = resize->kvm; | ||
1370 | unsigned long i; | ||
1371 | int rc; | ||
1372 | |||
1373 | /* | ||
1374 | * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs | ||
1375 | * that POWER9 uses, and could well hit a BUG_ON on POWER9. | ||
1376 | */ | ||
1377 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
1378 | return -EIO; | ||
1379 | for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) { | ||
1380 | rc = resize_hpt_rehash_hpte(resize, i); | ||
1381 | if (rc != 0) | ||
1382 | return rc; | ||
1383 | } | ||
1384 | |||
1385 | return 0; | ||
1386 | } | ||
1387 | |||
1388 | static void resize_hpt_pivot(struct kvm_resize_hpt *resize) | ||
1389 | { | ||
1390 | struct kvm *kvm = resize->kvm; | ||
1391 | struct kvm_hpt_info hpt_tmp; | ||
1392 | |||
1393 | /* Exchange the pending tables in the resize structure with | ||
1394 | * the active tables */ | ||
1395 | |||
1396 | resize_hpt_debug(resize, "resize_hpt_pivot()\n"); | ||
1397 | |||
1398 | spin_lock(&kvm->mmu_lock); | ||
1399 | asm volatile("ptesync" : : : "memory"); | ||
1400 | |||
1401 | hpt_tmp = kvm->arch.hpt; | ||
1402 | kvmppc_set_hpt(kvm, &resize->hpt); | ||
1403 | resize->hpt = hpt_tmp; | ||
1404 | |||
1405 | spin_unlock(&kvm->mmu_lock); | ||
1406 | |||
1407 | synchronize_srcu_expedited(&kvm->srcu); | ||
1408 | |||
1409 | resize_hpt_debug(resize, "resize_hpt_pivot() done\n"); | ||
1410 | } | ||
1411 | |||
1412 | static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) | ||
1413 | { | ||
1414 | BUG_ON(kvm->arch.resize_hpt != resize); | ||
1415 | |||
1416 | if (!resize) | ||
1417 | return; | ||
1418 | |||
1419 | if (resize->hpt.virt) | ||
1420 | kvmppc_free_hpt(&resize->hpt); | ||
1421 | |||
1422 | kvm->arch.resize_hpt = NULL; | ||
1423 | kfree(resize); | ||
1424 | } | ||
1425 | |||
1426 | static void resize_hpt_prepare_work(struct work_struct *work) | ||
1427 | { | ||
1428 | struct kvm_resize_hpt *resize = container_of(work, | ||
1429 | struct kvm_resize_hpt, | ||
1430 | work); | ||
1431 | struct kvm *kvm = resize->kvm; | ||
1432 | int err; | ||
1433 | |||
1434 | resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", | ||
1435 | resize->order); | ||
1436 | |||
1437 | err = resize_hpt_allocate(resize); | ||
1438 | |||
1439 | mutex_lock(&kvm->lock); | ||
1440 | |||
1441 | resize->error = err; | ||
1442 | resize->prepare_done = true; | ||
1443 | |||
1444 | mutex_unlock(&kvm->lock); | ||
1445 | } | ||
1446 | |||
1447 | long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, | ||
1448 | struct kvm_ppc_resize_hpt *rhpt) | ||
1449 | { | ||
1450 | unsigned long flags = rhpt->flags; | ||
1451 | unsigned long shift = rhpt->shift; | ||
1452 | struct kvm_resize_hpt *resize; | ||
1453 | int ret; | ||
1454 | |||
1455 | if (flags != 0) | ||
1456 | return -EINVAL; | ||
1457 | |||
1458 | if (shift && ((shift < 18) || (shift > 46))) | ||
1459 | return -EINVAL; | ||
1460 | |||
1461 | mutex_lock(&kvm->lock); | ||
1462 | |||
1463 | resize = kvm->arch.resize_hpt; | ||
1464 | |||
1465 | if (resize) { | ||
1466 | if (resize->order == shift) { | ||
1467 | /* Suitable resize in progress */ | ||
1468 | if (resize->prepare_done) { | ||
1469 | ret = resize->error; | ||
1470 | if (ret != 0) | ||
1471 | resize_hpt_release(kvm, resize); | ||
1472 | } else { | ||
1473 | ret = 100; /* estimated time in ms */ | ||
1474 | } | ||
1475 | |||
1476 | goto out; | ||
1477 | } | ||
1478 | |||
1479 | /* not suitable, cancel it */ | ||
1480 | resize_hpt_release(kvm, resize); | ||
1481 | } | ||
1482 | |||
1483 | ret = 0; | ||
1484 | if (!shift) | ||
1485 | goto out; /* nothing to do */ | ||
1486 | |||
1487 | /* start new resize */ | ||
1488 | |||
1489 | resize = kzalloc(sizeof(*resize), GFP_KERNEL); | ||
1490 | resize->order = shift; | ||
1491 | resize->kvm = kvm; | ||
1492 | INIT_WORK(&resize->work, resize_hpt_prepare_work); | ||
1493 | kvm->arch.resize_hpt = resize; | ||
1494 | |||
1495 | schedule_work(&resize->work); | ||
1496 | |||
1497 | ret = 100; /* estimated time in ms */ | ||
1498 | |||
1499 | out: | ||
1500 | mutex_unlock(&kvm->lock); | ||
1501 | return ret; | ||
1502 | } | ||
1503 | |||
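The prepare ioctl returns a positive estimated wait (in milliseconds) while the allocation work item is still running, zero once the tentative HPT is ready, and a negative error otherwise. A sketch of how userspace might drive the two-phase resize against a VM file descriptor, assuming the KVM_PPC_RESIZE_HPT_PREPARE/COMMIT ioctls and the struct kvm_ppc_resize_hpt layout from this series' uapi changes (verify against the installed headers before relying on them):

#include <errno.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int resize_guest_hpt(int vm_fd, unsigned int shift)
{
        struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = shift };
        int ret;

        /* PREPARE returns a suggested wait (ms) while the work item allocates */
        do {
                ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
                if (ret > 0)
                        usleep((useconds_t)ret * 1000);
        } while (ret > 0);
        if (ret < 0)
                return -errno;

        /* COMMIT rehashes into the tentative HPT and pivots it into place */
        ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
        return ret < 0 ? -errno : 0;
}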
1504 | static void resize_hpt_boot_vcpu(void *opaque) | ||
1505 | { | ||
1506 | /* Nothing to do, just force a KVM exit */ | ||
1507 | } | ||
1508 | |||
1509 | long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, | ||
1510 | struct kvm_ppc_resize_hpt *rhpt) | ||
1511 | { | ||
1512 | unsigned long flags = rhpt->flags; | ||
1513 | unsigned long shift = rhpt->shift; | ||
1514 | struct kvm_resize_hpt *resize; | ||
1515 | long ret; | ||
1516 | |||
1517 | if (flags != 0) | ||
1518 | return -EINVAL; | ||
1519 | |||
1520 | if (shift && ((shift < 18) || (shift > 46))) | ||
1521 | return -EINVAL; | ||
1522 | |||
1523 | mutex_lock(&kvm->lock); | ||
1524 | |||
1525 | resize = kvm->arch.resize_hpt; | ||
1526 | |||
1527 | /* This shouldn't be possible */ | ||
1528 | ret = -EIO; | ||
1529 | if (WARN_ON(!kvm->arch.hpte_setup_done)) | ||
1530 | goto out_no_hpt; | ||
1531 | |||
1532 | /* Stop VCPUs from running while we mess with the HPT */ | ||
1533 | kvm->arch.hpte_setup_done = 0; | ||
1534 | smp_mb(); | ||
1535 | |||
1536 | /* Boot all CPUs out of the guest so they re-read | ||
1537 | * hpte_setup_done */ | ||
1538 | on_each_cpu(resize_hpt_boot_vcpu, NULL, 1); | ||
1539 | |||
1540 | ret = -ENXIO; | ||
1541 | if (!resize || (resize->order != shift)) | ||
1542 | goto out; | ||
1543 | |||
1544 | ret = -EBUSY; | ||
1545 | if (!resize->prepare_done) | ||
1546 | goto out; | ||
1547 | |||
1548 | ret = resize->error; | ||
1549 | if (ret != 0) | ||
1550 | goto out; | ||
1551 | |||
1552 | ret = resize_hpt_rehash(resize); | ||
1553 | if (ret != 0) | ||
1554 | goto out; | ||
1555 | |||
1556 | resize_hpt_pivot(resize); | ||
1557 | |||
1558 | out: | ||
1559 | /* Let VCPUs run again */ | ||
1560 | kvm->arch.hpte_setup_done = 1; | ||
1561 | smp_mb(); | ||
1562 | out_no_hpt: | ||
1563 | resize_hpt_release(kvm, resize); | ||
1564 | mutex_unlock(&kvm->lock); | ||
1565 | return ret; | ||
1566 | } | ||
1567 | |||
1568 | /* | ||
1175 | * Functions for reading and writing the hash table via reads and | 1569 | * Functions for reading and writing the hash table via reads and |
1176 | * writes on a file descriptor. | 1570 | * writes on a file descriptor. |
1177 | * | 1571 | * |
@@ -1311,8 +1705,8 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, | |||
1311 | flags = ctx->flags; | 1705 | flags = ctx->flags; |
1312 | 1706 | ||
1313 | i = ctx->index; | 1707 | i = ctx->index; |
1314 | hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); | 1708 | hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE)); |
1315 | revp = kvm->arch.revmap + i; | 1709 | revp = kvm->arch.hpt.rev + i; |
1316 | lbuf = (unsigned long __user *)buf; | 1710 | lbuf = (unsigned long __user *)buf; |
1317 | 1711 | ||
1318 | nb = 0; | 1712 | nb = 0; |
@@ -1327,7 +1721,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, | |||
1327 | 1721 | ||
1328 | /* Skip uninteresting entries, i.e. clean on not-first pass */ | 1722 | /* Skip uninteresting entries, i.e. clean on not-first pass */ |
1329 | if (!first_pass) { | 1723 | if (!first_pass) { |
1330 | while (i < kvm->arch.hpt_npte && | 1724 | while (i < kvmppc_hpt_npte(&kvm->arch.hpt) && |
1331 | !hpte_dirty(revp, hptp)) { | 1725 | !hpte_dirty(revp, hptp)) { |
1332 | ++i; | 1726 | ++i; |
1333 | hptp += 2; | 1727 | hptp += 2; |
@@ -1337,7 +1731,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, | |||
1337 | hdr.index = i; | 1731 | hdr.index = i; |
1338 | 1732 | ||
1339 | /* Grab a series of valid entries */ | 1733 | /* Grab a series of valid entries */ |
1340 | while (i < kvm->arch.hpt_npte && | 1734 | while (i < kvmppc_hpt_npte(&kvm->arch.hpt) && |
1341 | hdr.n_valid < 0xffff && | 1735 | hdr.n_valid < 0xffff && |
1342 | nb + HPTE_SIZE < count && | 1736 | nb + HPTE_SIZE < count && |
1343 | record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { | 1737 | record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { |
@@ -1353,7 +1747,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, | |||
1353 | ++revp; | 1747 | ++revp; |
1354 | } | 1748 | } |
1355 | /* Now skip invalid entries while we can */ | 1749 | /* Now skip invalid entries while we can */ |
1356 | while (i < kvm->arch.hpt_npte && | 1750 | while (i < kvmppc_hpt_npte(&kvm->arch.hpt) && |
1357 | hdr.n_invalid < 0xffff && | 1751 | hdr.n_invalid < 0xffff && |
1358 | record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { | 1752 | record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { |
1359 | /* found an invalid entry */ | 1753 | /* found an invalid entry */ |
@@ -1374,7 +1768,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, | |||
1374 | } | 1768 | } |
1375 | 1769 | ||
1376 | /* Check if we've wrapped around the hash table */ | 1770 | /* Check if we've wrapped around the hash table */ |
1377 | if (i >= kvm->arch.hpt_npte) { | 1771 | if (i >= kvmppc_hpt_npte(&kvm->arch.hpt)) { |
1378 | i = 0; | 1772 | i = 0; |
1379 | ctx->first_pass = 0; | 1773 | ctx->first_pass = 0; |
1380 | break; | 1774 | break; |
@@ -1433,11 +1827,11 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, | |||
1433 | 1827 | ||
1434 | err = -EINVAL; | 1828 | err = -EINVAL; |
1435 | i = hdr.index; | 1829 | i = hdr.index; |
1436 | if (i >= kvm->arch.hpt_npte || | 1830 | if (i >= kvmppc_hpt_npte(&kvm->arch.hpt) || |
1437 | i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) | 1831 | i + hdr.n_valid + hdr.n_invalid > kvmppc_hpt_npte(&kvm->arch.hpt)) |
1438 | break; | 1832 | break; |
1439 | 1833 | ||
1440 | hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); | 1834 | hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE)); |
1441 | lbuf = (unsigned long __user *)buf; | 1835 | lbuf = (unsigned long __user *)buf; |
1442 | for (j = 0; j < hdr.n_valid; ++j) { | 1836 | for (j = 0; j < hdr.n_valid; ++j) { |
1443 | __be64 hpte_v; | 1837 | __be64 hpte_v; |
@@ -1624,8 +2018,9 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf, | |||
1624 | 2018 | ||
1625 | kvm = p->kvm; | 2019 | kvm = p->kvm; |
1626 | i = p->hpt_index; | 2020 | i = p->hpt_index; |
1627 | hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); | 2021 | hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE)); |
1628 | for (; len != 0 && i < kvm->arch.hpt_npte; ++i, hptp += 2) { | 2022 | for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt); |
2023 | ++i, hptp += 2) { | ||
1629 | if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))) | 2024 | if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))) |
1630 | continue; | 2025 | continue; |
1631 | 2026 | ||
@@ -1635,7 +2030,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf, | |||
1635 | cpu_relax(); | 2030 | cpu_relax(); |
1636 | v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK; | 2031 | v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK; |
1637 | hr = be64_to_cpu(hptp[1]); | 2032 | hr = be64_to_cpu(hptp[1]); |
1638 | gr = kvm->arch.revmap[i].guest_rpte; | 2033 | gr = kvm->arch.hpt.rev[i].guest_rpte; |
1639 | unlock_hpte(hptp, v); | 2034 | unlock_hpte(hptp, v); |
1640 | preempt_enable(); | 2035 | preempt_enable(); |
1641 | 2036 | ||
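The hunks above replace the old scattered fields (kvm->arch.hpt_virt, hpt_npte, hpt_mask, revmap) with a single kvm->arch.hpt structure plus small accessors, which is what lets the resize code later in this series swap the whole table in one assignment. A rough sketch of the consolidated shape, assuming the usual 16 bytes per HPTE and 128-byte groups of 8 HPTEs; the exact field list of struct kvm_hpt_info is inferred from these call sites, not quoted from the diff:

struct kvm_hpt_info {
	unsigned long virt;		/* host virtual address of the guest HPT */
	struct revmap_entry *rev;	/* reverse-map array, one entry per HPTE */
	unsigned int order;		/* HPT size is 2^order bytes */
	int cma;			/* allocated from the CMA pool? */
};

static inline unsigned long kvmppc_hpt_npte(struct kvm_hpt_info *hpt)
{
	return 1UL << (hpt->order - 4);		/* 16 bytes per HPTE */
}

static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
{
	return (1UL << (hpt->order - 7)) - 1;	/* 128-byte HPTE groups */
}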
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index c379ff5a4438..491c5d8120f7 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
@@ -171,6 +171,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
171 | goto fail; | 171 | goto fail; |
172 | } | 172 | } |
173 | 173 | ||
174 | ret = -ENOMEM; | ||
174 | stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), | 175 | stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), |
175 | GFP_KERNEL); | 176 | GFP_KERNEL); |
176 | if (!stt) | 177 | if (!stt) |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e4a79679342e..1e107ece4e37 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -182,7 +182,8 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) | |||
182 | ++vcpu->stat.halt_wakeup; | 182 | ++vcpu->stat.halt_wakeup; |
183 | } | 183 | } |
184 | 184 | ||
185 | if (kvmppc_ipi_thread(vcpu->arch.thread_cpu)) | 185 | cpu = READ_ONCE(vcpu->arch.thread_cpu); |
186 | if (cpu >= 0 && kvmppc_ipi_thread(cpu)) | ||
186 | return; | 187 | return; |
187 | 188 | ||
188 | /* CPU points to the first thread of the core */ | 189 | /* CPU points to the first thread of the core */ |
@@ -773,12 +774,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
773 | } | 774 | } |
774 | tvcpu->arch.prodded = 1; | 775 | tvcpu->arch.prodded = 1; |
775 | smp_mb(); | 776 | smp_mb(); |
776 | if (vcpu->arch.ceded) { | 777 | if (tvcpu->arch.ceded) |
777 | if (swait_active(&vcpu->wq)) { | 778 | kvmppc_fast_vcpu_kick_hv(tvcpu); |
778 | swake_up(&vcpu->wq); | ||
779 | vcpu->stat.halt_wakeup++; | ||
780 | } | ||
781 | } | ||
782 | break; | 779 | break; |
783 | case H_CONFER: | 780 | case H_CONFER: |
784 | target = kvmppc_get_gpr(vcpu, 4); | 781 | target = kvmppc_get_gpr(vcpu, 4); |
@@ -2665,7 +2662,8 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) | |||
2665 | int i; | 2662 | int i; |
2666 | 2663 | ||
2667 | for_each_runnable_thread(i, vcpu, vc) { | 2664 | for_each_runnable_thread(i, vcpu, vc) { |
2668 | if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) | 2665 | if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded || |
2666 | vcpu->arch.prodded) | ||
2669 | return 1; | 2667 | return 1; |
2670 | } | 2668 | } |
2671 | 2669 | ||
@@ -2851,7 +2849,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2851 | break; | 2849 | break; |
2852 | n_ceded = 0; | 2850 | n_ceded = 0; |
2853 | for_each_runnable_thread(i, v, vc) { | 2851 | for_each_runnable_thread(i, v, vc) { |
2854 | if (!v->arch.pending_exceptions) | 2852 | if (!v->arch.pending_exceptions && !v->arch.prodded) |
2855 | n_ceded += v->arch.ceded; | 2853 | n_ceded += v->arch.ceded; |
2856 | else | 2854 | else |
2857 | v->arch.ceded = 0; | 2855 | v->arch.ceded = 0; |
@@ -3199,12 +3197,23 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) | |||
3199 | goto out; /* another vcpu beat us to it */ | 3197 | goto out; /* another vcpu beat us to it */ |
3200 | 3198 | ||
3201 | /* Allocate hashed page table (if not done already) and reset it */ | 3199 | /* Allocate hashed page table (if not done already) and reset it */ |
3202 | if (!kvm->arch.hpt_virt) { | 3200 | if (!kvm->arch.hpt.virt) { |
3203 | err = kvmppc_alloc_hpt(kvm, NULL); | 3201 | int order = KVM_DEFAULT_HPT_ORDER; |
3204 | if (err) { | 3202 | struct kvm_hpt_info info; |
3203 | |||
3204 | err = kvmppc_allocate_hpt(&info, order); | ||
3205 | /* If we get here, it means userspace didn't specify a | ||
3206 | * size explicitly. So, try successively smaller | ||
3207 | * sizes if the default failed. */ | ||
3208 | while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER) | ||
3209 | err = kvmppc_allocate_hpt(&info, order); | ||
3210 | |||
3211 | if (err < 0) { | ||
3205 | pr_err("KVM: Couldn't alloc HPT\n"); | 3212 | pr_err("KVM: Couldn't alloc HPT\n"); |
3206 | goto out; | 3213 | goto out; |
3207 | } | 3214 | } |
3215 | |||
3216 | kvmppc_set_hpt(kvm, &info); | ||
3208 | } | 3217 | } |
3209 | 3218 | ||
3210 | /* Look up the memslot for guest physical address 0 */ | 3219 | /* Look up the memslot for guest physical address 0 */ |
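The comment about trying "successively smaller sizes" reads best with concrete numbers: each decrement of the order halves both the table footprint and the number of HPTEs it can hold. A standalone illustration of that mapping, assuming order 24 (a 16 MiB default) down to order 18 (256 KiB) and 16-byte HPTEs; the real KVM_DEFAULT_HPT_ORDER and PPC_MIN_HPT_ORDER values come from headers not shown in this diff:

#include <stdio.h>

int main(void)
{
	/* Walk the same range the retry loop would, largest to smallest. */
	for (int order = 24; order >= 18; order--)
		printf("order %2d -> %6lu KiB HPT, %7lu HPTEs\n",
		       order, (1UL << order) >> 10, 1UL << (order - 4));
	return 0;
}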
@@ -3413,6 +3422,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | |||
3413 | 3422 | ||
3414 | kvm->arch.lpcr = lpcr; | 3423 | kvm->arch.lpcr = lpcr; |
3415 | 3424 | ||
3425 | /* Initialization for future HPT resizes */ | ||
3426 | kvm->arch.resize_hpt = NULL; | ||
3427 | |||
3416 | /* | 3428 | /* |
3417 | * Work out how many sets the TLB has, for the use of | 3429 | * Work out how many sets the TLB has, for the use of |
3418 | * the TLB invalidation loop in book3s_hv_rmhandlers.S. | 3430 | * the TLB invalidation loop in book3s_hv_rmhandlers.S. |
@@ -3469,7 +3481,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) | |||
3469 | if (kvm_is_radix(kvm)) | 3481 | if (kvm_is_radix(kvm)) |
3470 | kvmppc_free_radix(kvm); | 3482 | kvmppc_free_radix(kvm); |
3471 | else | 3483 | else |
3472 | kvmppc_free_hpt(kvm); | 3484 | kvmppc_free_hpt(&kvm->arch.hpt); |
3473 | 3485 | ||
3474 | kvmppc_free_pimap(kvm); | 3486 | kvmppc_free_pimap(kvm); |
3475 | } | 3487 | } |
@@ -3695,12 +3707,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, | |||
3695 | r = -EFAULT; | 3707 | r = -EFAULT; |
3696 | if (get_user(htab_order, (u32 __user *)argp)) | 3708 | if (get_user(htab_order, (u32 __user *)argp)) |
3697 | break; | 3709 | break; |
3698 | r = kvmppc_alloc_reset_hpt(kvm, &htab_order); | 3710 | r = kvmppc_alloc_reset_hpt(kvm, htab_order); |
3699 | if (r) | 3711 | if (r) |
3700 | break; | 3712 | break; |
3701 | r = -EFAULT; | ||
3702 | if (put_user(htab_order, (u32 __user *)argp)) | ||
3703 | break; | ||
3704 | r = 0; | 3713 | r = 0; |
3705 | break; | 3714 | break; |
3706 | } | 3715 | } |
@@ -3715,6 +3724,28 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, | |||
3715 | break; | 3724 | break; |
3716 | } | 3725 | } |
3717 | 3726 | ||
3727 | case KVM_PPC_RESIZE_HPT_PREPARE: { | ||
3728 | struct kvm_ppc_resize_hpt rhpt; | ||
3729 | |||
3730 | r = -EFAULT; | ||
3731 | if (copy_from_user(&rhpt, argp, sizeof(rhpt))) | ||
3732 | break; | ||
3733 | |||
3734 | r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt); | ||
3735 | break; | ||
3736 | } | ||
3737 | |||
3738 | case KVM_PPC_RESIZE_HPT_COMMIT: { | ||
3739 | struct kvm_ppc_resize_hpt rhpt; | ||
3740 | |||
3741 | r = -EFAULT; | ||
3742 | if (copy_from_user(&rhpt, argp, sizeof(rhpt))) | ||
3743 | break; | ||
3744 | |||
3745 | r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt); | ||
3746 | break; | ||
3747 | } | ||
3748 | |||
3718 | default: | 3749 | default: |
3719 | r = -ENOTTY; | 3750 | r = -ENOTTY; |
3720 | } | 3751 | } |
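The two new ioctl cases above only copy a struct kvm_ppc_resize_hpt in and dispatch to the prepare/commit helpers; the interesting part is how userspace is expected to drive them. A hedged sketch of that flow, assuming the uapi struct carries flags and a shift (desired HPT order) field and that PREPARE returns a positive "not ready yet, retry" value while the new table is built in the background, neither of which is visible in these hunks:

#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

static int resize_guest_hpt(int vm_fd, unsigned int new_shift)
{
	struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = new_shift };
	int ret;

	/* Ask KVM to prepare a table of the new size; poll until it is ready. */
	do {
		ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
		if (ret > 0)
			usleep(10 * 1000);
	} while (ret > 0);
	if (ret < 0)
		return ret;

	/* Switch the guest over to the prepared table. */
	return ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}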
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 2f69fbc19bb0..c42a7e63b39e 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -52,19 +52,19 @@ static int __init early_parse_kvm_cma_resv(char *p) | |||
52 | } | 52 | } |
53 | early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); | 53 | early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); |
54 | 54 | ||
55 | struct page *kvm_alloc_hpt(unsigned long nr_pages) | 55 | struct page *kvm_alloc_hpt_cma(unsigned long nr_pages) |
56 | { | 56 | { |
57 | VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | 57 | VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); |
58 | 58 | ||
59 | return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES)); | 59 | return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES)); |
60 | } | 60 | } |
61 | EXPORT_SYMBOL_GPL(kvm_alloc_hpt); | 61 | EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma); |
62 | 62 | ||
63 | void kvm_release_hpt(struct page *page, unsigned long nr_pages) | 63 | void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages) |
64 | { | 64 | { |
65 | cma_release(kvm_cma, page, nr_pages); | 65 | cma_release(kvm_cma, page, nr_pages); |
66 | } | 66 | } |
67 | EXPORT_SYMBOL_GPL(kvm_release_hpt); | 67 | EXPORT_SYMBOL_GPL(kvm_free_hpt_cma); |
68 | 68 | ||
69 | /** | 69 | /** |
70 | * kvm_cma_reserve() - reserve area for kvm hash pagetable | 70 | * kvm_cma_reserve() - reserve area for kvm hash pagetable |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index b095afcd4309..6fca970373ee 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -86,10 +86,10 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, | |||
86 | 86 | ||
87 | if (*rmap & KVMPPC_RMAP_PRESENT) { | 87 | if (*rmap & KVMPPC_RMAP_PRESENT) { |
88 | i = *rmap & KVMPPC_RMAP_INDEX; | 88 | i = *rmap & KVMPPC_RMAP_INDEX; |
89 | head = &kvm->arch.revmap[i]; | 89 | head = &kvm->arch.hpt.rev[i]; |
90 | if (realmode) | 90 | if (realmode) |
91 | head = real_vmalloc_addr(head); | 91 | head = real_vmalloc_addr(head); |
92 | tail = &kvm->arch.revmap[head->back]; | 92 | tail = &kvm->arch.hpt.rev[head->back]; |
93 | if (realmode) | 93 | if (realmode) |
94 | tail = real_vmalloc_addr(tail); | 94 | tail = real_vmalloc_addr(tail); |
95 | rev->forw = i; | 95 | rev->forw = i; |
@@ -154,8 +154,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, | |||
154 | lock_rmap(rmap); | 154 | lock_rmap(rmap); |
155 | 155 | ||
156 | head = *rmap & KVMPPC_RMAP_INDEX; | 156 | head = *rmap & KVMPPC_RMAP_INDEX; |
157 | next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]); | 157 | next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]); |
158 | prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]); | 158 | prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]); |
159 | next->back = rev->back; | 159 | next->back = rev->back; |
160 | prev->forw = rev->forw; | 160 | prev->forw = rev->forw; |
161 | if (head == pte_index) { | 161 | if (head == pte_index) { |
@@ -292,11 +292,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | |||
292 | 292 | ||
293 | /* Find and lock the HPTEG slot to use */ | 293 | /* Find and lock the HPTEG slot to use */ |
294 | do_insert: | 294 | do_insert: |
295 | if (pte_index >= kvm->arch.hpt_npte) | 295 | if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) |
296 | return H_PARAMETER; | 296 | return H_PARAMETER; |
297 | if (likely((flags & H_EXACT) == 0)) { | 297 | if (likely((flags & H_EXACT) == 0)) { |
298 | pte_index &= ~7UL; | 298 | pte_index &= ~7UL; |
299 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); | 299 | hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); |
300 | for (i = 0; i < 8; ++i) { | 300 | for (i = 0; i < 8; ++i) { |
301 | if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && | 301 | if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && |
302 | try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | | 302 | try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | |
@@ -327,7 +327,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | |||
327 | } | 327 | } |
328 | pte_index += i; | 328 | pte_index += i; |
329 | } else { | 329 | } else { |
330 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); | 330 | hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); |
331 | if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | | 331 | if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | |
332 | HPTE_V_ABSENT)) { | 332 | HPTE_V_ABSENT)) { |
333 | /* Lock the slot and check again */ | 333 | /* Lock the slot and check again */ |
@@ -344,7 +344,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | |||
344 | } | 344 | } |
345 | 345 | ||
346 | /* Save away the guest's idea of the second HPTE dword */ | 346 | /* Save away the guest's idea of the second HPTE dword */ |
347 | rev = &kvm->arch.revmap[pte_index]; | 347 | rev = &kvm->arch.hpt.rev[pte_index]; |
348 | if (realmode) | 348 | if (realmode) |
349 | rev = real_vmalloc_addr(rev); | 349 | rev = real_vmalloc_addr(rev); |
350 | if (rev) { | 350 | if (rev) { |
@@ -469,9 +469,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, | |||
469 | 469 | ||
470 | if (kvm_is_radix(kvm)) | 470 | if (kvm_is_radix(kvm)) |
471 | return H_FUNCTION; | 471 | return H_FUNCTION; |
472 | if (pte_index >= kvm->arch.hpt_npte) | 472 | if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) |
473 | return H_PARAMETER; | 473 | return H_PARAMETER; |
474 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); | 474 | hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); |
475 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) | 475 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) |
476 | cpu_relax(); | 476 | cpu_relax(); |
477 | pte = orig_pte = be64_to_cpu(hpte[0]); | 477 | pte = orig_pte = be64_to_cpu(hpte[0]); |
@@ -487,7 +487,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, | |||
487 | return H_NOT_FOUND; | 487 | return H_NOT_FOUND; |
488 | } | 488 | } |
489 | 489 | ||
490 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | 490 | rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); |
491 | v = pte & ~HPTE_V_HVLOCK; | 491 | v = pte & ~HPTE_V_HVLOCK; |
492 | if (v & HPTE_V_VALID) { | 492 | if (v & HPTE_V_VALID) { |
493 | hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); | 493 | hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); |
@@ -557,13 +557,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
557 | break; | 557 | break; |
558 | } | 558 | } |
559 | if (req != 1 || flags == 3 || | 559 | if (req != 1 || flags == 3 || |
560 | pte_index >= kvm->arch.hpt_npte) { | 560 | pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) { |
561 | /* parameter error */ | 561 | /* parameter error */ |
562 | args[j] = ((0xa0 | flags) << 56) + pte_index; | 562 | args[j] = ((0xa0 | flags) << 56) + pte_index; |
563 | ret = H_PARAMETER; | 563 | ret = H_PARAMETER; |
564 | break; | 564 | break; |
565 | } | 565 | } |
566 | hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4)); | 566 | hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4)); |
567 | /* to avoid deadlock, don't spin except for first */ | 567 | /* to avoid deadlock, don't spin except for first */ |
568 | if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { | 568 | if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { |
569 | if (n) | 569 | if (n) |
@@ -600,7 +600,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
600 | } | 600 | } |
601 | 601 | ||
602 | args[j] = ((0x80 | flags) << 56) + pte_index; | 602 | args[j] = ((0x80 | flags) << 56) + pte_index; |
603 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | 603 | rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); |
604 | note_hpte_modification(kvm, rev); | 604 | note_hpte_modification(kvm, rev); |
605 | 605 | ||
606 | if (!(hp0 & HPTE_V_VALID)) { | 606 | if (!(hp0 & HPTE_V_VALID)) { |
@@ -657,10 +657,10 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, | |||
657 | 657 | ||
658 | if (kvm_is_radix(kvm)) | 658 | if (kvm_is_radix(kvm)) |
659 | return H_FUNCTION; | 659 | return H_FUNCTION; |
660 | if (pte_index >= kvm->arch.hpt_npte) | 660 | if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) |
661 | return H_PARAMETER; | 661 | return H_PARAMETER; |
662 | 662 | ||
663 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); | 663 | hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); |
664 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) | 664 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) |
665 | cpu_relax(); | 665 | cpu_relax(); |
666 | v = pte_v = be64_to_cpu(hpte[0]); | 666 | v = pte_v = be64_to_cpu(hpte[0]); |
@@ -680,7 +680,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, | |||
680 | /* Update guest view of 2nd HPTE dword */ | 680 | /* Update guest view of 2nd HPTE dword */ |
681 | mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | | 681 | mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | |
682 | HPTE_R_KEY_HI | HPTE_R_KEY_LO; | 682 | HPTE_R_KEY_HI | HPTE_R_KEY_LO; |
683 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | 683 | rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); |
684 | if (rev) { | 684 | if (rev) { |
685 | r = (rev->guest_rpte & ~mask) | bits; | 685 | r = (rev->guest_rpte & ~mask) | bits; |
686 | rev->guest_rpte = r; | 686 | rev->guest_rpte = r; |
@@ -728,15 +728,15 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, | |||
728 | 728 | ||
729 | if (kvm_is_radix(kvm)) | 729 | if (kvm_is_radix(kvm)) |
730 | return H_FUNCTION; | 730 | return H_FUNCTION; |
731 | if (pte_index >= kvm->arch.hpt_npte) | 731 | if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) |
732 | return H_PARAMETER; | 732 | return H_PARAMETER; |
733 | if (flags & H_READ_4) { | 733 | if (flags & H_READ_4) { |
734 | pte_index &= ~3; | 734 | pte_index &= ~3; |
735 | n = 4; | 735 | n = 4; |
736 | } | 736 | } |
737 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | 737 | rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); |
738 | for (i = 0; i < n; ++i, ++pte_index) { | 738 | for (i = 0; i < n; ++i, ++pte_index) { |
739 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); | 739 | hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); |
740 | v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; | 740 | v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; |
741 | r = be64_to_cpu(hpte[1]); | 741 | r = be64_to_cpu(hpte[1]); |
742 | if (cpu_has_feature(CPU_FTR_ARCH_300)) { | 742 | if (cpu_has_feature(CPU_FTR_ARCH_300)) { |
@@ -769,11 +769,11 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, | |||
769 | 769 | ||
770 | if (kvm_is_radix(kvm)) | 770 | if (kvm_is_radix(kvm)) |
771 | return H_FUNCTION; | 771 | return H_FUNCTION; |
772 | if (pte_index >= kvm->arch.hpt_npte) | 772 | if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) |
773 | return H_PARAMETER; | 773 | return H_PARAMETER; |
774 | 774 | ||
775 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | 775 | rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); |
776 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); | 776 | hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); |
777 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) | 777 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) |
778 | cpu_relax(); | 778 | cpu_relax(); |
779 | v = be64_to_cpu(hpte[0]); | 779 | v = be64_to_cpu(hpte[0]); |
@@ -817,11 +817,11 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, | |||
817 | 817 | ||
818 | if (kvm_is_radix(kvm)) | 818 | if (kvm_is_radix(kvm)) |
819 | return H_FUNCTION; | 819 | return H_FUNCTION; |
820 | if (pte_index >= kvm->arch.hpt_npte) | 820 | if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) |
821 | return H_PARAMETER; | 821 | return H_PARAMETER; |
822 | 822 | ||
823 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | 823 | rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); |
824 | hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); | 824 | hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); |
825 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) | 825 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) |
826 | cpu_relax(); | 826 | cpu_relax(); |
827 | v = be64_to_cpu(hpte[0]); | 827 | v = be64_to_cpu(hpte[0]); |
@@ -970,7 +970,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, | |||
970 | somask = (1UL << 28) - 1; | 970 | somask = (1UL << 28) - 1; |
971 | vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; | 971 | vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; |
972 | } | 972 | } |
973 | hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask; | 973 | hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt); |
974 | avpn = slb_v & ~(somask >> 16); /* also includes B */ | 974 | avpn = slb_v & ~(somask >> 16); /* also includes B */ |
975 | avpn |= (eaddr & somask) >> 16; | 975 | avpn |= (eaddr & somask) >> 16; |
976 | 976 | ||
@@ -981,7 +981,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, | |||
981 | val |= avpn; | 981 | val |= avpn; |
982 | 982 | ||
983 | for (;;) { | 983 | for (;;) { |
984 | hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7)); | 984 | hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7)); |
985 | 985 | ||
986 | for (i = 0; i < 16; i += 2) { | 986 | for (i = 0; i < 16; i += 2) { |
987 | /* Read the PTE racily */ | 987 | /* Read the PTE racily */ |
@@ -1017,7 +1017,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, | |||
1017 | if (val & HPTE_V_SECONDARY) | 1017 | if (val & HPTE_V_SECONDARY) |
1018 | break; | 1018 | break; |
1019 | val |= HPTE_V_SECONDARY; | 1019 | val |= HPTE_V_SECONDARY; |
1020 | hash = hash ^ kvm->arch.hpt_mask; | 1020 | hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt); |
1021 | } | 1021 | } |
1022 | return -1; | 1022 | return -1; |
1023 | } | 1023 | } |
@@ -1066,14 +1066,14 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, | |||
1066 | return status; /* there really was no HPTE */ | 1066 | return status; /* there really was no HPTE */ |
1067 | return 0; /* for prot fault, HPTE disappeared */ | 1067 | return 0; /* for prot fault, HPTE disappeared */ |
1068 | } | 1068 | } |
1069 | hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); | 1069 | hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); |
1070 | v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; | 1070 | v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; |
1071 | r = be64_to_cpu(hpte[1]); | 1071 | r = be64_to_cpu(hpte[1]); |
1072 | if (cpu_has_feature(CPU_FTR_ARCH_300)) { | 1072 | if (cpu_has_feature(CPU_FTR_ARCH_300)) { |
1073 | v = hpte_new_to_old_v(v, r); | 1073 | v = hpte_new_to_old_v(v, r); |
1074 | r = hpte_new_to_old_r(r); | 1074 | r = hpte_new_to_old_r(r); |
1075 | } | 1075 | } |
1076 | rev = real_vmalloc_addr(&kvm->arch.revmap[index]); | 1076 | rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]); |
1077 | gr = rev->guest_rpte; | 1077 | gr = rev->guest_rpte; |
1078 | 1078 | ||
1079 | unlock_hpte(hpte, orig_v); | 1079 | unlock_hpte(hpte, orig_v); |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 29f43ed6d5eb..e78542d99cd6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c | |||
@@ -35,7 +35,7 @@ int kvm_irq_bypass = 1; | |||
35 | EXPORT_SYMBOL(kvm_irq_bypass); | 35 | EXPORT_SYMBOL(kvm_irq_bypass); |
36 | 36 | ||
37 | static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | 37 | static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, |
38 | u32 new_irq); | 38 | u32 new_irq, bool check_resend); |
39 | static int xics_opal_set_server(unsigned int hw_irq, int server_cpu); | 39 | static int xics_opal_set_server(unsigned int hw_irq, int server_cpu); |
40 | 40 | ||
41 | /* -- ICS routines -- */ | 41 | /* -- ICS routines -- */ |
@@ -44,20 +44,12 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics, | |||
44 | { | 44 | { |
45 | int i; | 45 | int i; |
46 | 46 | ||
47 | arch_spin_lock(&ics->lock); | ||
48 | |||
49 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | 47 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { |
50 | struct ics_irq_state *state = &ics->irq_state[i]; | 48 | struct ics_irq_state *state = &ics->irq_state[i]; |
51 | 49 | if (state->resend) | |
52 | if (!state->resend) | 50 | icp_rm_deliver_irq(xics, icp, state->number, true); |
53 | continue; | ||
54 | |||
55 | arch_spin_unlock(&ics->lock); | ||
56 | icp_rm_deliver_irq(xics, icp, state->number); | ||
57 | arch_spin_lock(&ics->lock); | ||
58 | } | 51 | } |
59 | 52 | ||
60 | arch_spin_unlock(&ics->lock); | ||
61 | } | 53 | } |
62 | 54 | ||
63 | /* -- ICP routines -- */ | 55 | /* -- ICP routines -- */ |
@@ -288,7 +280,7 @@ static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority, | |||
288 | } | 280 | } |
289 | 281 | ||
290 | static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | 282 | static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, |
291 | u32 new_irq) | 283 | u32 new_irq, bool check_resend) |
292 | { | 284 | { |
293 | struct ics_irq_state *state; | 285 | struct ics_irq_state *state; |
294 | struct kvmppc_ics *ics; | 286 | struct kvmppc_ics *ics; |
@@ -333,6 +325,10 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | |||
333 | } | 325 | } |
334 | } | 326 | } |
335 | 327 | ||
328 | if (check_resend) | ||
329 | if (!state->resend) | ||
330 | goto out; | ||
331 | |||
336 | /* Clear the resend bit of that interrupt */ | 332 | /* Clear the resend bit of that interrupt */ |
337 | state->resend = 0; | 333 | state->resend = 0; |
338 | 334 | ||
@@ -378,7 +374,9 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | |||
378 | */ | 374 | */ |
379 | if (reject && reject != XICS_IPI) { | 375 | if (reject && reject != XICS_IPI) { |
380 | arch_spin_unlock(&ics->lock); | 376 | arch_spin_unlock(&ics->lock); |
377 | icp->n_reject++; | ||
381 | new_irq = reject; | 378 | new_irq = reject; |
379 | check_resend = 0; | ||
382 | goto again; | 380 | goto again; |
383 | } | 381 | } |
384 | } else { | 382 | } else { |
@@ -386,10 +384,16 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | |||
386 | * We failed to deliver the interrupt we need to set the | 384 | * We failed to deliver the interrupt we need to set the |
387 | * resend map bit and mark the ICS state as needing a resend | 385 | * resend map bit and mark the ICS state as needing a resend |
388 | */ | 386 | */ |
389 | set_bit(ics->icsid, icp->resend_map); | ||
390 | state->resend = 1; | 387 | state->resend = 1; |
391 | 388 | ||
392 | /* | 389 | /* |
390 | * Make sure when checking resend, we don't miss the resend | ||
391 | * if resend_map bit is seen and cleared. | ||
392 | */ | ||
393 | smp_wmb(); | ||
394 | set_bit(ics->icsid, icp->resend_map); | ||
395 | |||
396 | /* | ||
393 | * If the need_resend flag got cleared in the ICP some time | 397 | * If the need_resend flag got cleared in the ICP some time |
394 | * between icp_rm_try_to_deliver() atomic update and now, then | 398 | * between icp_rm_try_to_deliver() atomic update and now, then |
395 | * we know it might have missed the resend_map bit. So we | 399 | * we know it might have missed the resend_map bit. So we |
@@ -397,7 +401,9 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | |||
397 | */ | 401 | */ |
398 | smp_mb(); | 402 | smp_mb(); |
399 | if (!icp->state.need_resend) { | 403 | if (!icp->state.need_resend) { |
404 | state->resend = 0; | ||
400 | arch_spin_unlock(&ics->lock); | 405 | arch_spin_unlock(&ics->lock); |
406 | check_resend = 0; | ||
401 | goto again; | 407 | goto again; |
402 | } | 408 | } |
403 | } | 409 | } |
@@ -592,7 +598,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, | |||
592 | /* Handle reject in real mode */ | 598 | /* Handle reject in real mode */ |
593 | if (reject && reject != XICS_IPI) { | 599 | if (reject && reject != XICS_IPI) { |
594 | this_icp->n_reject++; | 600 | this_icp->n_reject++; |
595 | icp_rm_deliver_irq(xics, icp, reject); | 601 | icp_rm_deliver_irq(xics, icp, reject, false); |
596 | } | 602 | } |
597 | 603 | ||
598 | /* Handle resends in real mode */ | 604 | /* Handle resends in real mode */ |
@@ -660,59 +666,45 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) | |||
660 | */ | 666 | */ |
661 | if (reject && reject != XICS_IPI) { | 667 | if (reject && reject != XICS_IPI) { |
662 | icp->n_reject++; | 668 | icp->n_reject++; |
663 | icp_rm_deliver_irq(xics, icp, reject); | 669 | icp_rm_deliver_irq(xics, icp, reject, false); |
664 | } | 670 | } |
665 | bail: | 671 | bail: |
666 | return check_too_hard(xics, icp); | 672 | return check_too_hard(xics, icp); |
667 | } | 673 | } |
668 | 674 | ||
669 | int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | 675 | static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq) |
670 | { | 676 | { |
671 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | 677 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; |
672 | struct kvmppc_icp *icp = vcpu->arch.icp; | 678 | struct kvmppc_icp *icp = vcpu->arch.icp; |
673 | struct kvmppc_ics *ics; | 679 | struct kvmppc_ics *ics; |
674 | struct ics_irq_state *state; | 680 | struct ics_irq_state *state; |
675 | u32 irq = xirr & 0x00ffffff; | ||
676 | u16 src; | 681 | u16 src; |
677 | 682 | u32 pq_old, pq_new; | |
678 | if (!xics || !xics->real_mode) | ||
679 | return H_TOO_HARD; | ||
680 | 683 | ||
681 | /* | 684 | /* |
682 | * ICP State: EOI | 685 | * ICS EOI handling: For LSI, if P bit is still set, we need to |
686 | * resend it. | ||
683 | * | 687 | * |
684 | * Note: If EOI is incorrectly used by SW to lower the CPPR | 688 | * For MSI, we move Q bit into P (and clear Q). If it is set, |
685 | * value (ie more favored), we do not check for rejection of | 689 | * resend it. |
686 | * a pending interrupt, this is a SW error and PAPR sepcifies | ||
687 | * that we don't have to deal with it. | ||
688 | * | ||
689 | * The sending of an EOI to the ICS is handled after the | ||
690 | * CPPR update | ||
691 | * | ||
692 | * ICP State: Down_CPPR which we handle | ||
693 | * in a separate function as it's shared with H_CPPR. | ||
694 | */ | 690 | */ |
695 | icp_rm_down_cppr(xics, icp, xirr >> 24); | ||
696 | 691 | ||
697 | /* IPIs have no EOI */ | ||
698 | if (irq == XICS_IPI) | ||
699 | goto bail; | ||
700 | /* | ||
701 | * EOI handling: If the interrupt is still asserted, we need to | ||
702 | * resend it. We can take a lockless "peek" at the ICS state here. | ||
703 | * | ||
704 | * "Message" interrupts will never have "asserted" set | ||
705 | */ | ||
706 | ics = kvmppc_xics_find_ics(xics, irq, &src); | 692 | ics = kvmppc_xics_find_ics(xics, irq, &src); |
707 | if (!ics) | 693 | if (!ics) |
708 | goto bail; | 694 | goto bail; |
695 | |||
709 | state = &ics->irq_state[src]; | 696 | state = &ics->irq_state[src]; |
710 | 697 | ||
711 | /* Still asserted, resend it */ | 698 | if (state->lsi) |
712 | if (state->asserted) { | 699 | pq_new = state->pq_state; |
713 | icp->n_reject++; | 700 | else |
714 | icp_rm_deliver_irq(xics, icp, irq); | 701 | do { |
715 | } | 702 | pq_old = state->pq_state; |
703 | pq_new = pq_old >> 1; | ||
704 | } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old); | ||
705 | |||
706 | if (pq_new & PQ_PRESENTED) | ||
707 | icp_rm_deliver_irq(xics, NULL, irq, false); | ||
716 | 708 | ||
717 | if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) { | 709 | if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) { |
718 | icp->rm_action |= XICS_RM_NOTIFY_EOI; | 710 | icp->rm_action |= XICS_RM_NOTIFY_EOI; |
@@ -733,10 +725,43 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | |||
733 | state->intr_cpu = -1; | 725 | state->intr_cpu = -1; |
734 | } | 726 | } |
735 | } | 727 | } |
728 | |||
736 | bail: | 729 | bail: |
737 | return check_too_hard(xics, icp); | 730 | return check_too_hard(xics, icp); |
738 | } | 731 | } |
739 | 732 | ||
733 | int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | ||
734 | { | ||
735 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
736 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
737 | u32 irq = xirr & 0x00ffffff; | ||
738 | |||
739 | if (!xics || !xics->real_mode) | ||
740 | return H_TOO_HARD; | ||
741 | |||
742 | /* | ||
743 | * ICP State: EOI | ||
744 | * | ||
745 | * Note: If EOI is incorrectly used by SW to lower the CPPR | ||
746 | * value (ie more favored), we do not check for rejection of | ||
747 | * a pending interrupt, this is a SW error and PAPR specifies | ||
748 | * that we don't have to deal with it. | ||
749 | * | ||
750 | * The sending of an EOI to the ICS is handled after the | ||
751 | * CPPR update | ||
752 | * | ||
753 | * ICP State: Down_CPPR which we handle | ||
754 | * in a separate function as it's shared with H_CPPR. | ||
755 | */ | ||
756 | icp_rm_down_cppr(xics, icp, xirr >> 24); | ||
757 | |||
758 | /* IPIs have no EOI */ | ||
759 | if (irq == XICS_IPI) | ||
760 | return check_too_hard(xics, icp); | ||
761 | |||
762 | return ics_rm_eoi(vcpu, irq); | ||
763 | } | ||
764 | |||
740 | unsigned long eoi_rc; | 765 | unsigned long eoi_rc; |
741 | 766 | ||
742 | static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) | 767 | static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) |
@@ -823,14 +848,33 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, | |||
823 | { | 848 | { |
824 | struct kvmppc_xics *xics; | 849 | struct kvmppc_xics *xics; |
825 | struct kvmppc_icp *icp; | 850 | struct kvmppc_icp *icp; |
851 | struct kvmppc_ics *ics; | ||
852 | struct ics_irq_state *state; | ||
826 | u32 irq; | 853 | u32 irq; |
854 | u16 src; | ||
855 | u32 pq_old, pq_new; | ||
827 | 856 | ||
828 | irq = irq_map->v_hwirq; | 857 | irq = irq_map->v_hwirq; |
829 | xics = vcpu->kvm->arch.xics; | 858 | xics = vcpu->kvm->arch.xics; |
830 | icp = vcpu->arch.icp; | 859 | icp = vcpu->arch.icp; |
831 | 860 | ||
832 | kvmppc_rm_handle_irq_desc(irq_map->desc); | 861 | kvmppc_rm_handle_irq_desc(irq_map->desc); |
833 | icp_rm_deliver_irq(xics, icp, irq); | 862 | |
863 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
864 | if (!ics) | ||
865 | return 2; | ||
866 | |||
867 | state = &ics->irq_state[src]; | ||
868 | |||
869 | /* only MSIs register bypass producers, so it must be MSI here */ | ||
870 | do { | ||
871 | pq_old = state->pq_state; | ||
872 | pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED; | ||
873 | } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old); | ||
874 | |||
875 | /* Test P=1, Q=0, this is the only case where we present */ | ||
876 | if (pq_new == PQ_PRESENTED) | ||
877 | icp_rm_deliver_irq(xics, icp, irq, false); | ||
834 | 878 | ||
835 | /* EOI the interrupt */ | 879 | /* EOI the interrupt */ |
836 | icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, | 880 | icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, |
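The real-mode XICS changes above replace the single "asserted" flag with a two-bit P/Q state updated by lockless cmpxchg loops: a trigger shifts P into Q and sets P, and the interrupt is presented to the ICP only when the result is exactly P=1,Q=0; an MSI EOI shifts Q back into P and re-delivers if P is still set. A condensed standalone model of those two transitions (the PQ_PRESENTED/PQ_QUEUED values are assumed to be the low two bits, as the shifts in the hunks imply):

#include <stdatomic.h>
#include <stdbool.h>

#define PQ_PRESENTED	0x1
#define PQ_QUEUED	0x2

/* Trigger: P moves into Q, P is set; deliver only on the clean P=1,Q=0 result. */
static bool msi_trigger(_Atomic unsigned int *pq_state)
{
	unsigned int pq_old = atomic_load(pq_state), pq_new;

	do {
		pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
	} while (!atomic_compare_exchange_weak(pq_state, &pq_old, pq_new));

	return pq_new == PQ_PRESENTED;
}

/* EOI: Q moves back into P; a set P afterwards means the source must be resent. */
static bool msi_eoi(_Atomic unsigned int *pq_state)
{
	unsigned int pq_old = atomic_load(pq_state), pq_new;

	do {
		pq_new = pq_old >> 1;
	} while (!atomic_compare_exchange_weak(pq_state, &pq_old, pq_new));

	return pq_new & PQ_PRESENTED;
}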
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 1482961ceb4d..d4dfc0ca2a44 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
@@ -902,6 +902,69 @@ static void kvmppc_clear_debug(struct kvm_vcpu *vcpu) | |||
902 | } | 902 | } |
903 | } | 903 | } |
904 | 904 | ||
905 | static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
906 | unsigned int exit_nr) | ||
907 | { | ||
908 | enum emulation_result er; | ||
909 | ulong flags; | ||
910 | u32 last_inst; | ||
911 | int emul, r; | ||
912 | |||
913 | /* | ||
914 | * shadow_srr1 only contains valid flags if we came here via a program | ||
915 | * exception. The other exceptions (emulation assist, FP unavailable, | ||
916 | * etc.) do not provide flags in SRR1, so use an illegal-instruction | ||
917 | * exception when injecting a program interrupt into the guest. | ||
918 | */ | ||
919 | if (exit_nr == BOOK3S_INTERRUPT_PROGRAM) | ||
920 | flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; | ||
921 | else | ||
922 | flags = SRR1_PROGILL; | ||
923 | |||
924 | emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); | ||
925 | if (emul != EMULATE_DONE) | ||
926 | return RESUME_GUEST; | ||
927 | |||
928 | if (kvmppc_get_msr(vcpu) & MSR_PR) { | ||
929 | #ifdef EXIT_DEBUG | ||
930 | pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", | ||
931 | kvmppc_get_pc(vcpu), last_inst); | ||
932 | #endif | ||
933 | if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) { | ||
934 | kvmppc_core_queue_program(vcpu, flags); | ||
935 | return RESUME_GUEST; | ||
936 | } | ||
937 | } | ||
938 | |||
939 | vcpu->stat.emulated_inst_exits++; | ||
940 | er = kvmppc_emulate_instruction(run, vcpu); | ||
941 | switch (er) { | ||
942 | case EMULATE_DONE: | ||
943 | r = RESUME_GUEST_NV; | ||
944 | break; | ||
945 | case EMULATE_AGAIN: | ||
946 | r = RESUME_GUEST; | ||
947 | break; | ||
948 | case EMULATE_FAIL: | ||
949 | pr_crit("%s: emulation at %lx failed (%08x)\n", | ||
950 | __func__, kvmppc_get_pc(vcpu), last_inst); | ||
951 | kvmppc_core_queue_program(vcpu, flags); | ||
952 | r = RESUME_GUEST; | ||
953 | break; | ||
954 | case EMULATE_DO_MMIO: | ||
955 | run->exit_reason = KVM_EXIT_MMIO; | ||
956 | r = RESUME_HOST_NV; | ||
957 | break; | ||
958 | case EMULATE_EXIT_USER: | ||
959 | r = RESUME_HOST_NV; | ||
960 | break; | ||
961 | default: | ||
962 | BUG(); | ||
963 | } | ||
964 | |||
965 | return r; | ||
966 | } | ||
967 | |||
905 | int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, | 968 | int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, |
906 | unsigned int exit_nr) | 969 | unsigned int exit_nr) |
907 | { | 970 | { |
@@ -1044,71 +1107,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
1044 | break; | 1107 | break; |
1045 | case BOOK3S_INTERRUPT_PROGRAM: | 1108 | case BOOK3S_INTERRUPT_PROGRAM: |
1046 | case BOOK3S_INTERRUPT_H_EMUL_ASSIST: | 1109 | case BOOK3S_INTERRUPT_H_EMUL_ASSIST: |
1047 | { | 1110 | r = kvmppc_exit_pr_progint(run, vcpu, exit_nr); |
1048 | enum emulation_result er; | ||
1049 | ulong flags; | ||
1050 | u32 last_inst; | ||
1051 | int emul; | ||
1052 | |||
1053 | program_interrupt: | ||
1054 | /* | ||
1055 | * shadow_srr1 only contains valid flags if we came here via | ||
1056 | * a program exception. The other exceptions (emulation assist, | ||
1057 | * FP unavailable, etc.) do not provide flags in SRR1, so use | ||
1058 | * an illegal-instruction exception when injecting a program | ||
1059 | * interrupt into the guest. | ||
1060 | */ | ||
1061 | if (exit_nr == BOOK3S_INTERRUPT_PROGRAM) | ||
1062 | flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; | ||
1063 | else | ||
1064 | flags = SRR1_PROGILL; | ||
1065 | |||
1066 | emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); | ||
1067 | if (emul != EMULATE_DONE) { | ||
1068 | r = RESUME_GUEST; | ||
1069 | break; | ||
1070 | } | ||
1071 | |||
1072 | if (kvmppc_get_msr(vcpu) & MSR_PR) { | ||
1073 | #ifdef EXIT_DEBUG | ||
1074 | pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", | ||
1075 | kvmppc_get_pc(vcpu), last_inst); | ||
1076 | #endif | ||
1077 | if ((last_inst & 0xff0007ff) != | ||
1078 | (INS_DCBZ & 0xfffffff7)) { | ||
1079 | kvmppc_core_queue_program(vcpu, flags); | ||
1080 | r = RESUME_GUEST; | ||
1081 | break; | ||
1082 | } | ||
1083 | } | ||
1084 | |||
1085 | vcpu->stat.emulated_inst_exits++; | ||
1086 | er = kvmppc_emulate_instruction(run, vcpu); | ||
1087 | switch (er) { | ||
1088 | case EMULATE_DONE: | ||
1089 | r = RESUME_GUEST_NV; | ||
1090 | break; | ||
1091 | case EMULATE_AGAIN: | ||
1092 | r = RESUME_GUEST; | ||
1093 | break; | ||
1094 | case EMULATE_FAIL: | ||
1095 | printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", | ||
1096 | __func__, kvmppc_get_pc(vcpu), last_inst); | ||
1097 | kvmppc_core_queue_program(vcpu, flags); | ||
1098 | r = RESUME_GUEST; | ||
1099 | break; | ||
1100 | case EMULATE_DO_MMIO: | ||
1101 | run->exit_reason = KVM_EXIT_MMIO; | ||
1102 | r = RESUME_HOST_NV; | ||
1103 | break; | ||
1104 | case EMULATE_EXIT_USER: | ||
1105 | r = RESUME_HOST_NV; | ||
1106 | break; | ||
1107 | default: | ||
1108 | BUG(); | ||
1109 | } | ||
1110 | break; | 1111 | break; |
1111 | } | ||
1112 | case BOOK3S_INTERRUPT_SYSCALL: | 1112 | case BOOK3S_INTERRUPT_SYSCALL: |
1113 | { | 1113 | { |
1114 | u32 last_sc; | 1114 | u32 last_sc; |
@@ -1185,7 +1185,7 @@ program_interrupt: | |||
1185 | emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, | 1185 | emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, |
1186 | &last_inst); | 1186 | &last_inst); |
1187 | if (emul == EMULATE_DONE) | 1187 | if (emul == EMULATE_DONE) |
1188 | goto program_interrupt; | 1188 | r = kvmppc_exit_pr_progint(run, vcpu, exit_nr); |
1189 | else | 1189 | else |
1190 | r = RESUME_GUEST; | 1190 | r = RESUME_GUEST; |
1191 | 1191 | ||
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 20dff102a06f..e48803e2918d 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c | |||
@@ -63,7 +63,7 @@ | |||
63 | /* -- ICS routines -- */ | 63 | /* -- ICS routines -- */ |
64 | 64 | ||
65 | static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | 65 | static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, |
66 | u32 new_irq); | 66 | u32 new_irq, bool check_resend); |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Return value ideally indicates how the interrupt was handled, but no | 69 | * Return value ideally indicates how the interrupt was handled, but no |
@@ -75,6 +75,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) | |||
75 | struct ics_irq_state *state; | 75 | struct ics_irq_state *state; |
76 | struct kvmppc_ics *ics; | 76 | struct kvmppc_ics *ics; |
77 | u16 src; | 77 | u16 src; |
78 | u32 pq_old, pq_new; | ||
78 | 79 | ||
79 | XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); | 80 | XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); |
80 | 81 | ||
@@ -87,25 +88,41 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) | |||
87 | if (!state->exists) | 88 | if (!state->exists) |
88 | return -EINVAL; | 89 | return -EINVAL; |
89 | 90 | ||
91 | if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET) | ||
92 | level = 1; | ||
93 | else if (level == KVM_INTERRUPT_UNSET) | ||
94 | level = 0; | ||
90 | /* | 95 | /* |
91 | * We set state->asserted locklessly. This should be fine as | 96 | * Take other values the same as 1, consistent with original code. |
92 | * we are the only setter, thus concurrent access is undefined | 97 | * maybe WARN here? |
93 | * to begin with. | ||
94 | */ | 98 | */ |
95 | if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL) | 99 | |
96 | state->asserted = 1; | 100 | if (!state->lsi && level == 0) /* noop for MSI */ |
97 | else if (level == 0 || level == KVM_INTERRUPT_UNSET) { | ||
98 | state->asserted = 0; | ||
99 | return 0; | 101 | return 0; |
100 | } | 102 | |
103 | do { | ||
104 | pq_old = state->pq_state; | ||
105 | if (state->lsi) { | ||
106 | if (level) { | ||
107 | if (pq_old & PQ_PRESENTED) | ||
108 | /* Setting already set LSI ... */ | ||
109 | return 0; | ||
110 | |||
111 | pq_new = PQ_PRESENTED; | ||
112 | } else | ||
113 | pq_new = 0; | ||
114 | } else | ||
115 | pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED; | ||
116 | } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old); | ||
117 | |||
118 | /* Test P=1, Q=0, this is the only case where we present */ | ||
119 | if (pq_new == PQ_PRESENTED) | ||
120 | icp_deliver_irq(xics, NULL, irq, false); | ||
101 | 121 | ||
102 | /* Record which CPU this arrived on for passed-through interrupts */ | 122 | /* Record which CPU this arrived on for passed-through interrupts */ |
103 | if (state->host_irq) | 123 | if (state->host_irq) |
104 | state->intr_cpu = raw_smp_processor_id(); | 124 | state->intr_cpu = raw_smp_processor_id(); |
105 | 125 | ||
106 | /* Attempt delivery */ | ||
107 | icp_deliver_irq(xics, NULL, irq); | ||
108 | |||
109 | return 0; | 126 | return 0; |
110 | } | 127 | } |
111 | 128 | ||
@@ -114,29 +131,14 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, | |||
114 | { | 131 | { |
115 | int i; | 132 | int i; |
116 | 133 | ||
117 | unsigned long flags; | ||
118 | |||
119 | local_irq_save(flags); | ||
120 | arch_spin_lock(&ics->lock); | ||
121 | |||
122 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | 134 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { |
123 | struct ics_irq_state *state = &ics->irq_state[i]; | 135 | struct ics_irq_state *state = &ics->irq_state[i]; |
124 | 136 | if (state->resend) { | |
125 | if (!state->resend) | 137 | XICS_DBG("resend %#x prio %#x\n", state->number, |
126 | continue; | 138 | state->priority); |
127 | 139 | icp_deliver_irq(xics, icp, state->number, true); | |
128 | XICS_DBG("resend %#x prio %#x\n", state->number, | 140 | } |
129 | state->priority); | ||
130 | |||
131 | arch_spin_unlock(&ics->lock); | ||
132 | local_irq_restore(flags); | ||
133 | icp_deliver_irq(xics, icp, state->number); | ||
134 | local_irq_save(flags); | ||
135 | arch_spin_lock(&ics->lock); | ||
136 | } | 141 | } |
137 | |||
138 | arch_spin_unlock(&ics->lock); | ||
139 | local_irq_restore(flags); | ||
140 | } | 142 | } |
141 | 143 | ||
142 | static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, | 144 | static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, |
@@ -155,6 +157,7 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, | |||
155 | deliver = false; | 157 | deliver = false; |
156 | if ((state->masked_pending || state->resend) && priority != MASKED) { | 158 | if ((state->masked_pending || state->resend) && priority != MASKED) { |
157 | state->masked_pending = 0; | 159 | state->masked_pending = 0; |
160 | state->resend = 0; | ||
158 | deliver = true; | 161 | deliver = true; |
159 | } | 162 | } |
160 | 163 | ||
@@ -189,7 +192,7 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) | |||
189 | state->masked_pending, state->resend); | 192 | state->masked_pending, state->resend); |
190 | 193 | ||
191 | if (write_xive(xics, ics, state, server, priority, priority)) | 194 | if (write_xive(xics, ics, state, server, priority, priority)) |
192 | icp_deliver_irq(xics, icp, irq); | 195 | icp_deliver_irq(xics, icp, irq, false); |
193 | 196 | ||
194 | return 0; | 197 | return 0; |
195 | } | 198 | } |
@@ -242,7 +245,7 @@ int kvmppc_xics_int_on(struct kvm *kvm, u32 irq) | |||
242 | 245 | ||
243 | if (write_xive(xics, ics, state, state->server, state->saved_priority, | 246 | if (write_xive(xics, ics, state, state->server, state->saved_priority, |
244 | state->saved_priority)) | 247 | state->saved_priority)) |
245 | icp_deliver_irq(xics, icp, irq); | 248 | icp_deliver_irq(xics, icp, irq, false); |
246 | 249 | ||
247 | return 0; | 250 | return 0; |
248 | } | 251 | } |
@@ -376,7 +379,7 @@ static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority, | |||
376 | } | 379 | } |
377 | 380 | ||
378 | static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | 381 | static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, |
379 | u32 new_irq) | 382 | u32 new_irq, bool check_resend) |
380 | { | 383 | { |
381 | struct ics_irq_state *state; | 384 | struct ics_irq_state *state; |
382 | struct kvmppc_ics *ics; | 385 | struct kvmppc_ics *ics; |
@@ -422,6 +425,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | |||
422 | } | 425 | } |
423 | } | 426 | } |
424 | 427 | ||
428 | if (check_resend) | ||
429 | if (!state->resend) | ||
430 | goto out; | ||
431 | |||
425 | /* Clear the resend bit of that interrupt */ | 432 | /* Clear the resend bit of that interrupt */ |
426 | state->resend = 0; | 433 | state->resend = 0; |
427 | 434 | ||
@@ -470,6 +477,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | |||
470 | arch_spin_unlock(&ics->lock); | 477 | arch_spin_unlock(&ics->lock); |
471 | local_irq_restore(flags); | 478 | local_irq_restore(flags); |
472 | new_irq = reject; | 479 | new_irq = reject; |
480 | check_resend = 0; | ||
473 | goto again; | 481 | goto again; |
474 | } | 482 | } |
475 | } else { | 483 | } else { |
@@ -477,10 +485,16 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | |||
477 | * We failed to deliver the interrupt we need to set the | 485 | * We failed to deliver the interrupt we need to set the |
478 | * resend map bit and mark the ICS state as needing a resend | 486 | * resend map bit and mark the ICS state as needing a resend |
479 | */ | 487 | */ |
480 | set_bit(ics->icsid, icp->resend_map); | ||
481 | state->resend = 1; | 488 | state->resend = 1; |
482 | 489 | ||
483 | /* | 490 | /* |
491 | * Make sure when checking resend, we don't miss the resend | ||
492 | * if resend_map bit is seen and cleared. | ||
493 | */ | ||
494 | smp_wmb(); | ||
495 | set_bit(ics->icsid, icp->resend_map); | ||
496 | |||
497 | /* | ||
484 | * If the need_resend flag got cleared in the ICP some time | 498 | * If the need_resend flag got cleared in the ICP some time |
485 | * between icp_try_to_deliver() atomic update and now, then | 499 | * between icp_try_to_deliver() atomic update and now, then |
486 | * we know it might have missed the resend_map bit. So we | 500 | * we know it might have missed the resend_map bit. So we |
@@ -488,8 +502,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | |||
488 | */ | 502 | */ |
489 | smp_mb(); | 503 | smp_mb(); |
490 | if (!icp->state.need_resend) { | 504 | if (!icp->state.need_resend) { |
505 | state->resend = 0; | ||
491 | arch_spin_unlock(&ics->lock); | 506 | arch_spin_unlock(&ics->lock); |
492 | local_irq_restore(flags); | 507 | local_irq_restore(flags); |
508 | check_resend = 0; | ||
493 | goto again; | 509 | goto again; |
494 | } | 510 | } |
495 | } | 511 | } |
@@ -681,7 +697,7 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, | |||
681 | 697 | ||
682 | /* Handle reject */ | 698 | /* Handle reject */ |
683 | if (reject && reject != XICS_IPI) | 699 | if (reject && reject != XICS_IPI) |
684 | icp_deliver_irq(xics, icp, reject); | 700 | icp_deliver_irq(xics, icp, reject, false); |
685 | 701 | ||
686 | /* Handle resend */ | 702 | /* Handle resend */ |
687 | if (resend) | 703 | if (resend) |
@@ -761,17 +777,54 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) | |||
761 | * attempt (see comments in icp_deliver_irq). | 777 | * attempt (see comments in icp_deliver_irq). |
762 | */ | 778 | */ |
763 | if (reject && reject != XICS_IPI) | 779 | if (reject && reject != XICS_IPI) |
764 | icp_deliver_irq(xics, icp, reject); | 780 | icp_deliver_irq(xics, icp, reject, false); |
765 | } | 781 | } |
766 | 782 | ||
767 | static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | 783 | static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq) |
768 | { | 784 | { |
769 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | 785 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; |
770 | struct kvmppc_icp *icp = vcpu->arch.icp; | 786 | struct kvmppc_icp *icp = vcpu->arch.icp; |
771 | struct kvmppc_ics *ics; | 787 | struct kvmppc_ics *ics; |
772 | struct ics_irq_state *state; | 788 | struct ics_irq_state *state; |
773 | u32 irq = xirr & 0x00ffffff; | ||
774 | u16 src; | 789 | u16 src; |
790 | u32 pq_old, pq_new; | ||
791 | |||
792 | /* | ||
793 | * ICS EOI handling: For LSI, if P bit is still set, we need to | ||
794 | * resend it. | ||
795 | * | ||
796 | * For MSI, we move Q bit into P (and clear Q). If it is set, | ||
797 | * resend it. | ||
798 | */ | ||
799 | |||
800 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
801 | if (!ics) { | ||
802 | XICS_DBG("ios_eoi: IRQ 0x%06x not found !\n", irq); | ||
803 | return H_PARAMETER; | ||
804 | } | ||
805 | state = &ics->irq_state[src]; | ||
806 | |||
807 | if (state->lsi) | ||
808 | pq_new = state->pq_state; | ||
809 | else | ||
810 | do { | ||
811 | pq_old = state->pq_state; | ||
812 | pq_new = pq_old >> 1; | ||
813 | } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old); | ||
814 | |||
815 | if (pq_new & PQ_PRESENTED) | ||
816 | icp_deliver_irq(xics, icp, irq, false); | ||
817 | |||
818 | kvm_notify_acked_irq(vcpu->kvm, 0, irq); | ||
819 | |||
820 | return H_SUCCESS; | ||
821 | } | ||
822 | |||
823 | static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | ||
824 | { | ||
825 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
826 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
827 | u32 irq = xirr & 0x00ffffff; | ||
775 | 828 | ||
776 | XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); | 829 | XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); |
777 | 830 | ||
@@ -794,26 +847,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | |||
794 | /* IPIs have no EOI */ | 847 | /* IPIs have no EOI */ |
795 | if (irq == XICS_IPI) | 848 | if (irq == XICS_IPI) |
796 | return H_SUCCESS; | 849 | return H_SUCCESS; |
797 | /* | ||
798 | * EOI handling: If the interrupt is still asserted, we need to | ||
799 | * resend it. We can take a lockless "peek" at the ICS state here. | ||
800 | * | ||
801 | * "Message" interrupts will never have "asserted" set | ||
802 | */ | ||
803 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
804 | if (!ics) { | ||
805 | XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq); | ||
806 | return H_PARAMETER; | ||
807 | } | ||
808 | state = &ics->irq_state[src]; | ||
809 | 850 | ||
810 | /* Still asserted, resend it */ | 851 | return ics_eoi(vcpu, irq); |
811 | if (state->asserted) | ||
812 | icp_deliver_irq(xics, icp, irq); | ||
813 | |||
814 | kvm_notify_acked_irq(vcpu->kvm, 0, irq); | ||
815 | |||
816 | return H_SUCCESS; | ||
817 | } | 852 | } |
818 | 853 | ||
819 | int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) | 854 | int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) |
@@ -832,10 +867,6 @@ int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) | |||
832 | icp->n_rm_check_resend++; | 867 | icp->n_rm_check_resend++; |
833 | icp_check_resend(xics, icp->rm_resend_icp); | 868 | icp_check_resend(xics, icp->rm_resend_icp); |
834 | } | 869 | } |
835 | if (icp->rm_action & XICS_RM_REJECT) { | ||
836 | icp->n_rm_reject++; | ||
837 | icp_deliver_irq(xics, icp, icp->rm_reject); | ||
838 | } | ||
839 | if (icp->rm_action & XICS_RM_NOTIFY_EOI) { | 870 | if (icp->rm_action & XICS_RM_NOTIFY_EOI) { |
840 | icp->n_rm_notify_eoi++; | 871 | icp->n_rm_notify_eoi++; |
841 | kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq); | 872 | kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq); |
@@ -920,7 +951,7 @@ static int xics_debug_show(struct seq_file *m, void *private) | |||
920 | int icsid, i; | 951 | int icsid, i; |
921 | unsigned long flags; | 952 | unsigned long flags; |
922 | unsigned long t_rm_kick_vcpu, t_rm_check_resend; | 953 | unsigned long t_rm_kick_vcpu, t_rm_check_resend; |
923 | unsigned long t_rm_reject, t_rm_notify_eoi; | 954 | unsigned long t_rm_notify_eoi; |
924 | unsigned long t_reject, t_check_resend; | 955 | unsigned long t_reject, t_check_resend; |
925 | 956 | ||
926 | if (!kvm) | 957 | if (!kvm) |
@@ -929,7 +960,6 @@ static int xics_debug_show(struct seq_file *m, void *private) | |||
929 | t_rm_kick_vcpu = 0; | 960 | t_rm_kick_vcpu = 0; |
930 | t_rm_notify_eoi = 0; | 961 | t_rm_notify_eoi = 0; |
931 | t_rm_check_resend = 0; | 962 | t_rm_check_resend = 0; |
932 | t_rm_reject = 0; | ||
933 | t_check_resend = 0; | 963 | t_check_resend = 0; |
934 | t_reject = 0; | 964 | t_reject = 0; |
935 | 965 | ||
@@ -952,14 +982,13 @@ static int xics_debug_show(struct seq_file *m, void *private) | |||
952 | t_rm_kick_vcpu += icp->n_rm_kick_vcpu; | 982 | t_rm_kick_vcpu += icp->n_rm_kick_vcpu; |
953 | t_rm_notify_eoi += icp->n_rm_notify_eoi; | 983 | t_rm_notify_eoi += icp->n_rm_notify_eoi; |
954 | t_rm_check_resend += icp->n_rm_check_resend; | 984 | t_rm_check_resend += icp->n_rm_check_resend; |
955 | t_rm_reject += icp->n_rm_reject; | ||
956 | t_check_resend += icp->n_check_resend; | 985 | t_check_resend += icp->n_check_resend; |
957 | t_reject += icp->n_reject; | 986 | t_reject += icp->n_reject; |
958 | } | 987 | } |
959 | 988 | ||
960 | seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu reject=%lu notify_eoi=%lu\n", | 989 | seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu notify_eoi=%lu\n", |
961 | t_rm_kick_vcpu, t_rm_check_resend, | 990 | t_rm_kick_vcpu, t_rm_check_resend, |
962 | t_rm_reject, t_rm_notify_eoi); | 991 | t_rm_notify_eoi); |
963 | seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n", | 992 | seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n", |
964 | t_check_resend, t_reject); | 993 | t_check_resend, t_reject); |
965 | for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) { | 994 | for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) { |
@@ -977,9 +1006,9 @@ static int xics_debug_show(struct seq_file *m, void *private) | |||
977 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | 1006 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { |
978 | struct ics_irq_state *irq = &ics->irq_state[i]; | 1007 | struct ics_irq_state *irq = &ics->irq_state[i]; |
979 | 1008 | ||
980 | seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n", | 1009 | seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n", |
981 | irq->number, irq->server, irq->priority, | 1010 | irq->number, irq->server, irq->priority, |
982 | irq->saved_priority, irq->asserted, | 1011 | irq->saved_priority, irq->pq_state, |
983 | irq->resend, irq->masked_pending); | 1012 | irq->resend, irq->masked_pending); |
984 | 1013 | ||
985 | } | 1014 | } |
@@ -1198,10 +1227,17 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) | |||
1198 | val |= prio << KVM_XICS_PRIORITY_SHIFT; | 1227 | val |= prio << KVM_XICS_PRIORITY_SHIFT; |
1199 | if (irqp->lsi) { | 1228 | if (irqp->lsi) { |
1200 | val |= KVM_XICS_LEVEL_SENSITIVE; | 1229 | val |= KVM_XICS_LEVEL_SENSITIVE; |
1201 | if (irqp->asserted) | 1230 | if (irqp->pq_state & PQ_PRESENTED) |
1202 | val |= KVM_XICS_PENDING; | 1231 | val |= KVM_XICS_PENDING; |
1203 | } else if (irqp->masked_pending || irqp->resend) | 1232 | } else if (irqp->masked_pending || irqp->resend) |
1204 | val |= KVM_XICS_PENDING; | 1233 | val |= KVM_XICS_PENDING; |
1234 | |||
1235 | if (irqp->pq_state & PQ_PRESENTED) | ||
1236 | val |= KVM_XICS_PRESENTED; | ||
1237 | |||
1238 | if (irqp->pq_state & PQ_QUEUED) | ||
1239 | val |= KVM_XICS_QUEUED; | ||
1240 | |||
1205 | ret = 0; | 1241 | ret = 0; |
1206 | } | 1242 | } |
1207 | arch_spin_unlock(&ics->lock); | 1243 | arch_spin_unlock(&ics->lock); |
@@ -1253,18 +1289,20 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) | |||
1253 | irqp->resend = 0; | 1289 | irqp->resend = 0; |
1254 | irqp->masked_pending = 0; | 1290 | irqp->masked_pending = 0; |
1255 | irqp->lsi = 0; | 1291 | irqp->lsi = 0; |
1256 | irqp->asserted = 0; | 1292 | irqp->pq_state = 0; |
1257 | if (val & KVM_XICS_LEVEL_SENSITIVE) { | 1293 | if (val & KVM_XICS_LEVEL_SENSITIVE) |
1258 | irqp->lsi = 1; | 1294 | irqp->lsi = 1; |
1259 | if (val & KVM_XICS_PENDING) | 1295 | /* If PENDING, set P in case P is not saved because of old code */ |
1260 | irqp->asserted = 1; | 1296 | if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) |
1261 | } | 1297 | irqp->pq_state |= PQ_PRESENTED; |
1298 | if (val & KVM_XICS_QUEUED) | ||
1299 | irqp->pq_state |= PQ_QUEUED; | ||
1262 | irqp->exists = 1; | 1300 | irqp->exists = 1; |
1263 | arch_spin_unlock(&ics->lock); | 1301 | arch_spin_unlock(&ics->lock); |
1264 | local_irq_restore(flags); | 1302 | local_irq_restore(flags); |
1265 | 1303 | ||
1266 | if (val & KVM_XICS_PENDING) | 1304 | if (val & KVM_XICS_PENDING) |
1267 | icp_deliver_irq(xics, NULL, irqp->number); | 1305 | icp_deliver_irq(xics, NULL, irqp->number, false); |
1268 | 1306 | ||
1269 | return 0; | 1307 | return 0; |
1270 | } | 1308 | } |
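
To make the one-reg save/restore above concrete: xics_get_source() folds the new pq_state bits into the per-source 64-bit word, and xics_set_source() rebuilds them, treating an old-style PENDING-only image as "presented". A minimal userspace-style sketch of that mapping follows; the KVM_XICS_* bit positions are assumptions here and should be taken from the powerpc uapi <asm/kvm.h>, and the MSI masked_pending/resend path is left out.

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed flag bits -- the authoritative values are in the powerpc
     * uapi <asm/kvm.h>. */
    #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
    #define KVM_XICS_PENDING         (1ULL << 42)
    #define KVM_XICS_PRESENTED       (1ULL << 43)
    #define KVM_XICS_QUEUED          (1ULL << 44)

    #define PQ_PRESENTED 1
    #define PQ_QUEUED    2

    /* Mirror of xics_get_source(): fold pq_state into the saved word. */
    static uint64_t encode_pq(uint32_t pq_state, int lsi)
    {
        uint64_t val = 0;

        if (lsi) {
            val |= KVM_XICS_LEVEL_SENSITIVE;
            if (pq_state & PQ_PRESENTED)
                val |= KVM_XICS_PENDING;
        }
        if (pq_state & PQ_PRESENTED)
            val |= KVM_XICS_PRESENTED;
        if (pq_state & PQ_QUEUED)
            val |= KVM_XICS_QUEUED;
        return val;
    }

    /* Mirror of xics_set_source(): rebuild pq_state, accepting images
     * from old savers that only set PENDING. */
    static uint32_t decode_pq(uint64_t val)
    {
        uint32_t pq_state = 0;

        if (val & (KVM_XICS_PRESENTED | KVM_XICS_PENDING))
            pq_state |= PQ_PRESENTED;
        if (val & KVM_XICS_QUEUED)
            pq_state |= PQ_QUEUED;
        return pq_state;
    }

    int main(void)
    {
        uint64_t saved = encode_pq(PQ_PRESENTED | PQ_QUEUED, 1);

        printf("saved=%#llx -> pq_state=%u\n",
               (unsigned long long)saved, decode_pq(saved));
        return 0;
    }
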
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index 2a50320b55ca..ec5474cf70c6 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h | |||
@@ -31,16 +31,19 @@ | |||
31 | /* Priority value to use for disabling an interrupt */ | 31 | /* Priority value to use for disabling an interrupt */ |
32 | #define MASKED 0xff | 32 | #define MASKED 0xff |
33 | 33 | ||
34 | #define PQ_PRESENTED 1 | ||
35 | #define PQ_QUEUED 2 | ||
36 | |||
34 | /* State for one irq source */ | 37 | /* State for one irq source */ |
35 | struct ics_irq_state { | 38 | struct ics_irq_state { |
36 | u32 number; | 39 | u32 number; |
37 | u32 server; | 40 | u32 server; |
41 | u32 pq_state; | ||
38 | u8 priority; | 42 | u8 priority; |
39 | u8 saved_priority; | 43 | u8 saved_priority; |
40 | u8 resend; | 44 | u8 resend; |
41 | u8 masked_pending; | 45 | u8 masked_pending; |
42 | u8 lsi; /* level-sensitive interrupt */ | 46 | u8 lsi; /* level-sensitive interrupt */ |
43 | u8 asserted; /* Only for LSI */ | ||
44 | u8 exists; | 47 | u8 exists; |
45 | int intr_cpu; | 48 | int intr_cpu; |
46 | u32 host_irq; | 49 | u32 host_irq; |
@@ -73,7 +76,6 @@ struct kvmppc_icp { | |||
73 | */ | 76 | */ |
74 | #define XICS_RM_KICK_VCPU 0x1 | 77 | #define XICS_RM_KICK_VCPU 0x1 |
75 | #define XICS_RM_CHECK_RESEND 0x2 | 78 | #define XICS_RM_CHECK_RESEND 0x2 |
76 | #define XICS_RM_REJECT 0x4 | ||
77 | #define XICS_RM_NOTIFY_EOI 0x8 | 79 | #define XICS_RM_NOTIFY_EOI 0x8 |
78 | u32 rm_action; | 80 | u32 rm_action; |
79 | struct kvm_vcpu *rm_kick_target; | 81 | struct kvm_vcpu *rm_kick_target; |
@@ -84,7 +86,6 @@ struct kvmppc_icp { | |||
84 | /* Counters for each reason we exited real mode */ | 86 | /* Counters for each reason we exited real mode */ |
85 | unsigned long n_rm_kick_vcpu; | 87 | unsigned long n_rm_kick_vcpu; |
86 | unsigned long n_rm_check_resend; | 88 | unsigned long n_rm_check_resend; |
87 | unsigned long n_rm_reject; | ||
88 | unsigned long n_rm_notify_eoi; | 89 | unsigned long n_rm_notify_eoi; |
89 | /* Counters for handling ICP processing in real mode */ | 90 | /* Counters for handling ICP processing in real mode */ |
90 | unsigned long n_check_resend; | 91 | unsigned long n_check_resend; |
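
The PQ_PRESENTED/PQ_QUEUED pair defined above replaces the single asserted byte and follows the usual XICS/XIVE P/Q convention. The exact transitions live in icp_deliver_irq() and the new ics_eoi(), which are outside this hunk, so the following is only a rough, self-contained sketch of that convention rather than the driver's code:

    #include <stdbool.h>
    #include <stdio.h>

    #define PQ_PRESENTED 1
    #define PQ_QUEUED    2

    /* New occurrence of the source: present it if idle, otherwise
     * remember it in Q. */
    static unsigned int pq_trigger(unsigned int pq, bool *deliver_now)
    {
        if (!(pq & PQ_PRESENTED)) {
            *deliver_now = true;
            return pq | PQ_PRESENTED;
        }
        *deliver_now = false;
        return pq | PQ_QUEUED;
    }

    /* EOI: clear P; a pending Q becomes a fresh presentation. */
    static unsigned int pq_eoi(unsigned int pq, bool *redeliver)
    {
        *redeliver = pq & PQ_QUEUED;
        return *redeliver ? PQ_PRESENTED : 0;
    }

    int main(void)
    {
        bool d, r;
        unsigned int pq = 0;

        pq = pq_trigger(pq, &d);    /* delivered to the ICP */
        pq = pq_trigger(pq, &d);    /* queued behind the first */
        pq = pq_eoi(pq, &r);        /* EOI replays the queued occurrence */
        printf("pq=%u redeliver=%d\n", pq, (int)r);
        return 0;
    }
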
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 40a5b2d75ed1..2b38d824e9e5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -511,6 +511,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
511 | case KVM_CAP_ONE_REG: | 511 | case KVM_CAP_ONE_REG: |
512 | case KVM_CAP_IOEVENTFD: | 512 | case KVM_CAP_IOEVENTFD: |
513 | case KVM_CAP_DEVICE_CTRL: | 513 | case KVM_CAP_DEVICE_CTRL: |
514 | case KVM_CAP_IMMEDIATE_EXIT: | ||
514 | r = 1; | 515 | r = 1; |
515 | break; | 516 | break; |
516 | case KVM_CAP_PPC_PAIRED_SINGLES: | 517 | case KVM_CAP_PPC_PAIRED_SINGLES: |
@@ -612,6 +613,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
612 | case KVM_CAP_SPAPR_MULTITCE: | 613 | case KVM_CAP_SPAPR_MULTITCE: |
613 | r = 1; | 614 | r = 1; |
614 | break; | 615 | break; |
616 | case KVM_CAP_SPAPR_RESIZE_HPT: | ||
617 | /* Disable this on POWER9 until code handles new HPTE format */ | ||
618 | r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300); | ||
619 | break; | ||
615 | #endif | 620 | #endif |
616 | case KVM_CAP_PPC_HTM: | 621 | case KVM_CAP_PPC_HTM: |
617 | r = cpu_has_feature(CPU_FTR_TM_COMP) && | 622 | r = cpu_has_feature(CPU_FTR_TM_COMP) && |
@@ -1114,7 +1119,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1114 | #endif | 1119 | #endif |
1115 | } | 1120 | } |
1116 | 1121 | ||
1117 | r = kvmppc_vcpu_run(run, vcpu); | 1122 | if (run->immediate_exit) |
1123 | r = -EINTR; | ||
1124 | else | ||
1125 | r = kvmppc_vcpu_run(run, vcpu); | ||
1118 | 1126 | ||
1119 | if (vcpu->sigset_active) | 1127 | if (vcpu->sigset_active) |
1120 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1128 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
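
KVM_CAP_IMMEDIATE_EXIT is the race-free replacement for signalling a vCPU out of KVM_RUN: when run->immediate_exit is non-zero, the ioctl returns -EINTR before entering the guest. A hedged userspace sketch of the intended use, assuming vcpu_fd and its mmap()ed struct kvm_run are already set up and that the capability has been confirmed with KVM_CHECK_EXTENSION:

    #include <errno.h>
    #include <signal.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Set asynchronously (another thread or a signal handler) to ask the
     * vCPU loop to stop entering the guest. */
    static volatile sig_atomic_t stop_requested;

    /* vcpu_fd is an existing vCPU fd, run its mmap()ed struct kvm_run. */
    static int run_vcpu(int vcpu_fd, struct kvm_run *run)
    {
        for (;;) {
            /* The kernel checks this before entering the guest, so a
             * request posted at any time is never lost. */
            run->immediate_exit = stop_requested;

            if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                if (errno == EINTR)    /* immediate_exit (or a signal) */
                    return 0;
                return -1;
            }
            /* ... dispatch on run->exit_reason here ... */
        }
    }
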
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4aa8a7e2a1da..4492c9363178 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c | |||
@@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu) | |||
373 | ipte_unlock_simple(vcpu); | 373 | ipte_unlock_simple(vcpu); |
374 | } | 374 | } |
375 | 375 | ||
376 | static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, | 376 | static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar, |
377 | enum gacc_mode mode) | 377 | enum gacc_mode mode) |
378 | { | 378 | { |
379 | union alet alet; | 379 | union alet alet; |
@@ -465,7 +465,9 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, | |||
465 | struct trans_exc_code_bits { | 465 | struct trans_exc_code_bits { |
466 | unsigned long addr : 52; /* Translation-exception Address */ | 466 | unsigned long addr : 52; /* Translation-exception Address */ |
467 | unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ | 467 | unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ |
468 | unsigned long : 6; | 468 | unsigned long : 2; |
469 | unsigned long b56 : 1; | ||
470 | unsigned long : 3; | ||
469 | unsigned long b60 : 1; | 471 | unsigned long b60 : 1; |
470 | unsigned long b61 : 1; | 472 | unsigned long b61 : 1; |
471 | unsigned long as : 2; /* ASCE Identifier */ | 473 | unsigned long as : 2; /* ASCE Identifier */ |
@@ -485,7 +487,7 @@ enum prot_type { | |||
485 | }; | 487 | }; |
486 | 488 | ||
487 | static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, | 489 | static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, |
488 | ar_t ar, enum gacc_mode mode, enum prot_type prot) | 490 | u8 ar, enum gacc_mode mode, enum prot_type prot) |
489 | { | 491 | { |
490 | struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; | 492 | struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; |
491 | struct trans_exc_code_bits *tec; | 493 | struct trans_exc_code_bits *tec; |
@@ -497,14 +499,18 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, | |||
497 | switch (code) { | 499 | switch (code) { |
498 | case PGM_PROTECTION: | 500 | case PGM_PROTECTION: |
499 | switch (prot) { | 501 | switch (prot) { |
502 | case PROT_TYPE_LA: | ||
503 | tec->b56 = 1; | ||
504 | break; | ||
505 | case PROT_TYPE_KEYC: | ||
506 | tec->b60 = 1; | ||
507 | break; | ||
500 | case PROT_TYPE_ALC: | 508 | case PROT_TYPE_ALC: |
501 | tec->b60 = 1; | 509 | tec->b60 = 1; |
502 | /* FALL THROUGH */ | 510 | /* FALL THROUGH */ |
503 | case PROT_TYPE_DAT: | 511 | case PROT_TYPE_DAT: |
504 | tec->b61 = 1; | 512 | tec->b61 = 1; |
505 | break; | 513 | break; |
506 | default: /* LA and KEYC set b61 to 0, other params undefined */ | ||
507 | return code; | ||
508 | } | 514 | } |
509 | /* FALL THROUGH */ | 515 | /* FALL THROUGH */ |
510 | case PGM_ASCE_TYPE: | 516 | case PGM_ASCE_TYPE: |
@@ -539,7 +545,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, | |||
539 | } | 545 | } |
540 | 546 | ||
541 | static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, | 547 | static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, |
542 | unsigned long ga, ar_t ar, enum gacc_mode mode) | 548 | unsigned long ga, u8 ar, enum gacc_mode mode) |
543 | { | 549 | { |
544 | int rc; | 550 | int rc; |
545 | struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); | 551 | struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); |
@@ -771,7 +777,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, | |||
771 | return 1; | 777 | return 1; |
772 | } | 778 | } |
773 | 779 | ||
774 | static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, | 780 | static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, |
775 | unsigned long *pages, unsigned long nr_pages, | 781 | unsigned long *pages, unsigned long nr_pages, |
776 | const union asce asce, enum gacc_mode mode) | 782 | const union asce asce, enum gacc_mode mode) |
777 | { | 783 | { |
@@ -803,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, | |||
803 | return 0; | 809 | return 0; |
804 | } | 810 | } |
805 | 811 | ||
806 | int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | 812 | int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, |
807 | unsigned long len, enum gacc_mode mode) | 813 | unsigned long len, enum gacc_mode mode) |
808 | { | 814 | { |
809 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 815 | psw_t *psw = &vcpu->arch.sie_block->gpsw; |
@@ -877,7 +883,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, | |||
877 | * Note: The IPTE lock is not taken during this function, so the caller | 883 | * Note: The IPTE lock is not taken during this function, so the caller |
878 | * has to take care of this. | 884 | * has to take care of this. |
879 | */ | 885 | */ |
880 | int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | 886 | int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, |
881 | unsigned long *gpa, enum gacc_mode mode) | 887 | unsigned long *gpa, enum gacc_mode mode) |
882 | { | 888 | { |
883 | psw_t *psw = &vcpu->arch.sie_block->gpsw; | 889 | psw_t *psw = &vcpu->arch.sie_block->gpsw; |
@@ -910,7 +916,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | |||
910 | /** | 916 | /** |
911 | * check_gva_range - test a range of guest virtual addresses for accessibility | 917 | * check_gva_range - test a range of guest virtual addresses for accessibility |
912 | */ | 918 | */ |
913 | int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | 919 | int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, |
914 | unsigned long length, enum gacc_mode mode) | 920 | unsigned long length, enum gacc_mode mode) |
915 | { | 921 | { |
916 | unsigned long gpa; | 922 | unsigned long gpa; |
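
For reference, the protection-exception cases above now set distinct translation-exception-code bits per cause: bit 56 for low-address protection, bit 60 for key-controlled and access-list-controlled protection, and bit 61 for DAT protection, with ALC setting both 60 and 61 via the fall-through. A compact sketch of that assignment, packing the three bits into an ad-hoc mask purely for illustration:

    #include <stdio.h>

    enum prot_type { PROT_TYPE_LA, PROT_TYPE_KEYC, PROT_TYPE_ALC, PROT_TYPE_DAT };

    /* Return the TEC bits 56/60/61 set by trans_exc() for PGM_PROTECTION,
     * encoded here as an ad-hoc 3-bit value (b56,b60,b61). */
    static unsigned int prot_tec_bits(enum prot_type prot)
    {
        unsigned int b56 = 0, b60 = 0, b61 = 0;

        switch (prot) {
        case PROT_TYPE_LA:
            b56 = 1;
            break;
        case PROT_TYPE_KEYC:
            b60 = 1;
            break;
        case PROT_TYPE_ALC:
            b60 = 1;
            /* fall through: ALC also reports DAT protection */
        case PROT_TYPE_DAT:
            b61 = 1;
            break;
        }
        return (b56 << 2) | (b60 << 1) | b61;
    }

    int main(void)
    {
        printf("LA=%x KEYC=%x ALC=%x DAT=%x\n",
               prot_tec_bits(PROT_TYPE_LA), prot_tec_bits(PROT_TYPE_KEYC),
               prot_tec_bits(PROT_TYPE_ALC), prot_tec_bits(PROT_TYPE_DAT));
        return 0;
    }
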
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 8756569ad938..7ce47fd36f28 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h | |||
@@ -162,11 +162,11 @@ enum gacc_mode { | |||
162 | }; | 162 | }; |
163 | 163 | ||
164 | int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, | 164 | int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, |
165 | ar_t ar, unsigned long *gpa, enum gacc_mode mode); | 165 | u8 ar, unsigned long *gpa, enum gacc_mode mode); |
166 | int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, | 166 | int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, |
167 | unsigned long length, enum gacc_mode mode); | 167 | unsigned long length, enum gacc_mode mode); |
168 | 168 | ||
169 | int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | 169 | int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, |
170 | unsigned long len, enum gacc_mode mode); | 170 | unsigned long len, enum gacc_mode mode); |
171 | 171 | ||
172 | int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, | 172 | int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, |
@@ -218,7 +218,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, | |||
218 | * if data has been changed in guest space in case of an exception. | 218 | * if data has been changed in guest space in case of an exception. |
219 | */ | 219 | */ |
220 | static inline __must_check | 220 | static inline __must_check |
221 | int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | 221 | int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, |
222 | unsigned long len) | 222 | unsigned long len) |
223 | { | 223 | { |
224 | return access_guest(vcpu, ga, ar, data, len, GACC_STORE); | 224 | return access_guest(vcpu, ga, ar, data, len, GACC_STORE); |
@@ -238,7 +238,7 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | |||
238 | * data will be copied from guest space to kernel space. | 238 | * data will be copied from guest space to kernel space. |
239 | */ | 239 | */ |
240 | static inline __must_check | 240 | static inline __must_check |
241 | int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | 241 | int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, |
242 | unsigned long len) | 242 | unsigned long len) |
243 | { | 243 | { |
244 | return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); | 244 | return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); |
@@ -247,10 +247,11 @@ int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | |||
247 | /** | 247 | /** |
248 | * read_guest_instr - copy instruction data from guest space to kernel space | 248 | * read_guest_instr - copy instruction data from guest space to kernel space |
249 | * @vcpu: virtual cpu | 249 | * @vcpu: virtual cpu |
250 | * @ga: guest address | ||
250 | * @data: destination address in kernel space | 251 | * @data: destination address in kernel space |
251 | * @len: number of bytes to copy | 252 | * @len: number of bytes to copy |
252 | * | 253 | * |
253 | * Copy @len bytes from the current psw address (guest space) to @data (kernel | 254 | * Copy @len bytes from the given address (guest space) to @data (kernel |
254 | * space). | 255 | * space). |
255 | * | 256 | * |
256 | * The behaviour of read_guest_instr is identical to read_guest, except that | 257 | * The behaviour of read_guest_instr is identical to read_guest, except that |
@@ -258,10 +259,10 @@ int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, | |||
258 | * address-space mode. | 259 | * address-space mode. |
259 | */ | 260 | */ |
260 | static inline __must_check | 261 | static inline __must_check |
261 | int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len) | 262 | int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data, |
263 | unsigned long len) | ||
262 | { | 264 | { |
263 | return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len, | 265 | return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH); |
264 | GACC_IFETCH); | ||
265 | } | 266 | } |
266 | 267 | ||
267 | /** | 268 | /** |
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index d7c6a7f53ced..23d9a4e12da1 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c | |||
@@ -388,14 +388,13 @@ void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu) | |||
388 | #define per_write_wp_event(code) \ | 388 | #define per_write_wp_event(code) \ |
389 | (code & (PER_CODE_STORE | PER_CODE_STORE_REAL)) | 389 | (code & (PER_CODE_STORE | PER_CODE_STORE_REAL)) |
390 | 390 | ||
391 | static int debug_exit_required(struct kvm_vcpu *vcpu) | 391 | static int debug_exit_required(struct kvm_vcpu *vcpu, u8 perc, |
392 | unsigned long peraddr) | ||
392 | { | 393 | { |
393 | u8 perc = vcpu->arch.sie_block->perc; | ||
394 | struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch; | 394 | struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch; |
395 | struct kvm_hw_wp_info_arch *wp_info = NULL; | 395 | struct kvm_hw_wp_info_arch *wp_info = NULL; |
396 | struct kvm_hw_bp_info_arch *bp_info = NULL; | 396 | struct kvm_hw_bp_info_arch *bp_info = NULL; |
397 | unsigned long addr = vcpu->arch.sie_block->gpsw.addr; | 397 | unsigned long addr = vcpu->arch.sie_block->gpsw.addr; |
398 | unsigned long peraddr = vcpu->arch.sie_block->peraddr; | ||
399 | 398 | ||
400 | if (guestdbg_hw_bp_enabled(vcpu)) { | 399 | if (guestdbg_hw_bp_enabled(vcpu)) { |
401 | if (per_write_wp_event(perc) && | 400 | if (per_write_wp_event(perc) && |
@@ -437,36 +436,118 @@ exit_required: | |||
437 | return 1; | 436 | return 1; |
438 | } | 437 | } |
439 | 438 | ||
439 | static int per_fetched_addr(struct kvm_vcpu *vcpu, unsigned long *addr) | ||
440 | { | ||
441 | u8 exec_ilen = 0; | ||
442 | u16 opcode[3]; | ||
443 | int rc; | ||
444 | |||
445 | if (vcpu->arch.sie_block->icptcode == ICPT_PROGI) { | ||
446 | /* PER address references the fetched or the execute instr */ | ||
447 | *addr = vcpu->arch.sie_block->peraddr; | ||
448 | /* | ||
449 | * Manually detect if we have an EXECUTE instruction. As | ||
450 | * instructions are always 2 byte aligned we can read the | ||
451 | * first two bytes unconditionally | ||
452 | */ | ||
453 | rc = read_guest_instr(vcpu, *addr, &opcode, 2); | ||
454 | if (rc) | ||
455 | return rc; | ||
456 | if (opcode[0] >> 8 == 0x44) | ||
457 | exec_ilen = 4; | ||
458 | if ((opcode[0] & 0xff0f) == 0xc600) | ||
459 | exec_ilen = 6; | ||
460 | } else { | ||
461 | /* instr was suppressed, calculate the responsible instr */ | ||
462 | *addr = __rewind_psw(vcpu->arch.sie_block->gpsw, | ||
463 | kvm_s390_get_ilen(vcpu)); | ||
464 | if (vcpu->arch.sie_block->icptstatus & 0x01) { | ||
465 | exec_ilen = (vcpu->arch.sie_block->icptstatus & 0x60) >> 4; | ||
466 | if (!exec_ilen) | ||
467 | exec_ilen = 4; | ||
468 | } | ||
469 | } | ||
470 | |||
471 | if (exec_ilen) { | ||
472 | /* read the complete EXECUTE instr to detect the fetched addr */ | ||
473 | rc = read_guest_instr(vcpu, *addr, &opcode, exec_ilen); | ||
474 | if (rc) | ||
475 | return rc; | ||
476 | if (exec_ilen == 6) { | ||
477 | /* EXECUTE RELATIVE LONG - RIL-b format */ | ||
478 | s32 rl = *((s32 *) (opcode + 1)); | ||
479 | |||
480 | /* rl is a _signed_ 32 bit value specifying halfwords */ | ||
481 | *addr += (u64)(s64) rl * 2; | ||
482 | } else { | ||
483 | /* EXECUTE - RX-a format */ | ||
484 | u32 base = (opcode[1] & 0xf000) >> 12; | ||
485 | u32 disp = opcode[1] & 0x0fff; | ||
486 | u32 index = opcode[0] & 0x000f; | ||
487 | |||
488 | *addr = base ? vcpu->run->s.regs.gprs[base] : 0; | ||
489 | *addr += index ? vcpu->run->s.regs.gprs[index] : 0; | ||
490 | *addr += disp; | ||
491 | } | ||
492 | *addr = kvm_s390_logical_to_effective(vcpu, *addr); | ||
493 | } | ||
494 | return 0; | ||
495 | } | ||
496 | |||
440 | #define guest_per_enabled(vcpu) \ | 497 | #define guest_per_enabled(vcpu) \ |
441 | (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) | 498 | (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) |
442 | 499 | ||
443 | int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu) | 500 | int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu) |
444 | { | 501 | { |
502 | const u64 cr10 = vcpu->arch.sie_block->gcr[10]; | ||
503 | const u64 cr11 = vcpu->arch.sie_block->gcr[11]; | ||
445 | const u8 ilen = kvm_s390_get_ilen(vcpu); | 504 | const u8 ilen = kvm_s390_get_ilen(vcpu); |
446 | struct kvm_s390_pgm_info pgm_info = { | 505 | struct kvm_s390_pgm_info pgm_info = { |
447 | .code = PGM_PER, | 506 | .code = PGM_PER, |
448 | .per_code = PER_CODE_IFETCH, | 507 | .per_code = PER_CODE_IFETCH, |
449 | .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen), | 508 | .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen), |
450 | }; | 509 | }; |
510 | unsigned long fetched_addr; | ||
511 | int rc; | ||
451 | 512 | ||
452 | /* | 513 | /* |
453 | * The PSW points to the next instruction, therefore the intercepted | 514 | * The PSW points to the next instruction, therefore the intercepted |
454 | * instruction generated a PER i-fetch event. PER address therefore | 515 | * instruction generated a PER i-fetch event. PER address therefore |
455 | * points at the previous PSW address (could be an EXECUTE function). | 516 | * points at the previous PSW address (could be an EXECUTE function). |
456 | */ | 517 | */ |
457 | return kvm_s390_inject_prog_irq(vcpu, &pgm_info); | 518 | if (!guestdbg_enabled(vcpu)) |
519 | return kvm_s390_inject_prog_irq(vcpu, &pgm_info); | ||
520 | |||
521 | if (debug_exit_required(vcpu, pgm_info.per_code, pgm_info.per_address)) | ||
522 | vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; | ||
523 | |||
524 | if (!guest_per_enabled(vcpu) || | ||
525 | !(vcpu->arch.sie_block->gcr[9] & PER_EVENT_IFETCH)) | ||
526 | return 0; | ||
527 | |||
528 | rc = per_fetched_addr(vcpu, &fetched_addr); | ||
529 | if (rc < 0) | ||
530 | return rc; | ||
531 | if (rc) | ||
532 | /* instruction-fetching exceptions */ | ||
533 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
534 | |||
535 | if (in_addr_range(fetched_addr, cr10, cr11)) | ||
536 | return kvm_s390_inject_prog_irq(vcpu, &pgm_info); | ||
537 | return 0; | ||
458 | } | 538 | } |
459 | 539 | ||
460 | static void filter_guest_per_event(struct kvm_vcpu *vcpu) | 540 | static int filter_guest_per_event(struct kvm_vcpu *vcpu) |
461 | { | 541 | { |
462 | const u8 perc = vcpu->arch.sie_block->perc; | 542 | const u8 perc = vcpu->arch.sie_block->perc; |
463 | u64 peraddr = vcpu->arch.sie_block->peraddr; | ||
464 | u64 addr = vcpu->arch.sie_block->gpsw.addr; | 543 | u64 addr = vcpu->arch.sie_block->gpsw.addr; |
465 | u64 cr9 = vcpu->arch.sie_block->gcr[9]; | 544 | u64 cr9 = vcpu->arch.sie_block->gcr[9]; |
466 | u64 cr10 = vcpu->arch.sie_block->gcr[10]; | 545 | u64 cr10 = vcpu->arch.sie_block->gcr[10]; |
467 | u64 cr11 = vcpu->arch.sie_block->gcr[11]; | 546 | u64 cr11 = vcpu->arch.sie_block->gcr[11]; |
468 | /* filter all events, demanded by the guest */ | 547 | /* filter all events, demanded by the guest */ |
469 | u8 guest_perc = perc & (cr9 >> 24) & PER_CODE_MASK; | 548 | u8 guest_perc = perc & (cr9 >> 24) & PER_CODE_MASK; |
549 | unsigned long fetched_addr; | ||
550 | int rc; | ||
470 | 551 | ||
471 | if (!guest_per_enabled(vcpu)) | 552 | if (!guest_per_enabled(vcpu)) |
472 | guest_perc = 0; | 553 | guest_perc = 0; |
@@ -478,9 +559,17 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) | |||
478 | guest_perc &= ~PER_CODE_BRANCH; | 559 | guest_perc &= ~PER_CODE_BRANCH; |
479 | 560 | ||
480 | /* filter "instruction-fetching" events */ | 561 | /* filter "instruction-fetching" events */ |
481 | if (guest_perc & PER_CODE_IFETCH && | 562 | if (guest_perc & PER_CODE_IFETCH) { |
482 | !in_addr_range(peraddr, cr10, cr11)) | 563 | rc = per_fetched_addr(vcpu, &fetched_addr); |
483 | guest_perc &= ~PER_CODE_IFETCH; | 564 | if (rc < 0) |
565 | return rc; | ||
566 | /* | ||
567 | * Don't inject an irq on exceptions. This would make handling | ||
568 | * on icpt code 8 very complex (as PSW was already rewound). | ||
569 | */ | ||
570 | if (rc || !in_addr_range(fetched_addr, cr10, cr11)) | ||
571 | guest_perc &= ~PER_CODE_IFETCH; | ||
572 | } | ||
484 | 573 | ||
485 | /* All other PER events will be given to the guest */ | 574 | /* All other PER events will be given to the guest */ |
486 | /* TODO: Check altered address/address space */ | 575 | /* TODO: Check altered address/address space */ |
@@ -489,6 +578,7 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) | |||
489 | 578 | ||
490 | if (!guest_perc) | 579 | if (!guest_perc) |
491 | vcpu->arch.sie_block->iprcc &= ~PGM_PER; | 580 | vcpu->arch.sie_block->iprcc &= ~PGM_PER; |
581 | return 0; | ||
492 | } | 582 | } |
493 | 583 | ||
494 | #define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH) | 584 | #define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH) |
@@ -496,14 +586,17 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) | |||
496 | #define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1) | 586 | #define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1) |
497 | #define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff) | 587 | #define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff) |
498 | 588 | ||
499 | void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) | 589 | int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) |
500 | { | 590 | { |
501 | int new_as; | 591 | int rc, new_as; |
502 | 592 | ||
503 | if (debug_exit_required(vcpu)) | 593 | if (debug_exit_required(vcpu, vcpu->arch.sie_block->perc, |
594 | vcpu->arch.sie_block->peraddr)) | ||
504 | vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; | 595 | vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; |
505 | 596 | ||
506 | filter_guest_per_event(vcpu); | 597 | rc = filter_guest_per_event(vcpu); |
598 | if (rc) | ||
599 | return rc; | ||
507 | 600 | ||
508 | /* | 601 | /* |
509 | * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger | 602 | * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger |
@@ -532,4 +625,5 @@ void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) | |||
532 | (pssec(vcpu) || old_ssec(vcpu))) | 625 | (pssec(vcpu) || old_ssec(vcpu))) |
533 | vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; | 626 | vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; |
534 | } | 627 | } |
628 | return 0; | ||
535 | } | 629 | } |
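
The new per_fetched_addr() above has to chase EXECUTE targets so that instruction-fetch PER events are matched against the instruction that was actually fetched. Below is a standalone sketch of just the target-address computation it performs for EXECUTE (opcode 0x44, RX-a) and EXECUTE RELATIVE LONG (0xc6?0, RIL-b); the logical-to-effective conversion and the guest memory reads are omitted, and the RIL immediate is composed explicitly here because the kernel can simply read it in place on big-endian s390.

    #include <stdint.h>
    #include <stdio.h>

    /* Given the raw instruction halfwords of an execute-type instruction at
     * address 'addr' and the guest GPRs, return the address of the target
     * instruction it fetches. */
    static uint64_t exec_target(uint64_t addr, const uint16_t op[3],
                                const uint64_t gprs[16])
    {
        if (op[0] >> 8 == 0x44) {                 /* EXECUTE, 4 bytes, RX-a */
            uint32_t index = op[0] & 0x000f;
            uint32_t base  = (op[1] & 0xf000) >> 12;
            uint32_t disp  = op[1] & 0x0fff;

            return (base ? gprs[base] : 0) +
                   (index ? gprs[index] : 0) + disp;
        }
        if ((op[0] & 0xff0f) == 0xc600) {         /* EXRL, 6 bytes, RIL-b */
            int32_t rl = (int32_t)(((uint32_t)op[1] << 16) | op[2]);

            /* rl counts signed halfwords relative to the EXRL itself */
            return addr + (int64_t)rl * 2;
        }
        return addr;                              /* not an execute-type insn */
    }

    int main(void)
    {
        uint64_t gprs[16] = { 0 };
        uint16_t ex[3]   = { 0x4412, 0x1100, 0 };      /* EX r1,0x100(r2,r1) */
        uint16_t exrl[3] = { 0xc610, 0x0000, 0x0008 }; /* EXRL r1,+16 bytes */

        gprs[1] = 0x1000;
        gprs[2] = 0x2000;
        printf("EX   -> %#llx\n", (unsigned long long)exec_target(0x4000, ex, gprs));
        printf("EXRL -> %#llx\n", (unsigned long long)exec_target(0x4000, exrl, gprs));
        return 0;
    }
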
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 7a27eebab28a..59920f96ebc0 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -238,7 +238,9 @@ static int handle_prog(struct kvm_vcpu *vcpu) | |||
238 | vcpu->stat.exit_program_interruption++; | 238 | vcpu->stat.exit_program_interruption++; |
239 | 239 | ||
240 | if (guestdbg_enabled(vcpu) && per_event(vcpu)) { | 240 | if (guestdbg_enabled(vcpu) && per_event(vcpu)) { |
241 | kvm_s390_handle_per_event(vcpu); | 241 | rc = kvm_s390_handle_per_event(vcpu); |
242 | if (rc) | ||
243 | return rc; | ||
242 | /* the interrupt might have been filtered out completely */ | 244 | /* the interrupt might have been filtered out completely */ |
243 | if (vcpu->arch.sie_block->iprcc == 0) | 245 | if (vcpu->arch.sie_block->iprcc == 0) |
244 | return 0; | 246 | return 0; |
@@ -359,6 +361,9 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) | |||
359 | 361 | ||
360 | static int handle_operexc(struct kvm_vcpu *vcpu) | 362 | static int handle_operexc(struct kvm_vcpu *vcpu) |
361 | { | 363 | { |
364 | psw_t oldpsw, newpsw; | ||
365 | int rc; | ||
366 | |||
362 | vcpu->stat.exit_operation_exception++; | 367 | vcpu->stat.exit_operation_exception++; |
363 | trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, | 368 | trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, |
364 | vcpu->arch.sie_block->ipb); | 369 | vcpu->arch.sie_block->ipb); |
@@ -369,6 +374,24 @@ static int handle_operexc(struct kvm_vcpu *vcpu) | |||
369 | 374 | ||
370 | if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) | 375 | if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) |
371 | return -EOPNOTSUPP; | 376 | return -EOPNOTSUPP; |
377 | rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t)); | ||
378 | if (rc) | ||
379 | return rc; | ||
380 | /* | ||
381 | * Avoid endless loops of operation exceptions, if the pgm new | ||
382 | * PSW will cause a new operation exception. | ||
383 | * The heuristic checks if the pgm new psw is within 6 bytes before | ||
384 | * the faulting psw address (with same DAT, AS settings) and the | ||
385 | * new psw is not a wait psw and the fault was not triggered by | ||
386 | * problem state. | ||
387 | */ | ||
388 | oldpsw = vcpu->arch.sie_block->gpsw; | ||
389 | if (oldpsw.addr - newpsw.addr <= 6 && | ||
390 | !(newpsw.mask & PSW_MASK_WAIT) && | ||
391 | !(oldpsw.mask & PSW_MASK_PSTATE) && | ||
392 | (newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) && | ||
393 | (newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT)) | ||
394 | return -EOPNOTSUPP; | ||
372 | 395 | ||
373 | return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); | 396 | return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); |
374 | } | 397 | } |
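
The loop-avoidance heuristic added to handle_operexc() is self-contained enough to restate as a predicate. A sketch, with the PSW mask bits written out as the usual s390 values (treat them as assumptions of the sketch rather than quotes from this patch):

    #include <stdbool.h>
    #include <stdint.h>

    #define PSW_MASK_DAT    0x0400000000000000ULL
    #define PSW_MASK_WAIT   0x0002000000000000ULL
    #define PSW_MASK_PSTATE 0x0001000000000000ULL
    #define PSW_MASK_ASC    0x0000C00000000000ULL

    struct psw { uint64_t mask; uint64_t addr; };

    /* True when the program-check new PSW points at most 6 bytes before the
     * faulting PSW with identical DAT/AS settings, is not a wait PSW, and
     * the fault did not come from problem state -- i.e. the guest would
     * take operation exceptions forever. */
    static bool would_loop(struct psw oldpsw, struct psw newpsw)
    {
        return oldpsw.addr - newpsw.addr <= 6 &&
               !(newpsw.mask & PSW_MASK_WAIT) &&
               !(oldpsw.mask & PSW_MASK_PSTATE) &&
               (newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) &&
               (newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT);
    }

When the predicate holds, the handler above returns -EOPNOTSUPP instead of reinjecting PGM_OPERATION, so userspace gets a chance to intervene rather than watching the guest program-check in a tight loop.
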
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b604854df02c..f5694838234d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -218,7 +218,7 @@ static void allow_cpu_feat(unsigned long nr) | |||
218 | static inline int plo_test_bit(unsigned char nr) | 218 | static inline int plo_test_bit(unsigned char nr) |
219 | { | 219 | { |
220 | register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; | 220 | register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; |
221 | int cc = 3; /* subfunction not available */ | 221 | int cc; |
222 | 222 | ||
223 | asm volatile( | 223 | asm volatile( |
224 | /* Parameter registers are ignored for "test bit" */ | 224 | /* Parameter registers are ignored for "test bit" */ |
@@ -371,6 +371,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
371 | case KVM_CAP_S390_IRQCHIP: | 371 | case KVM_CAP_S390_IRQCHIP: |
372 | case KVM_CAP_VM_ATTRIBUTES: | 372 | case KVM_CAP_VM_ATTRIBUTES: |
373 | case KVM_CAP_MP_STATE: | 373 | case KVM_CAP_MP_STATE: |
374 | case KVM_CAP_IMMEDIATE_EXIT: | ||
374 | case KVM_CAP_S390_INJECT_IRQ: | 375 | case KVM_CAP_S390_INJECT_IRQ: |
375 | case KVM_CAP_S390_USER_SIGP: | 376 | case KVM_CAP_S390_USER_SIGP: |
376 | case KVM_CAP_S390_USER_STSI: | 377 | case KVM_CAP_S390_USER_STSI: |
@@ -443,6 +444,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
443 | struct kvm_memory_slot *memslot; | 444 | struct kvm_memory_slot *memslot; |
444 | int is_dirty = 0; | 445 | int is_dirty = 0; |
445 | 446 | ||
447 | if (kvm_is_ucontrol(kvm)) | ||
448 | return -EINVAL; | ||
449 | |||
446 | mutex_lock(&kvm->slots_lock); | 450 | mutex_lock(&kvm->slots_lock); |
447 | 451 | ||
448 | r = -EINVAL; | 452 | r = -EINVAL; |
@@ -506,6 +510,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) | |||
506 | } else if (MACHINE_HAS_VX) { | 510 | } else if (MACHINE_HAS_VX) { |
507 | set_kvm_facility(kvm->arch.model.fac_mask, 129); | 511 | set_kvm_facility(kvm->arch.model.fac_mask, 129); |
508 | set_kvm_facility(kvm->arch.model.fac_list, 129); | 512 | set_kvm_facility(kvm->arch.model.fac_list, 129); |
513 | if (test_facility(134)) { | ||
514 | set_kvm_facility(kvm->arch.model.fac_mask, 134); | ||
515 | set_kvm_facility(kvm->arch.model.fac_list, 134); | ||
516 | } | ||
517 | if (test_facility(135)) { | ||
518 | set_kvm_facility(kvm->arch.model.fac_mask, 135); | ||
519 | set_kvm_facility(kvm->arch.model.fac_list, 135); | ||
520 | } | ||
509 | r = 0; | 521 | r = 0; |
510 | } else | 522 | } else |
511 | r = -EINVAL; | 523 | r = -EINVAL; |
@@ -822,6 +834,13 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) | |||
822 | } | 834 | } |
823 | memcpy(kvm->arch.model.fac_list, proc->fac_list, | 835 | memcpy(kvm->arch.model.fac_list, proc->fac_list, |
824 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 836 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
837 | VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", | ||
838 | kvm->arch.model.ibc, | ||
839 | kvm->arch.model.cpuid); | ||
840 | VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", | ||
841 | kvm->arch.model.fac_list[0], | ||
842 | kvm->arch.model.fac_list[1], | ||
843 | kvm->arch.model.fac_list[2]); | ||
825 | } else | 844 | } else |
826 | ret = -EFAULT; | 845 | ret = -EFAULT; |
827 | kfree(proc); | 846 | kfree(proc); |
@@ -895,6 +914,13 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) | |||
895 | proc->ibc = kvm->arch.model.ibc; | 914 | proc->ibc = kvm->arch.model.ibc; |
896 | memcpy(&proc->fac_list, kvm->arch.model.fac_list, | 915 | memcpy(&proc->fac_list, kvm->arch.model.fac_list, |
897 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 916 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
917 | VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", | ||
918 | kvm->arch.model.ibc, | ||
919 | kvm->arch.model.cpuid); | ||
920 | VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", | ||
921 | kvm->arch.model.fac_list[0], | ||
922 | kvm->arch.model.fac_list[1], | ||
923 | kvm->arch.model.fac_list[2]); | ||
898 | if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) | 924 | if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) |
899 | ret = -EFAULT; | 925 | ret = -EFAULT; |
900 | kfree(proc); | 926 | kfree(proc); |
@@ -918,6 +944,17 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) | |||
918 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 944 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
919 | memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, | 945 | memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, |
920 | sizeof(S390_lowcore.stfle_fac_list)); | 946 | sizeof(S390_lowcore.stfle_fac_list)); |
947 | VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", | ||
948 | kvm->arch.model.ibc, | ||
949 | kvm->arch.model.cpuid); | ||
950 | VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", | ||
951 | mach->fac_mask[0], | ||
952 | mach->fac_mask[1], | ||
953 | mach->fac_mask[2]); | ||
954 | VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", | ||
955 | mach->fac_list[0], | ||
956 | mach->fac_list[1], | ||
957 | mach->fac_list[2]); | ||
921 | if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) | 958 | if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) |
922 | ret = -EFAULT; | 959 | ret = -EFAULT; |
923 | kfree(mach); | 960 | kfree(mach); |
@@ -1939,6 +1976,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1939 | 1976 | ||
1940 | if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) | 1977 | if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) |
1941 | vcpu->arch.sie_block->ecb2 |= 0x08; | 1978 | vcpu->arch.sie_block->ecb2 |= 0x08; |
1979 | if (test_kvm_facility(vcpu->kvm, 130)) | ||
1980 | vcpu->arch.sie_block->ecb2 |= 0x20; | ||
1942 | vcpu->arch.sie_block->eca = 0x1002000U; | 1981 | vcpu->arch.sie_block->eca = 0x1002000U; |
1943 | if (sclp.has_cei) | 1982 | if (sclp.has_cei) |
1944 | vcpu->arch.sie_block->eca |= 0x80000000U; | 1983 | vcpu->arch.sie_block->eca |= 0x80000000U; |
@@ -2579,7 +2618,7 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) | |||
2579 | * to look up the current opcode to get the length of the instruction | 2618 | * to look up the current opcode to get the length of the instruction |
2580 | * to be able to forward the PSW. | 2619 | * to be able to forward the PSW. |
2581 | */ | 2620 | */ |
2582 | rc = read_guest_instr(vcpu, &opcode, 1); | 2621 | rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); |
2583 | ilen = insn_length(opcode); | 2622 | ilen = insn_length(opcode); |
2584 | if (rc < 0) { | 2623 | if (rc < 0) { |
2585 | return rc; | 2624 | return rc; |
@@ -2761,6 +2800,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2761 | int rc; | 2800 | int rc; |
2762 | sigset_t sigsaved; | 2801 | sigset_t sigsaved; |
2763 | 2802 | ||
2803 | if (kvm_run->immediate_exit) | ||
2804 | return -EINTR; | ||
2805 | |||
2764 | if (guestdbg_exit_pending(vcpu)) { | 2806 | if (guestdbg_exit_pending(vcpu)) { |
2765 | kvm_s390_prepare_debug_exit(vcpu); | 2807 | kvm_s390_prepare_debug_exit(vcpu); |
2766 | return 0; | 2808 | return 0; |
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 3a4e97f1a9e6..af9fa91a0c91 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -86,9 +86,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) | |||
86 | kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); | 86 | kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); |
87 | } | 87 | } |
88 | 88 | ||
89 | typedef u8 __bitwise ar_t; | 89 | static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar) |
90 | |||
91 | static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar) | ||
92 | { | 90 | { |
93 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; | 91 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; |
94 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | 92 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); |
@@ -101,7 +99,7 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar) | |||
101 | 99 | ||
102 | static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, | 100 | static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, |
103 | u64 *address1, u64 *address2, | 101 | u64 *address1, u64 *address2, |
104 | ar_t *ar_b1, ar_t *ar_b2) | 102 | u8 *ar_b1, u8 *ar_b2) |
105 | { | 103 | { |
106 | u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; | 104 | u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; |
107 | u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; | 105 | u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; |
@@ -125,7 +123,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2 | |||
125 | *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; | 123 | *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; |
126 | } | 124 | } |
127 | 125 | ||
128 | static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar) | 126 | static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, u8 *ar) |
129 | { | 127 | { |
130 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; | 128 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; |
131 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + | 129 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + |
@@ -140,7 +138,7 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar) | |||
140 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; | 138 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; |
141 | } | 139 | } |
142 | 140 | ||
143 | static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar) | 141 | static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, u8 *ar) |
144 | { | 142 | { |
145 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; | 143 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; |
146 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | 144 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); |
@@ -379,7 +377,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, | |||
379 | void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); | 377 | void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); |
380 | void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); | 378 | void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); |
381 | int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu); | 379 | int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu); |
382 | void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); | 380 | int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); |
383 | 381 | ||
384 | /* support for Basic/Extended SCA handling */ | 382 | /* support for Basic/Extended SCA handling */ |
385 | static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) | 383 | static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) |
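
The ar_t -> u8 conversion does not change what these helpers compute: the base register number doubles as the access register number for the operand, and the operand address is base plus a 12-bit displacement taken from the IPB word of the SIE block. A small sketch of the S-format case, mirroring kvm_s390_get_base_disp_s() above:

    #include <stdint.h>
    #include <stdio.h>

    /* B2 sits in bits 0-3 of the IPB's upper halfword, D2 in the next 12
     * bits; the access register for the operand is simply B2. */
    static uint64_t base_disp_s(uint32_t ipb, const uint64_t gprs[16], uint8_t *ar)
    {
        uint32_t base2 = ipb >> 28;
        uint32_t disp2 = (ipb & 0x0fff0000) >> 16;

        if (ar)
            *ar = base2;
        return (base2 ? gprs[base2] : 0) + disp2;
    }

    int main(void)
    {
        uint64_t gprs[16] = { 0 };
        uint8_t ar;

        gprs[5] = 0x20000;
        /* IPB with B2=5, D2=0x123 -> address 0x20123, ar 5 */
        printf("addr=%#llx ar=%u\n",
               (unsigned long long)base_disp_s(0x51230000, gprs, &ar), ar);
        return 0;
    }
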
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 794503516bd4..fb4b494cde9b 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -54,7 +54,7 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu) | |||
54 | static int handle_set_clock(struct kvm_vcpu *vcpu) | 54 | static int handle_set_clock(struct kvm_vcpu *vcpu) |
55 | { | 55 | { |
56 | int rc; | 56 | int rc; |
57 | ar_t ar; | 57 | u8 ar; |
58 | u64 op2, val; | 58 | u64 op2, val; |
59 | 59 | ||
60 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 60 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
@@ -79,7 +79,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) | |||
79 | u64 operand2; | 79 | u64 operand2; |
80 | u32 address; | 80 | u32 address; |
81 | int rc; | 81 | int rc; |
82 | ar_t ar; | 82 | u8 ar; |
83 | 83 | ||
84 | vcpu->stat.instruction_spx++; | 84 | vcpu->stat.instruction_spx++; |
85 | 85 | ||
@@ -117,7 +117,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) | |||
117 | u64 operand2; | 117 | u64 operand2; |
118 | u32 address; | 118 | u32 address; |
119 | int rc; | 119 | int rc; |
120 | ar_t ar; | 120 | u8 ar; |
121 | 121 | ||
122 | vcpu->stat.instruction_stpx++; | 122 | vcpu->stat.instruction_stpx++; |
123 | 123 | ||
@@ -147,7 +147,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | |||
147 | u16 vcpu_id = vcpu->vcpu_id; | 147 | u16 vcpu_id = vcpu->vcpu_id; |
148 | u64 ga; | 148 | u64 ga; |
149 | int rc; | 149 | int rc; |
150 | ar_t ar; | 150 | u8 ar; |
151 | 151 | ||
152 | vcpu->stat.instruction_stap++; | 152 | vcpu->stat.instruction_stap++; |
153 | 153 | ||
@@ -380,7 +380,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) | |||
380 | u32 tpi_data[3]; | 380 | u32 tpi_data[3]; |
381 | int rc; | 381 | int rc; |
382 | u64 addr; | 382 | u64 addr; |
383 | ar_t ar; | 383 | u8 ar; |
384 | 384 | ||
385 | addr = kvm_s390_get_base_disp_s(vcpu, &ar); | 385 | addr = kvm_s390_get_base_disp_s(vcpu, &ar); |
386 | if (addr & 3) | 386 | if (addr & 3) |
@@ -548,7 +548,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) | |||
548 | psw_compat_t new_psw; | 548 | psw_compat_t new_psw; |
549 | u64 addr; | 549 | u64 addr; |
550 | int rc; | 550 | int rc; |
551 | ar_t ar; | 551 | u8 ar; |
552 | 552 | ||
553 | if (gpsw->mask & PSW_MASK_PSTATE) | 553 | if (gpsw->mask & PSW_MASK_PSTATE) |
554 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 554 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
@@ -575,7 +575,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) | |||
575 | psw_t new_psw; | 575 | psw_t new_psw; |
576 | u64 addr; | 576 | u64 addr; |
577 | int rc; | 577 | int rc; |
578 | ar_t ar; | 578 | u8 ar; |
579 | 579 | ||
580 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 580 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
581 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 581 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
@@ -597,7 +597,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) | |||
597 | u64 stidp_data = vcpu->kvm->arch.model.cpuid; | 597 | u64 stidp_data = vcpu->kvm->arch.model.cpuid; |
598 | u64 operand2; | 598 | u64 operand2; |
599 | int rc; | 599 | int rc; |
600 | ar_t ar; | 600 | u8 ar; |
601 | 601 | ||
602 | vcpu->stat.instruction_stidp++; | 602 | vcpu->stat.instruction_stidp++; |
603 | 603 | ||
@@ -644,7 +644,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) | |||
644 | ASCEBC(mem->vm[0].cpi, 16); | 644 | ASCEBC(mem->vm[0].cpi, 16); |
645 | } | 645 | } |
646 | 646 | ||
647 | static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar, | 647 | static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, u8 ar, |
648 | u8 fc, u8 sel1, u16 sel2) | 648 | u8 fc, u8 sel1, u16 sel2) |
649 | { | 649 | { |
650 | vcpu->run->exit_reason = KVM_EXIT_S390_STSI; | 650 | vcpu->run->exit_reason = KVM_EXIT_S390_STSI; |
@@ -663,7 +663,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
663 | unsigned long mem = 0; | 663 | unsigned long mem = 0; |
664 | u64 operand2; | 664 | u64 operand2; |
665 | int rc = 0; | 665 | int rc = 0; |
666 | ar_t ar; | 666 | u8 ar; |
667 | 667 | ||
668 | vcpu->stat.instruction_stsi++; | 668 | vcpu->stat.instruction_stsi++; |
669 | VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2); | 669 | VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2); |
@@ -970,7 +970,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) | |||
970 | int reg, rc, nr_regs; | 970 | int reg, rc, nr_regs; |
971 | u32 ctl_array[16]; | 971 | u32 ctl_array[16]; |
972 | u64 ga; | 972 | u64 ga; |
973 | ar_t ar; | 973 | u8 ar; |
974 | 974 | ||
975 | vcpu->stat.instruction_lctl++; | 975 | vcpu->stat.instruction_lctl++; |
976 | 976 | ||
@@ -1009,7 +1009,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) | |||
1009 | int reg, rc, nr_regs; | 1009 | int reg, rc, nr_regs; |
1010 | u32 ctl_array[16]; | 1010 | u32 ctl_array[16]; |
1011 | u64 ga; | 1011 | u64 ga; |
1012 | ar_t ar; | 1012 | u8 ar; |
1013 | 1013 | ||
1014 | vcpu->stat.instruction_stctl++; | 1014 | vcpu->stat.instruction_stctl++; |
1015 | 1015 | ||
@@ -1043,7 +1043,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) | |||
1043 | int reg, rc, nr_regs; | 1043 | int reg, rc, nr_regs; |
1044 | u64 ctl_array[16]; | 1044 | u64 ctl_array[16]; |
1045 | u64 ga; | 1045 | u64 ga; |
1046 | ar_t ar; | 1046 | u8 ar; |
1047 | 1047 | ||
1048 | vcpu->stat.instruction_lctlg++; | 1048 | vcpu->stat.instruction_lctlg++; |
1049 | 1049 | ||
@@ -1081,7 +1081,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu) | |||
1081 | int reg, rc, nr_regs; | 1081 | int reg, rc, nr_regs; |
1082 | u64 ctl_array[16]; | 1082 | u64 ctl_array[16]; |
1083 | u64 ga; | 1083 | u64 ga; |
1084 | ar_t ar; | 1084 | u8 ar; |
1085 | 1085 | ||
1086 | vcpu->stat.instruction_stctg++; | 1086 | vcpu->stat.instruction_stctg++; |
1087 | 1087 | ||
@@ -1132,7 +1132,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu) | |||
1132 | unsigned long hva, gpa; | 1132 | unsigned long hva, gpa; |
1133 | int ret = 0, cc = 0; | 1133 | int ret = 0, cc = 0; |
1134 | bool writable; | 1134 | bool writable; |
1135 | ar_t ar; | 1135 | u8 ar; |
1136 | 1136 | ||
1137 | vcpu->stat.instruction_tprot++; | 1137 | vcpu->stat.instruction_tprot++; |
1138 | 1138 | ||
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a9a9d974d9a4..38556e395915 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c | |||
@@ -324,6 +324,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
324 | /* Run-time-Instrumentation */ | 324 | /* Run-time-Instrumentation */ |
325 | if (test_kvm_facility(vcpu->kvm, 64)) | 325 | if (test_kvm_facility(vcpu->kvm, 64)) |
326 | scb_s->ecb3 |= scb_o->ecb3 & 0x01U; | 326 | scb_s->ecb3 |= scb_o->ecb3 & 0x01U; |
327 | /* Instruction Execution Prevention */ | ||
328 | if (test_kvm_facility(vcpu->kvm, 130)) | ||
329 | scb_s->ecb2 |= scb_o->ecb2 & 0x20U; | ||
327 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) | 330 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) |
328 | scb_s->eca |= scb_o->eca & 0x00000001U; | 331 | scb_s->eca |= scb_o->eca & 0x00000001U; |
329 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) | 332 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) |
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index beb90f3993e6..b48dc5f1900b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -744,7 +744,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) | |||
744 | 744 | ||
745 | pgste_set_unlock(ptep, new); | 745 | pgste_set_unlock(ptep, new); |
746 | pte_unmap_unlock(ptep, ptl); | 746 | pte_unmap_unlock(ptep, ptl); |
747 | return 0; | 747 | return cc; |
748 | } | 748 | } |
749 | EXPORT_SYMBOL(reset_guest_reference_bit); | 749 | EXPORT_SYMBOL(reset_guest_reference_bit); |
750 | 750 | ||
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 8cc53b1e6d03..0cf802de52a1 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c | |||
@@ -80,6 +80,8 @@ static struct facility_def facility_defs[] = { | |||
80 | 76, /* msa extension 3 */ | 80 | 76, /* msa extension 3 */ |
81 | 77, /* msa extension 4 */ | 81 | 77, /* msa extension 4 */ |
82 | 78, /* enhanced-DAT 2 */ | 82 | 78, /* enhanced-DAT 2 */ |
83 | 130, /* instruction-execution-protection */ | ||
84 | 131, /* enhanced-SOP 2 and side-effect */ | ||
83 | -1 /* END */ | 85 | -1 /* END */ |
84 | } | 86 | } |
85 | }, | 87 | }, |
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 12080d87da3b..cb8f9149f6c8 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -177,16 +177,8 @@ static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) | |||
177 | struct desc_struct *d = get_cpu_gdt_table(cpu); | 177 | struct desc_struct *d = get_cpu_gdt_table(cpu); |
178 | tss_desc tss; | 178 | tss_desc tss; |
179 | 179 | ||
180 | /* | ||
181 | * sizeof(unsigned long) coming from an extra "long" at the end | ||
182 | * of the iobitmap. See tss_struct definition in processor.h | ||
183 | * | ||
184 | * -1? seg base+limit should be pointing to the address of the | ||
185 | * last valid byte | ||
186 | */ | ||
187 | set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, | 180 | set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, |
188 | IO_BITMAP_OFFSET + IO_BITMAP_BYTES + | 181 | __KERNEL_TSS_LIMIT); |
189 | sizeof(unsigned long) - 1); | ||
190 | write_gdt_entry(d, entry, &tss, DESC_TSS); | 182 | write_gdt_entry(d, entry, &tss, DESC_TSS); |
191 | } | 183 | } |
192 | 184 | ||
@@ -213,6 +205,54 @@ static inline void native_load_tr_desc(void) | |||
213 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); | 205 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); |
214 | } | 206 | } |
215 | 207 | ||
208 | static inline void force_reload_TR(void) | ||
209 | { | ||
210 | struct desc_struct *d = get_cpu_gdt_table(smp_processor_id()); | ||
211 | tss_desc tss; | ||
212 | |||
213 | memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc)); | ||
214 | |||
215 | /* | ||
216 | * LTR requires an available TSS, and the TSS is currently | ||
217 | * busy. Make it be available so that LTR will work. | ||
218 | */ | ||
219 | tss.type = DESC_TSS; | ||
220 | write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); | ||
221 | |||
222 | load_TR_desc(); | ||
223 | } | ||
224 | |||
225 | DECLARE_PER_CPU(bool, need_tr_refresh); | ||
226 | |||
227 | static inline void refresh_TR(void) | ||
228 | { | ||
229 | DEBUG_LOCKS_WARN_ON(preemptible()); | ||
230 | |||
231 | if (unlikely(this_cpu_read(need_tr_refresh))) { | ||
232 | force_reload_TR(); | ||
233 | this_cpu_write(need_tr_refresh, false); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * If you do something evil that corrupts the cached TSS limit (I'm looking | ||
239 | * at you, VMX exits), call this function. | ||
240 | * | ||
241 | * The optimization here is that the TSS limit only matters for Linux if the | ||
242 | * IO bitmap is in use. If the TSS limit gets forced to its minimum value, | ||
243 | * everything works except that IO bitmap will be ignored and all CPL 3 IO | ||
244 | * instructions will #GP, which is exactly what we want for normal tasks. | ||
245 | */ | ||
246 | static inline void invalidate_tss_limit(void) | ||
247 | { | ||
248 | DEBUG_LOCKS_WARN_ON(preemptible()); | ||
249 | |||
250 | if (unlikely(test_thread_flag(TIF_IO_BITMAP))) | ||
251 | force_reload_TR(); | ||
252 | else | ||
253 | this_cpu_write(need_tr_refresh, true); | ||
254 | } | ||
255 | |||
216 | static inline void native_load_gdt(const struct desc_ptr *dtr) | 256 | static inline void native_load_gdt(const struct desc_ptr *dtr) |
217 | { | 257 | { |
218 | asm volatile("lgdt %0"::"m" (*dtr)); | 258 | asm volatile("lgdt %0"::"m" (*dtr)); |
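
The refresh_TR()/invalidate_tss_limit() pair added above implements a lazy TR reload: a VMX exit that clobbers the cached TSS limit only forces an immediate LTR when the current task really uses an I/O bitmap, otherwise a per-CPU flag defers the reload until a task that needs it runs. A toy userspace model of that decision, just to show how the flag keeps redundant reloads from piling up (names mirror the kernel's, but this is not kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    static bool need_tr_refresh;      /* stands in for the per-CPU variable */
    static unsigned long tr_reloads;  /* counts simulated LTR executions */

    static void force_reload_TR(void) { tr_reloads++; }

    static void invalidate_tss_limit(bool task_uses_io_bitmap)
    {
        if (task_uses_io_bitmap)
            force_reload_TR();        /* limit matters right now */
        else
            need_tr_refresh = true;   /* defer; CPL 3 IO will #GP anyway */
    }

    static void refresh_TR(void)
    {
        if (need_tr_refresh) {
            force_reload_TR();
            need_tr_refresh = false;
        }
    }

    int main(void)
    {
        invalidate_tss_limit(false);  /* VM exit, no ioperm() user running */
        invalidate_tss_limit(false);  /* another exit: still no reload */
        refresh_TR();                 /* switch to an ioperm() task */
        printf("TR reloads: %lu\n", tr_reloads);   /* prints 1 */
        return 0;
    }
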
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e9cd7befcb76..3e8c287090e4 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -441,5 +441,6 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
441 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); | 441 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); |
442 | void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); | 442 | void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); |
443 | void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); | 443 | void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); |
444 | bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt); | ||
444 | 445 | ||
445 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 446 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a7066dc1a7e9..74ef58c8ff53 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -55,7 +55,6 @@ | |||
55 | #define KVM_REQ_TRIPLE_FAULT 10 | 55 | #define KVM_REQ_TRIPLE_FAULT 10 |
56 | #define KVM_REQ_MMU_SYNC 11 | 56 | #define KVM_REQ_MMU_SYNC 11 |
57 | #define KVM_REQ_CLOCK_UPDATE 12 | 57 | #define KVM_REQ_CLOCK_UPDATE 12 |
58 | #define KVM_REQ_DEACTIVATE_FPU 13 | ||
59 | #define KVM_REQ_EVENT 14 | 58 | #define KVM_REQ_EVENT 14 |
60 | #define KVM_REQ_APF_HALT 15 | 59 | #define KVM_REQ_APF_HALT 15 |
61 | #define KVM_REQ_STEAL_UPDATE 16 | 60 | #define KVM_REQ_STEAL_UPDATE 16 |
@@ -115,7 +114,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) | |||
115 | 114 | ||
116 | #define KVM_PERMILLE_MMU_PAGES 20 | 115 | #define KVM_PERMILLE_MMU_PAGES 20 |
117 | #define KVM_MIN_ALLOC_MMU_PAGES 64 | 116 | #define KVM_MIN_ALLOC_MMU_PAGES 64 |
118 | #define KVM_MMU_HASH_SHIFT 10 | 117 | #define KVM_MMU_HASH_SHIFT 12 |
119 | #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) | 118 | #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) |
120 | #define KVM_MIN_FREE_MMU_PAGES 5 | 119 | #define KVM_MIN_FREE_MMU_PAGES 5 |
121 | #define KVM_REFILL_PAGES 25 | 120 | #define KVM_REFILL_PAGES 25 |
@@ -208,6 +207,13 @@ enum { | |||
208 | PFERR_WRITE_MASK | \ | 207 | PFERR_WRITE_MASK | \ |
209 | PFERR_PRESENT_MASK) | 208 | PFERR_PRESENT_MASK) |
210 | 209 | ||
210 | /* | ||
211 | * The mask used to denote special SPTEs, which can be either MMIO SPTEs or | ||
212 | * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting | ||
213 | * with the SVE bit in EPT PTEs. | ||
214 | */ | ||
215 | #define SPTE_SPECIAL_MASK (1ULL << 62) | ||
216 | |||
211 | /* apic attention bits */ | 217 | /* apic attention bits */ |
212 | #define KVM_APIC_CHECK_VAPIC 0 | 218 | #define KVM_APIC_CHECK_VAPIC 0 |
213 | /* | 219 | /* |
@@ -668,6 +674,9 @@ struct kvm_vcpu_arch { | |||
668 | 674 | ||
669 | int pending_ioapic_eoi; | 675 | int pending_ioapic_eoi; |
670 | int pending_external_vector; | 676 | int pending_external_vector; |
677 | |||
678 | /* GPA available (AMD only) */ | ||
679 | bool gpa_available; | ||
671 | }; | 680 | }; |
672 | 681 | ||
673 | struct kvm_lpage_info { | 682 | struct kvm_lpage_info { |
@@ -716,6 +725,12 @@ struct kvm_hv { | |||
716 | HV_REFERENCE_TSC_PAGE tsc_ref; | 725 | HV_REFERENCE_TSC_PAGE tsc_ref; |
717 | }; | 726 | }; |
718 | 727 | ||
728 | enum kvm_irqchip_mode { | ||
729 | KVM_IRQCHIP_NONE, | ||
730 | KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ | ||
731 | KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ | ||
732 | }; | ||
733 | |||
719 | struct kvm_arch { | 734 | struct kvm_arch { |
720 | unsigned int n_used_mmu_pages; | 735 | unsigned int n_used_mmu_pages; |
721 | unsigned int n_requested_mmu_pages; | 736 | unsigned int n_requested_mmu_pages; |
@@ -788,7 +803,7 @@ struct kvm_arch { | |||
788 | 803 | ||
789 | u64 disabled_quirks; | 804 | u64 disabled_quirks; |
790 | 805 | ||
791 | bool irqchip_split; | 806 | enum kvm_irqchip_mode irqchip_mode; |
792 | u8 nr_reserved_ioapic_pins; | 807 | u8 nr_reserved_ioapic_pins; |
793 | 808 | ||
794 | bool disabled_lapic_found; | 809 | bool disabled_lapic_found; |
@@ -815,6 +830,7 @@ struct kvm_vm_stat { | |||
815 | ulong mmu_unsync; | 830 | ulong mmu_unsync; |
816 | ulong remote_tlb_flush; | 831 | ulong remote_tlb_flush; |
817 | ulong lpages; | 832 | ulong lpages; |
833 | ulong max_mmu_page_hash_collisions; | ||
818 | }; | 834 | }; |
819 | 835 | ||
820 | struct kvm_vcpu_stat { | 836 | struct kvm_vcpu_stat { |
@@ -844,6 +860,7 @@ struct kvm_vcpu_stat { | |||
844 | u64 hypercalls; | 860 | u64 hypercalls; |
845 | u64 irq_injections; | 861 | u64 irq_injections; |
846 | u64 nmi_injections; | 862 | u64 nmi_injections; |
863 | u64 req_event; | ||
847 | }; | 864 | }; |
848 | 865 | ||
849 | struct x86_instruction_info; | 866 | struct x86_instruction_info; |
@@ -918,8 +935,6 @@ struct kvm_x86_ops { | |||
918 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 935 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
919 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 936 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
920 | u32 (*get_pkru)(struct kvm_vcpu *vcpu); | 937 | u32 (*get_pkru)(struct kvm_vcpu *vcpu); |
921 | void (*fpu_activate)(struct kvm_vcpu *vcpu); | ||
922 | void (*fpu_deactivate)(struct kvm_vcpu *vcpu); | ||
923 | 938 | ||
924 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 939 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
925 | 940 | ||
@@ -951,7 +966,7 @@ struct kvm_x86_ops { | |||
951 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | 966 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); |
952 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); | 967 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); |
953 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | 968 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
954 | void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); | 969 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
955 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 970 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
956 | int (*get_tdp_level)(void); | 971 | int (*get_tdp_level)(void); |
957 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 972 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
@@ -1050,7 +1065,8 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu); | |||
1050 | void kvm_mmu_init_vm(struct kvm *kvm); | 1065 | void kvm_mmu_init_vm(struct kvm *kvm); |
1051 | void kvm_mmu_uninit_vm(struct kvm *kvm); | 1066 | void kvm_mmu_uninit_vm(struct kvm *kvm); |
1052 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 1067 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
1053 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask); | 1068 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, |
1069 | u64 acc_track_mask); | ||
1054 | 1070 | ||
1055 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 1071 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
1056 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, | 1072 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h new file mode 100644 index 000000000000..f260bef63591 --- /dev/null +++ b/arch/x86/include/asm/kvmclock.h | |||
@@ -0,0 +1,6 @@ | |||
1 | #ifndef _ASM_X86_KVM_CLOCK_H | ||
2 | #define _ASM_X86_KVM_CLOCK_H | ||
3 | |||
4 | extern struct clocksource kvm_clock; | ||
5 | |||
6 | #endif /* _ASM_X86_KVM_CLOCK_H */ | ||
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 1eea6ca40694..f75fbfe550f2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -673,7 +673,7 @@ static __always_inline void pv_kick(int cpu) | |||
673 | PVOP_VCALL1(pv_lock_ops.kick, cpu); | 673 | PVOP_VCALL1(pv_lock_ops.kick, cpu); |
674 | } | 674 | } |
675 | 675 | ||
676 | static __always_inline bool pv_vcpu_is_preempted(int cpu) | 676 | static __always_inline bool pv_vcpu_is_preempted(long cpu) |
677 | { | 677 | { |
678 | return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); | 678 | return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); |
679 | } | 679 | } |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e6cfe7ba2d65..f385eca5407a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -304,7 +304,7 @@ struct x86_hw_tss { | |||
304 | u16 reserved5; | 304 | u16 reserved5; |
305 | u16 io_bitmap_base; | 305 | u16 io_bitmap_base; |
306 | 306 | ||
307 | } __attribute__((packed)) ____cacheline_aligned; | 307 | } __attribute__((packed)); |
308 | #endif | 308 | #endif |
309 | 309 | ||
310 | /* | 310 | /* |
@@ -342,6 +342,16 @@ struct tss_struct { | |||
342 | 342 | ||
343 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); | 343 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); |
344 | 344 | ||
345 | /* | ||
346 | * The sizeof(unsigned long) comes from the extra "long" at the end | ||
347 | * of the I/O bitmap. | ||
348 | * | ||
349 | * The -1 is because the segment base+limit must point to the address | ||
350 | * of the last valid byte, not one past it. | ||
351 | */ | ||
352 | #define __KERNEL_TSS_LIMIT \ | ||
353 | (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) | ||
354 | |||
345 | #ifdef CONFIG_X86_32 | 355 | #ifdef CONFIG_X86_32 |
346 | DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); | 356 | DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); |
347 | #endif | 357 | #endif |
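Worked out with the usual x86 constants (IO_BITMAP_BITS = 65536, so IO_BITMAP_BYTES = 8192, plus the 8-byte trailing long on 64-bit), __KERNEL_TSS_LIMIT is IO_BITMAP_OFFSET + 8192 + 8 - 1, i.e. the offset of the last valid byte of the TSS rather than its size. If the limit is truncated below the I/O bitmap, the CPU denies all port accesses from user mode; that is exactly the state a VM exit leaves the host TSS in, and why invalidate_tss_limit()/refresh_TR() exist.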
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index c343ab52579f..48a706f641f2 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h | |||
@@ -34,7 +34,7 @@ static inline void queued_spin_unlock(struct qspinlock *lock) | |||
34 | } | 34 | } |
35 | 35 | ||
36 | #define vcpu_is_preempted vcpu_is_preempted | 36 | #define vcpu_is_preempted vcpu_is_preempted |
37 | static inline bool vcpu_is_preempted(int cpu) | 37 | static inline bool vcpu_is_preempted(long cpu) |
38 | { | 38 | { |
39 | return pv_vcpu_is_preempted(cpu); | 39 | return pv_vcpu_is_preempted(cpu); |
40 | } | 40 | } |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 2b5b2d4b924e..cc54b7026567 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -467,8 +467,16 @@ enum vmcs_field { | |||
467 | #define VMX_EPT_WRITABLE_MASK 0x2ull | 467 | #define VMX_EPT_WRITABLE_MASK 0x2ull |
468 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | 468 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull |
469 | #define VMX_EPT_IPAT_BIT (1ull << 6) | 469 | #define VMX_EPT_IPAT_BIT (1ull << 6) |
470 | #define VMX_EPT_ACCESS_BIT (1ull << 8) | 470 | #define VMX_EPT_ACCESS_BIT (1ull << 8) |
471 | #define VMX_EPT_DIRTY_BIT (1ull << 9) | 471 | #define VMX_EPT_DIRTY_BIT (1ull << 9) |
472 | #define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ | ||
473 | VMX_EPT_WRITABLE_MASK | \ | ||
474 | VMX_EPT_EXECUTABLE_MASK) | ||
475 | #define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT) | ||
476 | |||
477 | /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ | ||
478 | #define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ | ||
479 | VMX_EPT_EXECUTABLE_MASK) | ||
472 | 480 | ||
473 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | 481 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul |
474 | 482 | ||
@@ -500,6 +508,22 @@ struct vmx_msr_entry { | |||
500 | #define ENTRY_FAIL_VMCS_LINK_PTR 4 | 508 | #define ENTRY_FAIL_VMCS_LINK_PTR 4 |
501 | 509 | ||
502 | /* | 510 | /* |
511 | * Exit Qualifications for EPT Violations | ||
512 | */ | ||
513 | #define EPT_VIOLATION_ACC_READ_BIT 0 | ||
514 | #define EPT_VIOLATION_ACC_WRITE_BIT 1 | ||
515 | #define EPT_VIOLATION_ACC_INSTR_BIT 2 | ||
516 | #define EPT_VIOLATION_READABLE_BIT 3 | ||
517 | #define EPT_VIOLATION_WRITABLE_BIT 4 | ||
518 | #define EPT_VIOLATION_EXECUTABLE_BIT 5 | ||
519 | #define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) | ||
520 | #define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) | ||
521 | #define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) | ||
522 | #define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) | ||
523 | #define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) | ||
524 | #define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) | ||
525 | |||
526 | /* | ||
503 | * VM-instruction error numbers | 527 | * VM-instruction error numbers |
504 | */ | 528 | */ |
505 | enum vm_instruction_error_number { | 529 | enum vm_instruction_error_number { |
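The EPT_VIOLATION_* definitions split the exit qualification into what the access was (bits 0-2) and what the violated EPT entry allowed (bits 3-5). A simplified sketch of how an exit handler can fold them into the PFERR_* error-code bits the MMU already understands, reconstructed rather than quoted from the VMX handler:

        static u32 ept_violation_error_code(unsigned long exit_qualification)
        {
                u32 error_code = 0;

                /* Was it a read, write or instruction-fetch access? */
                if (exit_qualification & EPT_VIOLATION_ACC_READ)
                        error_code |= PFERR_USER_MASK;
                if (exit_qualification & EPT_VIOLATION_ACC_WRITE)
                        error_code |= PFERR_WRITE_MASK;
                if (exit_qualification & EPT_VIOLATION_ACC_INSTR)
                        error_code |= PFERR_FETCH_MASK;

                /* Any of R/W/X set in the EPT entry counts as "present". */
                if (exit_qualification & (EPT_VIOLATION_READABLE |
                                          EPT_VIOLATION_WRITABLE |
                                          EPT_VIOLATION_EXECUTABLE))
                        error_code |= PFERR_PRESENT_MASK;

                return error_code;
        }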
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 1421a6585126..cff0bb6556f8 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h | |||
@@ -50,6 +50,15 @@ struct kvm_steal_time { | |||
50 | __u32 pad[11]; | 50 | __u32 pad[11]; |
51 | }; | 51 | }; |
52 | 52 | ||
53 | #define KVM_CLOCK_PAIRING_WALLCLOCK 0 | ||
54 | struct kvm_clock_pairing { | ||
55 | __s64 sec; | ||
56 | __s64 nsec; | ||
57 | __u64 tsc; | ||
58 | __u32 flags; | ||
59 | __u32 pad[9]; | ||
60 | }; | ||
61 | |||
53 | #define KVM_STEAL_ALIGNMENT_BITS 5 | 62 | #define KVM_STEAL_ALIGNMENT_BITS 5 |
54 | #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) | 63 | #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) |
55 | #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) | 64 | #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) |
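struct kvm_clock_pairing is the payload of the new host-to-guest PTP interface: the guest hands the host a guest-physical pointer to one of these plus a pairing type, and the host fills in a wallclock (sec, nsec) sample together with the guest TSC value it corresponds to. A minimal guest-side sketch; the hypercall number KVM_HC_CLOCK_PAIRING is defined in the generic kvm_para.h elsewhere in this series, and kvm_hypercall2()/slow_virt_to_phys() are existing helpers:

        static struct kvm_clock_pairing clock_pair;

        static void sample_host_time(void)
        {
                long ret;

                ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
                                     slow_virt_to_phys(&clock_pair),
                                     KVM_CLOCK_PAIRING_WALLCLOCK);
                if (ret == 0)
                        pr_info("host wallclock %lld.%09lld at guest TSC %llu\n",
                                clock_pair.sec, clock_pair.nsec, clock_pair.tsc);
        }

This is the primitive the ptp_kvm driver in the same series builds on to cross-timestamp host and guest clocks.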
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 210927ee2e74..99332f550c48 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -13,6 +13,10 @@ static char syscalls_ia32[] = { | |||
13 | #include <asm/syscalls_32.h> | 13 | #include <asm/syscalls_32.h> |
14 | }; | 14 | }; |
15 | 15 | ||
16 | #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) | ||
17 | #include <asm/kvm_para.h> | ||
18 | #endif | ||
19 | |||
16 | int main(void) | 20 | int main(void) |
17 | { | 21 | { |
18 | #ifdef CONFIG_PARAVIRT | 22 | #ifdef CONFIG_PARAVIRT |
@@ -22,6 +26,11 @@ int main(void) | |||
22 | BLANK(); | 26 | BLANK(); |
23 | #endif | 27 | #endif |
24 | 28 | ||
29 | #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) | ||
30 | OFFSET(KVM_STEAL_TIME_preempted, kvm_steal_time, preempted); | ||
31 | BLANK(); | ||
32 | #endif | ||
33 | |||
25 | #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) | 34 | #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) |
26 | ENTRY(bx); | 35 | ENTRY(bx); |
27 | ENTRY(cx); | 36 | ENTRY(cx); |
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 589b3193f102..b01bc8517450 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/syscalls.h> | 16 | #include <linux/syscalls.h> |
17 | #include <linux/bitmap.h> | 17 | #include <linux/bitmap.h> |
18 | #include <asm/syscalls.h> | 18 | #include <asm/syscalls.h> |
19 | #include <asm/desc.h> | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | * this changes the io permissions bitmap in the current task. | 22 | * this changes the io permissions bitmap in the current task. |
@@ -45,6 +46,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
45 | memset(bitmap, 0xff, IO_BITMAP_BYTES); | 46 | memset(bitmap, 0xff, IO_BITMAP_BYTES); |
46 | t->io_bitmap_ptr = bitmap; | 47 | t->io_bitmap_ptr = bitmap; |
47 | set_thread_flag(TIF_IO_BITMAP); | 48 | set_thread_flag(TIF_IO_BITMAP); |
49 | |||
50 | preempt_disable(); | ||
51 | refresh_TR(); | ||
52 | preempt_enable(); | ||
48 | } | 53 | } |
49 | 54 | ||
50 | /* | 55 | /* |
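The refresh_TR() call added to sys_ioperm() closes the window in which a task gains an I/O bitmap while the CPU is still running with the truncated post-VM-exit TSS limit. Nothing changes for userspace; a plain glibc program like the following (needs CAP_SYS_RAWIO, shown only to illustrate which kernel path is exercised) now causes an immediate TR refresh on the CPU that grants the permission:

        #include <stdio.h>
        #include <sys/io.h>

        int main(void)
        {
                if (ioperm(0x70, 2, 1)) {       /* request ports 0x70-0x71 */
                        perror("ioperm");
                        return 1;
                }
                outb(0x0a, 0x70);               /* allowed via the per-task I/O bitmap */
                return 0;
        }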
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 099fcba4981d..14f65a5f938e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -589,7 +589,8 @@ out: | |||
589 | local_irq_restore(flags); | 589 | local_irq_restore(flags); |
590 | } | 590 | } |
591 | 591 | ||
592 | __visible bool __kvm_vcpu_is_preempted(int cpu) | 592 | #ifdef CONFIG_X86_32 |
593 | __visible bool __kvm_vcpu_is_preempted(long cpu) | ||
593 | { | 594 | { |
594 | struct kvm_steal_time *src = &per_cpu(steal_time, cpu); | 595 | struct kvm_steal_time *src = &per_cpu(steal_time, cpu); |
595 | 596 | ||
@@ -597,6 +598,29 @@ __visible bool __kvm_vcpu_is_preempted(int cpu) | |||
597 | } | 598 | } |
598 | PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted); | 599 | PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted); |
599 | 600 | ||
601 | #else | ||
602 | |||
603 | #include <asm/asm-offsets.h> | ||
604 | |||
605 | extern bool __raw_callee_save___kvm_vcpu_is_preempted(long); | ||
606 | |||
607 | /* | ||
608 | * Hand-optimized version for x86-64 that avoids saving and restoring | ||
609 | * eight 64-bit registers to/from the stack. | ||
610 | */ | ||
611 | asm( | ||
612 | ".pushsection .text;" | ||
613 | ".global __raw_callee_save___kvm_vcpu_is_preempted;" | ||
614 | ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" | ||
615 | "__raw_callee_save___kvm_vcpu_is_preempted:" | ||
616 | "movq __per_cpu_offset(,%rdi,8), %rax;" | ||
617 | "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" | ||
618 | "setne %al;" | ||
619 | "ret;" | ||
620 | ".popsection"); | ||
621 | |||
622 | #endif | ||
623 | |||
600 | /* | 624 | /* |
601 | * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. | 625 | * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. |
602 | */ | 626 | */ |
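Both the plain C version (32-bit) and the hand-written thunk (64-bit) implement the same predicate: has the host scheduled this vCPU out? The point of the optimization is that callers sit on hot optimistic-spinning paths. A hypothetical caller, loosely modelled on what mutex/rwsem owner spinning does rather than copied from it:

        static bool worth_spinning_on(struct task_struct *owner, int owner_cpu)
        {
                /*
                 * If the lock owner's vCPU has been preempted by the host it
                 * cannot release the lock any time soon, so spinning only
                 * burns cycles; better to block and let the host run
                 * something useful.
                 */
                return READ_ONCE(owner->on_cpu) && !need_resched() &&
                       !vcpu_is_preempted(owner_cpu);
        }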
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 542710b99f52..bae6ea6cfb94 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #include <asm/x86_init.h> | 29 | #include <asm/x86_init.h> |
30 | #include <asm/reboot.h> | 30 | #include <asm/reboot.h> |
31 | #include <asm/kvmclock.h> | ||
31 | 32 | ||
32 | static int kvmclock __ro_after_init = 1; | 33 | static int kvmclock __ro_after_init = 1; |
33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | 34 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; |
@@ -49,6 +50,7 @@ struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) | |||
49 | { | 50 | { |
50 | return hv_clock; | 51 | return hv_clock; |
51 | } | 52 | } |
53 | EXPORT_SYMBOL_GPL(pvclock_pvti_cpu0_va); | ||
52 | 54 | ||
53 | /* | 55 | /* |
54 | * The wallclock is the time of day when we booted. Since then, some time may | 56 | * The wallclock is the time of day when we booted. Since then, some time may |
@@ -174,13 +176,14 @@ bool kvm_check_and_clear_guest_paused(void) | |||
174 | return ret; | 176 | return ret; |
175 | } | 177 | } |
176 | 178 | ||
177 | static struct clocksource kvm_clock = { | 179 | struct clocksource kvm_clock = { |
178 | .name = "kvm-clock", | 180 | .name = "kvm-clock", |
179 | .read = kvm_clock_get_cycles, | 181 | .read = kvm_clock_get_cycles, |
180 | .rating = 400, | 182 | .rating = 400, |
181 | .mask = CLOCKSOURCE_MASK(64), | 183 | .mask = CLOCKSOURCE_MASK(64), |
182 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 184 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
183 | }; | 185 | }; |
186 | EXPORT_SYMBOL_GPL(kvm_clock); | ||
184 | 187 | ||
185 | int kvm_register_clock(char *txt) | 188 | int kvm_register_clock(char *txt) |
186 | { | 189 | { |
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 6259327f3454..8f2d1c9d43a8 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -20,7 +20,7 @@ bool pv_is_native_spin_unlock(void) | |||
20 | __raw_callee_save___native_queued_spin_unlock; | 20 | __raw_callee_save___native_queued_spin_unlock; |
21 | } | 21 | } |
22 | 22 | ||
23 | __visible bool __native_vcpu_is_preempted(int cpu) | 23 | __visible bool __native_vcpu_is_preempted(long cpu) |
24 | { | 24 | { |
25 | return false; | 25 | return false; |
26 | } | 26 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b615a1113f58..7780efa635b9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/mce.h> | 32 | #include <asm/mce.h> |
33 | #include <asm/vm86.h> | 33 | #include <asm/vm86.h> |
34 | #include <asm/switch_to.h> | 34 | #include <asm/switch_to.h> |
35 | #include <asm/desc.h> | ||
35 | 36 | ||
36 | /* | 37 | /* |
37 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, | 38 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, |
@@ -64,6 +65,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { | |||
64 | }; | 65 | }; |
65 | EXPORT_PER_CPU_SYMBOL(cpu_tss); | 66 | EXPORT_PER_CPU_SYMBOL(cpu_tss); |
66 | 67 | ||
68 | DEFINE_PER_CPU(bool, need_tr_refresh); | ||
69 | EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh); | ||
70 | |||
67 | /* | 71 | /* |
68 | * this gets called so that we can store lazy state into memory and copy the | 72 | * this gets called so that we can store lazy state into memory and copy the |
69 | * current task into the new thread. | 73 | * current task into the new thread. |
@@ -209,6 +213,12 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
209 | */ | 213 | */ |
210 | memcpy(tss->io_bitmap, next->io_bitmap_ptr, | 214 | memcpy(tss->io_bitmap, next->io_bitmap_ptr, |
211 | max(prev->io_bitmap_max, next->io_bitmap_max)); | 215 | max(prev->io_bitmap_max, next->io_bitmap_max)); |
216 | |||
217 | /* | ||
218 | * Make sure that the TSS limit is correct for the CPU | ||
219 | * to notice the IO bitmap. | ||
220 | */ | ||
221 | refresh_TR(); | ||
212 | } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { | 222 | } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { |
213 | /* | 223 | /* |
214 | * Clear any possible leftover bits: | 224 | * Clear any possible leftover bits: |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e85f6bd7b9d5..1d155cc56629 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -123,8 +123,6 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
123 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) | 123 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) |
124 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); | 124 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); |
125 | 125 | ||
126 | kvm_x86_ops->fpu_activate(vcpu); | ||
127 | |||
128 | /* | 126 | /* |
129 | * The existing code assumes virtual address is 48-bit in the canonical | 127 | * The existing code assumes virtual address is 48-bit in the canonical |
130 | * address checks; exit if it is ever changed. | 128 | * address checks; exit if it is ever changed. |
@@ -383,7 +381,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
383 | 381 | ||
384 | /* cpuid 7.0.ecx*/ | 382 | /* cpuid 7.0.ecx*/ |
385 | const u32 kvm_cpuid_7_0_ecx_x86_features = | 383 | const u32 kvm_cpuid_7_0_ecx_x86_features = |
386 | F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/; | 384 | F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); |
387 | 385 | ||
388 | /* cpuid 7.0.edx*/ | 386 | /* cpuid 7.0.edx*/ |
389 | const u32 kvm_cpuid_7_0_edx_x86_features = | 387 | const u32 kvm_cpuid_7_0_edx_x86_features = |
@@ -861,12 +859,6 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | |||
861 | if (!best) | 859 | if (!best) |
862 | best = check_cpuid_limit(vcpu, function, index); | 860 | best = check_cpuid_limit(vcpu, function, index); |
863 | 861 | ||
864 | /* | ||
865 | * Perfmon not yet supported for L2 guest. | ||
866 | */ | ||
867 | if (is_guest_mode(vcpu) && function == 0xa) | ||
868 | best = NULL; | ||
869 | |||
870 | if (best) { | 862 | if (best) { |
871 | *eax = best->eax; | 863 | *eax = best->eax; |
872 | *ebx = best->ebx; | 864 | *ebx = best->ebx; |
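With F(AVX512_VPOPCNTDQ) added to the CPUID 7.0.ECX mask, guests running on capable hardware now see the bit. An illustrative guest-side check; the bit position, CPUID.(EAX=7,ECX=0):ECX[14], is the architectural one and cpuid_count() is the standard helper:

        unsigned int eax, ebx, ecx, edx;

        cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
        if (ecx & (1U << 14))
                pr_info("AVX512_VPOPCNTDQ is available\n");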
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cedbba0f3402..45c7306c8780 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -173,6 +173,7 @@ | |||
173 | #define NearBranch ((u64)1 << 52) /* Near branches */ | 173 | #define NearBranch ((u64)1 << 52) /* Near branches */ |
174 | #define No16 ((u64)1 << 53) /* No 16 bit operand */ | 174 | #define No16 ((u64)1 << 53) /* No 16 bit operand */ |
175 | #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ | 175 | #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ |
176 | #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */ | ||
176 | 177 | ||
177 | #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) | 178 | #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) |
178 | 179 | ||
@@ -4298,7 +4299,7 @@ static const struct opcode group1[] = { | |||
4298 | }; | 4299 | }; |
4299 | 4300 | ||
4300 | static const struct opcode group1A[] = { | 4301 | static const struct opcode group1A[] = { |
4301 | I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N, | 4302 | I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N, |
4302 | }; | 4303 | }; |
4303 | 4304 | ||
4304 | static const struct opcode group2[] = { | 4305 | static const struct opcode group2[] = { |
@@ -4336,7 +4337,7 @@ static const struct opcode group5[] = { | |||
4336 | I(SrcMemFAddr | ImplicitOps, em_call_far), | 4337 | I(SrcMemFAddr | ImplicitOps, em_call_far), |
4337 | I(SrcMem | NearBranch, em_jmp_abs), | 4338 | I(SrcMem | NearBranch, em_jmp_abs), |
4338 | I(SrcMemFAddr | ImplicitOps, em_jmp_far), | 4339 | I(SrcMemFAddr | ImplicitOps, em_jmp_far), |
4339 | I(SrcMem | Stack, em_push), D(Undefined), | 4340 | I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined), |
4340 | }; | 4341 | }; |
4341 | 4342 | ||
4342 | static const struct opcode group6[] = { | 4343 | static const struct opcode group6[] = { |
@@ -4556,8 +4557,8 @@ static const struct opcode opcode_table[256] = { | |||
4556 | /* 0xA0 - 0xA7 */ | 4557 | /* 0xA0 - 0xA7 */ |
4557 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 4558 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
4558 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), | 4559 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), |
4559 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 4560 | I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov), |
4560 | F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r), | 4561 | F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), |
4561 | /* 0xA8 - 0xAF */ | 4562 | /* 0xA8 - 0xAF */ |
4562 | F2bv(DstAcc | SrcImm | NoWrite, em_test), | 4563 | F2bv(DstAcc | SrcImm | NoWrite, em_test), |
4563 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), | 4564 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), |
@@ -5671,3 +5672,14 @@ void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt) | |||
5671 | { | 5672 | { |
5672 | writeback_registers(ctxt); | 5673 | writeback_registers(ctxt); |
5673 | } | 5674 | } |
5675 | |||
5676 | bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt) | ||
5677 | { | ||
5678 | if (ctxt->rep_prefix && (ctxt->d & String)) | ||
5679 | return false; | ||
5680 | |||
5681 | if (ctxt->d & TwoMemOp) | ||
5682 | return false; | ||
5683 | |||
5684 | return true; | ||
5685 | } | ||
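emulator_can_use_gpa() is the gate for the gpa_available flag added to kvm_vcpu_arch earlier in this diff: on AMD, a nested page fault exit already carries the guest-physical address, so the MMIO emulation path can skip the GVA->GPA walk, but only when the instruction has a single, non-repeated memory operand; otherwise the reported GPA is ambiguous. A hypothetical helper (not the patch's exact call site) showing how the two pieces combine:

        static bool reuse_exit_gpa(struct kvm_vcpu *vcpu,
                                   struct x86_emulate_ctxt *ctxt)
        {
                return vcpu->arch.gpa_available && emulator_can_use_gpa(ctxt);
        }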
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2ecd7dab4631..f701d4430727 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
@@ -305,13 +305,13 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) | |||
305 | return -ENOENT; | 305 | return -ENOENT; |
306 | 306 | ||
307 | memset(&irq, 0, sizeof(irq)); | 307 | memset(&irq, 0, sizeof(irq)); |
308 | irq.dest_id = kvm_apic_id(vcpu->arch.apic); | 308 | irq.shorthand = APIC_DEST_SELF; |
309 | irq.dest_mode = APIC_DEST_PHYSICAL; | 309 | irq.dest_mode = APIC_DEST_PHYSICAL; |
310 | irq.delivery_mode = APIC_DM_FIXED; | 310 | irq.delivery_mode = APIC_DM_FIXED; |
311 | irq.vector = vector; | 311 | irq.vector = vector; |
312 | irq.level = 1; | 312 | irq.level = 1; |
313 | 313 | ||
314 | ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); | 314 | ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL); |
315 | trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); | 315 | trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); |
316 | return ret; | 316 | return ret; |
317 | } | 317 | } |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 7cc2360f1848..73ea24d4f119 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -598,14 +598,14 @@ static const struct kvm_io_device_ops picdev_eclr_ops = { | |||
598 | .write = picdev_eclr_write, | 598 | .write = picdev_eclr_write, |
599 | }; | 599 | }; |
600 | 600 | ||
601 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) | 601 | int kvm_pic_init(struct kvm *kvm) |
602 | { | 602 | { |
603 | struct kvm_pic *s; | 603 | struct kvm_pic *s; |
604 | int ret; | 604 | int ret; |
605 | 605 | ||
606 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | 606 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); |
607 | if (!s) | 607 | if (!s) |
608 | return NULL; | 608 | return -ENOMEM; |
609 | spin_lock_init(&s->lock); | 609 | spin_lock_init(&s->lock); |
610 | s->kvm = kvm; | 610 | s->kvm = kvm; |
611 | s->pics[0].elcr_mask = 0xf8; | 611 | s->pics[0].elcr_mask = 0xf8; |
@@ -635,7 +635,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
635 | 635 | ||
636 | mutex_unlock(&kvm->slots_lock); | 636 | mutex_unlock(&kvm->slots_lock); |
637 | 637 | ||
638 | return s; | 638 | kvm->arch.vpic = s; |
639 | |||
640 | return 0; | ||
639 | 641 | ||
640 | fail_unreg_1: | 642 | fail_unreg_1: |
641 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave); | 643 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave); |
@@ -648,13 +650,17 @@ fail_unlock: | |||
648 | 650 | ||
649 | kfree(s); | 651 | kfree(s); |
650 | 652 | ||
651 | return NULL; | 653 | return ret; |
652 | } | 654 | } |
653 | 655 | ||
654 | void kvm_destroy_pic(struct kvm_pic *vpic) | 656 | void kvm_pic_destroy(struct kvm *kvm) |
655 | { | 657 | { |
658 | struct kvm_pic *vpic = kvm->arch.vpic; | ||
659 | |||
656 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); | 660 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); |
657 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); | 661 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); |
658 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); | 662 | kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); |
663 | |||
664 | kvm->arch.vpic = NULL; | ||
659 | kfree(vpic); | 665 | kfree(vpic); |
660 | } | 666 | } |
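Turning kvm_create_pic()/kvm_destroy_pic() into kvm_pic_init()/kvm_pic_destroy() lets the KVM_CREATE_IRQCHIP ioctl become a plain sequence of init calls with ordinary error unwinding. A simplified sketch of that caller; the label and the ioapic/routing helpers are the existing x86.c ones, reconstructed from memory rather than quoted:

        r = kvm_pic_init(kvm);
        if (r)
                goto create_irqchip_unlock;

        r = kvm_ioapic_init(kvm);
        if (r) {
                kvm_pic_destroy(kvm);
                goto create_irqchip_unlock;
        }

        r = kvm_setup_default_irq_routing(kvm);
        if (r) {
                kvm_ioapic_destroy(kvm);
                kvm_pic_destroy(kvm);
                goto create_irqchip_unlock;
        }

        /* Write kvm->irq_routing before enabling irqchip_in_kernel(). */
        smp_wmb();
        kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;

The final store pairs with the smp_rmb() in irqchip_in_kernel() shown in the irq.h hunk below.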
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 035731eb3897..40d5b2cf6061 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -73,8 +73,8 @@ struct kvm_pic { | |||
73 | unsigned long irq_states[PIC_NUM_PINS]; | 73 | unsigned long irq_states[PIC_NUM_PINS]; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 76 | int kvm_pic_init(struct kvm *kvm); |
77 | void kvm_destroy_pic(struct kvm_pic *vpic); | 77 | void kvm_pic_destroy(struct kvm *kvm); |
78 | int kvm_pic_read_irq(struct kvm *kvm); | 78 | int kvm_pic_read_irq(struct kvm *kvm); |
79 | void kvm_pic_update_irq(struct kvm_pic *s); | 79 | void kvm_pic_update_irq(struct kvm_pic *s); |
80 | 80 | ||
@@ -93,18 +93,19 @@ static inline int pic_in_kernel(struct kvm *kvm) | |||
93 | 93 | ||
94 | static inline int irqchip_split(struct kvm *kvm) | 94 | static inline int irqchip_split(struct kvm *kvm) |
95 | { | 95 | { |
96 | return kvm->arch.irqchip_split; | 96 | return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT; |
97 | } | 97 | } |
98 | 98 | ||
99 | static inline int irqchip_in_kernel(struct kvm *kvm) | 99 | static inline int irqchip_kernel(struct kvm *kvm) |
100 | { | 100 | { |
101 | struct kvm_pic *vpic = pic_irqchip(kvm); | 101 | return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL; |
102 | bool ret; | 102 | } |
103 | 103 | ||
104 | ret = (vpic != NULL); | 104 | static inline int irqchip_in_kernel(struct kvm *kvm) |
105 | ret |= irqchip_split(kvm); | 105 | { |
106 | bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE; | ||
106 | 107 | ||
107 | /* Read vpic before kvm->irq_routing. */ | 108 | /* Matches with wmb after initializing kvm->irq_routing. */ |
108 | smp_rmb(); | 109 | smp_rmb(); |
109 | return ret; | 110 | return ret; |
110 | } | 111 | } |
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 6c0191615f23..b96d3893f121 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c | |||
@@ -41,15 +41,6 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | |||
41 | bool line_status) | 41 | bool line_status) |
42 | { | 42 | { |
43 | struct kvm_pic *pic = pic_irqchip(kvm); | 43 | struct kvm_pic *pic = pic_irqchip(kvm); |
44 | |||
45 | /* | ||
46 | * XXX: rejecting pic routes when pic isn't in use would be better, | ||
47 | * but the default routing table is installed while kvm->arch.vpic is | ||
48 | * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE. | ||
49 | */ | ||
50 | if (!pic) | ||
51 | return -1; | ||
52 | |||
53 | return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); | 44 | return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); |
54 | } | 45 | } |
55 | 46 | ||
@@ -58,10 +49,6 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | |||
58 | bool line_status) | 49 | bool line_status) |
59 | { | 50 | { |
60 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 51 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
61 | |||
62 | if (!ioapic) | ||
63 | return -1; | ||
64 | |||
65 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, | 52 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, |
66 | line_status); | 53 | line_status); |
67 | } | 54 | } |
@@ -297,16 +284,20 @@ int kvm_set_routing_entry(struct kvm *kvm, | |||
297 | case KVM_IRQ_ROUTING_IRQCHIP: | 284 | case KVM_IRQ_ROUTING_IRQCHIP: |
298 | delta = 0; | 285 | delta = 0; |
299 | switch (ue->u.irqchip.irqchip) { | 286 | switch (ue->u.irqchip.irqchip) { |
300 | case KVM_IRQCHIP_PIC_MASTER: | ||
301 | e->set = kvm_set_pic_irq; | ||
302 | max_pin = PIC_NUM_PINS; | ||
303 | break; | ||
304 | case KVM_IRQCHIP_PIC_SLAVE: | 287 | case KVM_IRQCHIP_PIC_SLAVE: |
288 | delta = 8; | ||
289 | /* fall through */ | ||
290 | case KVM_IRQCHIP_PIC_MASTER: | ||
291 | if (!pic_in_kernel(kvm)) | ||
292 | goto out; | ||
293 | |||
305 | e->set = kvm_set_pic_irq; | 294 | e->set = kvm_set_pic_irq; |
306 | max_pin = PIC_NUM_PINS; | 295 | max_pin = PIC_NUM_PINS; |
307 | delta = 8; | ||
308 | break; | 296 | break; |
309 | case KVM_IRQCHIP_IOAPIC: | 297 | case KVM_IRQCHIP_IOAPIC: |
298 | if (!ioapic_in_kernel(kvm)) | ||
299 | goto out; | ||
300 | |||
310 | max_pin = KVM_IOAPIC_NUM_PINS; | 301 | max_pin = KVM_IOAPIC_NUM_PINS; |
311 | e->set = kvm_set_ioapic_irq; | 302 | e->set = kvm_set_ioapic_irq; |
312 | break; | 303 | break; |
@@ -409,7 +400,7 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm) | |||
409 | 400 | ||
410 | void kvm_arch_post_irq_routing_update(struct kvm *kvm) | 401 | void kvm_arch_post_irq_routing_update(struct kvm *kvm) |
411 | { | 402 | { |
412 | if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) | 403 | if (!irqchip_split(kvm)) |
413 | return; | 404 | return; |
414 | kvm_make_scan_ioapic_request(kvm); | 405 | kvm_make_scan_ioapic_request(kvm); |
415 | } | 406 | } |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2f6ef5121a4c..bad6a25067bc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -115,6 +115,16 @@ static inline int apic_enabled(struct kvm_lapic *apic) | |||
115 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ | 115 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ |
116 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) | 116 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) |
117 | 117 | ||
118 | static inline u8 kvm_xapic_id(struct kvm_lapic *apic) | ||
119 | { | ||
120 | return kvm_lapic_get_reg(apic, APIC_ID) >> 24; | ||
121 | } | ||
122 | |||
123 | static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) | ||
124 | { | ||
125 | return apic->vcpu->vcpu_id; | ||
126 | } | ||
127 | |||
118 | static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, | 128 | static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, |
119 | u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { | 129 | u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { |
120 | switch (map->mode) { | 130 | switch (map->mode) { |
@@ -159,13 +169,13 @@ static void recalculate_apic_map(struct kvm *kvm) | |||
159 | struct kvm_apic_map *new, *old = NULL; | 169 | struct kvm_apic_map *new, *old = NULL; |
160 | struct kvm_vcpu *vcpu; | 170 | struct kvm_vcpu *vcpu; |
161 | int i; | 171 | int i; |
162 | u32 max_id = 255; | 172 | u32 max_id = 255; /* enough space for any xAPIC ID */ |
163 | 173 | ||
164 | mutex_lock(&kvm->arch.apic_map_lock); | 174 | mutex_lock(&kvm->arch.apic_map_lock); |
165 | 175 | ||
166 | kvm_for_each_vcpu(i, vcpu, kvm) | 176 | kvm_for_each_vcpu(i, vcpu, kvm) |
167 | if (kvm_apic_present(vcpu)) | 177 | if (kvm_apic_present(vcpu)) |
168 | max_id = max(max_id, kvm_apic_id(vcpu->arch.apic)); | 178 | max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); |
169 | 179 | ||
170 | new = kvm_kvzalloc(sizeof(struct kvm_apic_map) + | 180 | new = kvm_kvzalloc(sizeof(struct kvm_apic_map) + |
171 | sizeof(struct kvm_lapic *) * ((u64)max_id + 1)); | 181 | sizeof(struct kvm_lapic *) * ((u64)max_id + 1)); |
@@ -179,16 +189,28 @@ static void recalculate_apic_map(struct kvm *kvm) | |||
179 | struct kvm_lapic *apic = vcpu->arch.apic; | 189 | struct kvm_lapic *apic = vcpu->arch.apic; |
180 | struct kvm_lapic **cluster; | 190 | struct kvm_lapic **cluster; |
181 | u16 mask; | 191 | u16 mask; |
182 | u32 ldr, aid; | 192 | u32 ldr; |
193 | u8 xapic_id; | ||
194 | u32 x2apic_id; | ||
183 | 195 | ||
184 | if (!kvm_apic_present(vcpu)) | 196 | if (!kvm_apic_present(vcpu)) |
185 | continue; | 197 | continue; |
186 | 198 | ||
187 | aid = kvm_apic_id(apic); | 199 | xapic_id = kvm_xapic_id(apic); |
188 | ldr = kvm_lapic_get_reg(apic, APIC_LDR); | 200 | x2apic_id = kvm_x2apic_id(apic); |
189 | 201 | ||
190 | if (aid <= new->max_apic_id) | 202 | /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */ |
191 | new->phys_map[aid] = apic; | 203 | if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && |
204 | x2apic_id <= new->max_apic_id) | ||
205 | new->phys_map[x2apic_id] = apic; | ||
206 | /* | ||
207 | * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around, | ||
208 | * prevent them from masking VCPUs with APIC ID <= 0xff. | ||
209 | */ | ||
210 | if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) | ||
211 | new->phys_map[xapic_id] = apic; | ||
212 | |||
213 | ldr = kvm_lapic_get_reg(apic, APIC_LDR); | ||
192 | 214 | ||
193 | if (apic_x2apic_mode(apic)) { | 215 | if (apic_x2apic_mode(apic)) { |
194 | new->mode |= KVM_APIC_MODE_X2APIC; | 216 | new->mode |= KVM_APIC_MODE_X2APIC; |
@@ -250,6 +272,8 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) | |||
250 | { | 272 | { |
251 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); | 273 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); |
252 | 274 | ||
275 | WARN_ON_ONCE(id != apic->vcpu->vcpu_id); | ||
276 | |||
253 | kvm_lapic_set_reg(apic, APIC_ID, id); | 277 | kvm_lapic_set_reg(apic, APIC_ID, id); |
254 | kvm_lapic_set_reg(apic, APIC_LDR, ldr); | 278 | kvm_lapic_set_reg(apic, APIC_LDR, ldr); |
255 | recalculate_apic_map(apic->vcpu->kvm); | 279 | recalculate_apic_map(apic->vcpu->kvm); |
@@ -317,7 +341,7 @@ static int find_highest_vector(void *bitmap) | |||
317 | vec >= 0; vec -= APIC_VECTORS_PER_REG) { | 341 | vec >= 0; vec -= APIC_VECTORS_PER_REG) { |
318 | reg = bitmap + REG_POS(vec); | 342 | reg = bitmap + REG_POS(vec); |
319 | if (*reg) | 343 | if (*reg) |
320 | return fls(*reg) - 1 + vec; | 344 | return __fls(*reg) + vec; |
321 | } | 345 | } |
322 | 346 | ||
323 | return -1; | 347 | return -1; |
@@ -337,27 +361,32 @@ static u8 count_vectors(void *bitmap) | |||
337 | return count; | 361 | return count; |
338 | } | 362 | } |
339 | 363 | ||
340 | void __kvm_apic_update_irr(u32 *pir, void *regs) | 364 | int __kvm_apic_update_irr(u32 *pir, void *regs) |
341 | { | 365 | { |
342 | u32 i, pir_val; | 366 | u32 i, vec; |
367 | u32 pir_val, irr_val; | ||
368 | int max_irr = -1; | ||
343 | 369 | ||
344 | for (i = 0; i <= 7; i++) { | 370 | for (i = vec = 0; i <= 7; i++, vec += 32) { |
345 | pir_val = READ_ONCE(pir[i]); | 371 | pir_val = READ_ONCE(pir[i]); |
372 | irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10)); | ||
346 | if (pir_val) { | 373 | if (pir_val) { |
347 | pir_val = xchg(&pir[i], 0); | 374 | irr_val |= xchg(&pir[i], 0); |
348 | *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val; | 375 | *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val; |
349 | } | 376 | } |
377 | if (irr_val) | ||
378 | max_irr = __fls(irr_val) + vec; | ||
350 | } | 379 | } |
380 | |||
381 | return max_irr; | ||
351 | } | 382 | } |
352 | EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); | 383 | EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); |
353 | 384 | ||
354 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | 385 | int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) |
355 | { | 386 | { |
356 | struct kvm_lapic *apic = vcpu->arch.apic; | 387 | struct kvm_lapic *apic = vcpu->arch.apic; |
357 | 388 | ||
358 | __kvm_apic_update_irr(pir, apic->regs); | 389 | return __kvm_apic_update_irr(pir, apic->regs); |
359 | |||
360 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
361 | } | 390 | } |
362 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | 391 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); |
363 | 392 | ||
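__kvm_apic_update_irr() now reports the highest vector it merged into the IRR, which is what lets sync_pir_to_irr() return an up-to-date max IRR instead of void (see the kvm_x86_ops prototype change earlier in this diff). A sketch of the VMX side, simplified and reconstructed rather than quoted from vmx.c; pi_test_on()/pi_clear_on() and vmx_hwapic_irr_update() are taken to be the vmx.c-internal helpers:

        static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
        {
                struct vcpu_vmx *vmx = to_vmx(vcpu);
                int max_irr;

                if (pi_test_on(&vmx->pi_desc)) {
                        pi_clear_on(&vmx->pi_desc);
                        /* Pairs with the barrier in the posted-interrupt sender. */
                        smp_mb__after_atomic();
                        max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
                } else {
                        max_irr = kvm_lapic_find_highest_irr(vcpu);
                }

                vmx_hwapic_irr_update(vcpu, max_irr);
                return max_irr;
        }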
@@ -377,8 +406,6 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) | |||
377 | if (!apic->irr_pending) | 406 | if (!apic->irr_pending) |
378 | return -1; | 407 | return -1; |
379 | 408 | ||
380 | if (apic->vcpu->arch.apicv_active) | ||
381 | kvm_x86_ops->sync_pir_to_irr(apic->vcpu); | ||
382 | result = apic_search_irr(apic); | 409 | result = apic_search_irr(apic); |
383 | ASSERT(result == -1 || result >= 16); | 410 | ASSERT(result == -1 || result >= 16); |
384 | 411 | ||
@@ -392,9 +419,10 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | |||
392 | vcpu = apic->vcpu; | 419 | vcpu = apic->vcpu; |
393 | 420 | ||
394 | if (unlikely(vcpu->arch.apicv_active)) { | 421 | if (unlikely(vcpu->arch.apicv_active)) { |
395 | /* try to update RVI */ | 422 | /* need to update RVI */ |
396 | apic_clear_vector(vec, apic->regs + APIC_IRR); | 423 | apic_clear_vector(vec, apic->regs + APIC_IRR); |
397 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 424 | kvm_x86_ops->hwapic_irr_update(vcpu, |
425 | apic_find_highest_irr(apic)); | ||
398 | } else { | 426 | } else { |
399 | apic->irr_pending = false; | 427 | apic->irr_pending = false; |
400 | apic_clear_vector(vec, apic->regs + APIC_IRR); | 428 | apic_clear_vector(vec, apic->regs + APIC_IRR); |
@@ -484,6 +512,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | |||
484 | */ | 512 | */ |
485 | return apic_find_highest_irr(vcpu->arch.apic); | 513 | return apic_find_highest_irr(vcpu->arch.apic); |
486 | } | 514 | } |
515 | EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); | ||
487 | 516 | ||
488 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 517 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
489 | int vector, int level, int trig_mode, | 518 | int vector, int level, int trig_mode, |
@@ -500,16 +529,14 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | |||
500 | 529 | ||
501 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) | 530 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) |
502 | { | 531 | { |
503 | 532 | return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, &val, | |
504 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, | 533 | sizeof(val)); |
505 | sizeof(val)); | ||
506 | } | 534 | } |
507 | 535 | ||
508 | static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) | 536 | static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) |
509 | { | 537 | { |
510 | 538 | return kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, val, | |
511 | return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, | 539 | sizeof(*val)); |
512 | sizeof(*val)); | ||
513 | } | 540 | } |
514 | 541 | ||
515 | static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) | 542 | static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) |
@@ -546,7 +573,19 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
546 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 573 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
547 | } | 574 | } |
548 | 575 | ||
549 | static void apic_update_ppr(struct kvm_lapic *apic) | 576 | static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) |
577 | { | ||
578 | int highest_irr; | ||
579 | if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active) | ||
580 | highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu); | ||
581 | else | ||
582 | highest_irr = apic_find_highest_irr(apic); | ||
583 | if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) | ||
584 | return -1; | ||
585 | return highest_irr; | ||
586 | } | ||
587 | |||
588 | static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr) | ||
550 | { | 589 | { |
551 | u32 tpr, isrv, ppr, old_ppr; | 590 | u32 tpr, isrv, ppr, old_ppr; |
552 | int isr; | 591 | int isr; |
@@ -564,13 +603,28 @@ static void apic_update_ppr(struct kvm_lapic *apic) | |||
564 | apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", | 603 | apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", |
565 | apic, ppr, isr, isrv); | 604 | apic, ppr, isr, isrv); |
566 | 605 | ||
567 | if (old_ppr != ppr) { | 606 | *new_ppr = ppr; |
607 | if (old_ppr != ppr) | ||
568 | kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr); | 608 | kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr); |
569 | if (ppr < old_ppr) | 609 | |
570 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 610 | return ppr < old_ppr; |
571 | } | 611 | } |
612 | |||
613 | static void apic_update_ppr(struct kvm_lapic *apic) | ||
614 | { | ||
615 | u32 ppr; | ||
616 | |||
617 | if (__apic_update_ppr(apic, &ppr) && | ||
618 | apic_has_interrupt_for_ppr(apic, ppr) != -1) | ||
619 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | ||
572 | } | 620 | } |
573 | 621 | ||
622 | void kvm_apic_update_ppr(struct kvm_vcpu *vcpu) | ||
623 | { | ||
624 | apic_update_ppr(vcpu->arch.apic); | ||
625 | } | ||
626 | EXPORT_SYMBOL_GPL(kvm_apic_update_ppr); | ||
627 | |||
574 | static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) | 628 | static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) |
575 | { | 629 | { |
576 | kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr); | 630 | kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr); |
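Concretely, apic_has_interrupt_for_ppr() compares priority classes: with PPR = 0x30, a pending vector 0x35 is in class 3 (0x35 & 0xF0 == 0x30), which is not above the processor priority, so the function returns -1 and the interrupt stays in the IRR; a pending vector 0x41 (class 4) would be returned and can be injected.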
@@ -579,10 +633,8 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) | |||
579 | 633 | ||
580 | static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) | 634 | static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) |
581 | { | 635 | { |
582 | if (apic_x2apic_mode(apic)) | 636 | return mda == (apic_x2apic_mode(apic) ? |
583 | return mda == X2APIC_BROADCAST; | 637 | X2APIC_BROADCAST : APIC_BROADCAST); |
584 | |||
585 | return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST; | ||
586 | } | 638 | } |
587 | 639 | ||
588 | static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) | 640 | static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) |
@@ -591,9 +643,18 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) | |||
591 | return true; | 643 | return true; |
592 | 644 | ||
593 | if (apic_x2apic_mode(apic)) | 645 | if (apic_x2apic_mode(apic)) |
594 | return mda == kvm_apic_id(apic); | 646 | return mda == kvm_x2apic_id(apic); |
595 | 647 | ||
596 | return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic)); | 648 | /* |
649 | * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if | ||
650 | * it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and | ||
651 | * this allows unique addressing of VCPUs with APIC ID over 0xff. | ||
652 | * The 0xff condition is needed because the xAPIC ID is writeable. | ||
653 | */ | ||
654 | if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic)) | ||
655 | return true; | ||
656 | |||
657 | return mda == kvm_xapic_id(apic); | ||
597 | } | 658 | } |
598 | 659 | ||
599 | static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) | 660 | static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) |
@@ -610,7 +671,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) | |||
610 | && (logical_id & mda & 0xffff) != 0; | 671 | && (logical_id & mda & 0xffff) != 0; |
611 | 672 | ||
612 | logical_id = GET_APIC_LOGICAL_ID(logical_id); | 673 | logical_id = GET_APIC_LOGICAL_ID(logical_id); |
613 | mda = GET_APIC_DEST_FIELD(mda); | ||
614 | 674 | ||
615 | switch (kvm_lapic_get_reg(apic, APIC_DFR)) { | 675 | switch (kvm_lapic_get_reg(apic, APIC_DFR)) { |
616 | case APIC_DFR_FLAT: | 676 | case APIC_DFR_FLAT: |
@@ -627,9 +687,9 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) | |||
627 | 687 | ||
628 | /* The KVM local APIC implementation has two quirks: | 688 | /* The KVM local APIC implementation has two quirks: |
629 | * | 689 | * |
630 | * - the xAPIC MDA stores the destination at bits 24-31, while this | 690 | * - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs |
631 | * is not true of struct kvm_lapic_irq's dest_id field. This is | 691 | * in xAPIC mode if the "destination & 0xff" matches its xAPIC ID. |
632 | * just a quirk in the API and is not problematic. | 692 | * KVM doesn't do that aliasing. |
633 | * | 693 | * |
634 | * - in-kernel IOAPIC messages have to be delivered directly to | 694 | * - in-kernel IOAPIC messages have to be delivered directly to |
635 | * x2APIC, because the kernel does not support interrupt remapping. | 695 | * x2APIC, because the kernel does not support interrupt remapping. |
@@ -645,13 +705,12 @@ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, | |||
645 | struct kvm_lapic *source, struct kvm_lapic *target) | 705 | struct kvm_lapic *source, struct kvm_lapic *target) |
646 | { | 706 | { |
647 | bool ipi = source != NULL; | 707 | bool ipi = source != NULL; |
648 | bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); | ||
649 | 708 | ||
650 | if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled && | 709 | if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled && |
651 | !ipi && dest_id == APIC_BROADCAST && x2apic_mda) | 710 | !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target)) |
652 | return X2APIC_BROADCAST; | 711 | return X2APIC_BROADCAST; |
653 | 712 | ||
654 | return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); | 713 | return dest_id; |
655 | } | 714 | } |
656 | 715 | ||
657 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 716 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
@@ -1907,9 +1966,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
1907 | vcpu->arch.apic_arb_prio = 0; | 1966 | vcpu->arch.apic_arb_prio = 0; |
1908 | vcpu->arch.apic_attention = 0; | 1967 | vcpu->arch.apic_attention = 0; |
1909 | 1968 | ||
1910 | apic_debug("%s: vcpu=%p, id=%d, base_msr=" | 1969 | apic_debug("%s: vcpu=%p, id=0x%x, base_msr=" |
1911 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, | 1970 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, |
1912 | vcpu, kvm_apic_id(apic), | 1971 | vcpu, kvm_lapic_get_reg(apic, APIC_ID), |
1913 | vcpu->arch.apic_base, apic->base_address); | 1972 | vcpu->arch.apic_base, apic->base_address); |
1914 | } | 1973 | } |
1915 | 1974 | ||
@@ -2021,17 +2080,13 @@ nomem: | |||
2021 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) | 2080 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) |
2022 | { | 2081 | { |
2023 | struct kvm_lapic *apic = vcpu->arch.apic; | 2082 | struct kvm_lapic *apic = vcpu->arch.apic; |
2024 | int highest_irr; | 2083 | u32 ppr; |
2025 | 2084 | ||
2026 | if (!apic_enabled(apic)) | 2085 | if (!apic_enabled(apic)) |
2027 | return -1; | 2086 | return -1; |
2028 | 2087 | ||
2029 | apic_update_ppr(apic); | 2088 | __apic_update_ppr(apic, &ppr); |
2030 | highest_irr = apic_find_highest_irr(apic); | 2089 | return apic_has_interrupt_for_ppr(apic, ppr); |
2031 | if ((highest_irr == -1) || | ||
2032 | ((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI))) | ||
2033 | return -1; | ||
2034 | return highest_irr; | ||
2035 | } | 2090 | } |
2036 | 2091 | ||
2037 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) | 2092 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) |
@@ -2067,6 +2122,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) | |||
2067 | { | 2122 | { |
2068 | int vector = kvm_apic_has_interrupt(vcpu); | 2123 | int vector = kvm_apic_has_interrupt(vcpu); |
2069 | struct kvm_lapic *apic = vcpu->arch.apic; | 2124 | struct kvm_lapic *apic = vcpu->arch.apic; |
2125 | u32 ppr; | ||
2070 | 2126 | ||
2071 | if (vector == -1) | 2127 | if (vector == -1) |
2072 | return -1; | 2128 | return -1; |
@@ -2078,13 +2134,23 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) | |||
2078 | * because the process would deliver it through the IDT. | 2134 | * because the process would deliver it through the IDT. |
2079 | */ | 2135 | */ |
2080 | 2136 | ||
2081 | apic_set_isr(vector, apic); | ||
2082 | apic_update_ppr(apic); | ||
2083 | apic_clear_irr(vector, apic); | 2137 | apic_clear_irr(vector, apic); |
2084 | |||
2085 | if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { | 2138 | if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { |
2086 | apic_clear_isr(vector, apic); | 2139 | /* |
2140 | * For auto-EOI interrupts, there might be another pending | ||
2141 | * interrupt above PPR, so check whether to raise another | ||
2142 | * KVM_REQ_EVENT. | ||
2143 | */ | ||
2087 | apic_update_ppr(apic); | 2144 | apic_update_ppr(apic); |
2145 | } else { | ||
2146 | /* | ||
2147 | * For normal interrupts, PPR has been raised and there cannot | ||
2148 | * be a higher-priority pending interrupt---except if there was | ||
2149 | * a concurrent interrupt injection, but that would have | ||
2150 | * triggered KVM_REQ_EVENT already. | ||
2151 | */ | ||
2152 | apic_set_isr(vector, apic); | ||
2153 | __apic_update_ppr(apic, &ppr); | ||
2088 | } | 2154 | } |
2089 | 2155 | ||
2090 | return vector; | 2156 | return vector; |
@@ -2145,8 +2211,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) | |||
2145 | 1 : count_vectors(apic->regs + APIC_ISR); | 2211 | 1 : count_vectors(apic->regs + APIC_ISR); |
2146 | apic->highest_isr_cache = -1; | 2212 | apic->highest_isr_cache = -1; |
2147 | if (vcpu->arch.apicv_active) { | 2213 | if (vcpu->arch.apicv_active) { |
2148 | if (kvm_x86_ops->apicv_post_state_restore) | 2214 | kvm_x86_ops->apicv_post_state_restore(vcpu); |
2149 | kvm_x86_ops->apicv_post_state_restore(vcpu); | ||
2150 | kvm_x86_ops->hwapic_irr_update(vcpu, | 2215 | kvm_x86_ops->hwapic_irr_update(vcpu, |
2151 | apic_find_highest_irr(apic)); | 2216 | apic_find_highest_irr(apic)); |
2152 | kvm_x86_ops->hwapic_isr_update(vcpu, | 2217 | kvm_x86_ops->hwapic_isr_update(vcpu, |
@@ -2220,8 +2285,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | |||
2220 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 2285 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
2221 | return; | 2286 | return; |
2222 | 2287 | ||
2223 | if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, | 2288 | if (kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, |
2224 | sizeof(u32))) | 2289 | sizeof(u32))) |
2225 | return; | 2290 | return; |
2226 | 2291 | ||
2227 | apic_set_tpr(vcpu->arch.apic, data & 0xff); | 2292 | apic_set_tpr(vcpu->arch.apic, data & 0xff); |
@@ -2273,14 +2338,14 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) | |||
2273 | max_isr = 0; | 2338 | max_isr = 0; |
2274 | data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); | 2339 | data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); |
2275 | 2340 | ||
2276 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, | 2341 | kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, |
2277 | sizeof(u32)); | 2342 | sizeof(u32)); |
2278 | } | 2343 | } |
2279 | 2344 | ||
2280 | int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) | 2345 | int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) |
2281 | { | 2346 | { |
2282 | if (vapic_addr) { | 2347 | if (vapic_addr) { |
2283 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, | 2348 | if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, |
2284 | &vcpu->arch.apic->vapic_cache, | 2349 | &vcpu->arch.apic->vapic_cache, |
2285 | vapic_addr, sizeof(u32))) | 2350 | vapic_addr, sizeof(u32))) |
2286 | return -EINVAL; | 2351 | return -EINVAL; |
@@ -2374,7 +2439,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | |||
2374 | vcpu->arch.pv_eoi.msr_val = data; | 2439 | vcpu->arch.pv_eoi.msr_val = data; |
2375 | if (!pv_eoi_enabled(vcpu)) | 2440 | if (!pv_eoi_enabled(vcpu)) |
2376 | return 0; | 2441 | return 0; |
2377 | return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, | 2442 | return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_eoi.data, |
2378 | addr, sizeof(u8)); | 2443 | addr, sizeof(u8)); |
2379 | } | 2444 | } |
2380 | 2445 | ||
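The kvm_vcpu_read_guest_cached()/kvm_vcpu_write_guest_cached() and kvm_vcpu_gfn_to_hva_cache_init() conversions above come from the vCPU-based gfn->hva cache support in this pull: the cache now resolves against the vCPU's address space rather than the VM-wide one. Illustrative usage, mirroring the call sites above (vcpu and gpa come from the caller):

        struct gfn_to_hva_cache cache;
        u32 val = 0;

        if (!kvm_vcpu_gfn_to_hva_cache_init(vcpu, &cache, gpa, sizeof(val)))
                kvm_vcpu_read_guest_cached(vcpu, &cache, &val, sizeof(val));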
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index ff8039d61672..bcbe811f3b97 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -71,8 +71,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, | |||
71 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 71 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
72 | int short_hand, unsigned int dest, int dest_mode); | 72 | int short_hand, unsigned int dest, int dest_mode); |
73 | 73 | ||
74 | void __kvm_apic_update_irr(u32 *pir, void *regs); | 74 | int __kvm_apic_update_irr(u32 *pir, void *regs); |
75 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | 75 | int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); |
76 | void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); | ||
76 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | 77 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
77 | struct dest_map *dest_map); | 78 | struct dest_map *dest_map); |
78 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | 79 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); |
@@ -203,17 +204,6 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) | |||
203 | return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | 204 | return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); |
204 | } | 205 | } |
205 | 206 | ||
206 | static inline u32 kvm_apic_id(struct kvm_lapic *apic) | ||
207 | { | ||
208 | /* To avoid a race between apic_base and following APIC_ID update when | ||
209 | * switching to x2apic_mode, the x2apic mode returns initial x2apic id. | ||
210 | */ | ||
211 | if (apic_x2apic_mode(apic)) | ||
212 | return apic->vcpu->vcpu_id; | ||
213 | |||
214 | return kvm_lapic_get_reg(apic, APIC_ID) >> 24; | ||
215 | } | ||
216 | |||
217 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | 207 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); |
218 | 208 | ||
219 | void wait_lapic_expire(struct kvm_vcpu *vcpu); | 209 | void wait_lapic_expire(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7012de4a1fed..2fd7586aad4d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -37,6 +37,8 @@ | |||
37 | #include <linux/srcu.h> | 37 | #include <linux/srcu.h> |
38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include <linux/uaccess.h> | 39 | #include <linux/uaccess.h> |
40 | #include <linux/hash.h> | ||
41 | #include <linux/kern_levels.h> | ||
40 | 42 | ||
41 | #include <asm/page.h> | 43 | #include <asm/page.h> |
42 | #include <asm/cmpxchg.h> | 44 | #include <asm/cmpxchg.h> |
@@ -129,6 +131,10 @@ module_param(dbg, bool, 0644); | |||
129 | #define ACC_USER_MASK PT_USER_MASK | 131 | #define ACC_USER_MASK PT_USER_MASK |
130 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) | 132 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) |
131 | 133 | ||
134 | /* The mask for the R/X bits in EPT PTEs */ | ||
135 | #define PT64_EPT_READABLE_MASK 0x1ull | ||
136 | #define PT64_EPT_EXECUTABLE_MASK 0x4ull | ||
137 | |||
132 | #include <trace/events/kvm.h> | 138 | #include <trace/events/kvm.h> |
133 | 139 | ||
134 | #define CREATE_TRACE_POINTS | 140 | #define CREATE_TRACE_POINTS |
@@ -178,15 +184,40 @@ static u64 __read_mostly shadow_dirty_mask; | |||
178 | static u64 __read_mostly shadow_mmio_mask; | 184 | static u64 __read_mostly shadow_mmio_mask; |
179 | static u64 __read_mostly shadow_present_mask; | 185 | static u64 __read_mostly shadow_present_mask; |
180 | 186 | ||
187 | /* | ||
188 | * The mask/value to distinguish a PTE that has been marked not-present for | ||
189 | * access tracking purposes. | ||
190 | * The mask would be either 0 if access tracking is disabled, or | ||
191 | * SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled. | ||
192 | */ | ||
193 | static u64 __read_mostly shadow_acc_track_mask; | ||
194 | static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; | ||
195 | |||
196 | /* | ||
197 | * The mask/shift to use for saving the original R/X bits when marking the PTE | ||
198 | * as not-present for access tracking purposes. We do not save the W bit as the | ||
199 | * PTEs being access tracked also need to be dirty tracked, so the W bit will be | ||
200 | * restored only when a write is attempted to the page. | ||
201 | */ | ||
202 | static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | | ||
203 | PT64_EPT_EXECUTABLE_MASK; | ||
204 | static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; | ||
205 | |||
181 | static void mmu_spte_set(u64 *sptep, u64 spte); | 206 | static void mmu_spte_set(u64 *sptep, u64 spte); |
182 | static void mmu_free_roots(struct kvm_vcpu *vcpu); | 207 | static void mmu_free_roots(struct kvm_vcpu *vcpu); |
183 | 208 | ||
184 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) | 209 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) |
185 | { | 210 | { |
186 | shadow_mmio_mask = mmio_mask; | 211 | shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; |
187 | } | 212 | } |
188 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); | 213 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); |
189 | 214 | ||
215 | static inline bool is_access_track_spte(u64 spte) | ||
216 | { | ||
217 | /* Always false if shadow_acc_track_mask is zero. */ | ||
218 | return (spte & shadow_acc_track_mask) == shadow_acc_track_value; | ||
219 | } | ||
220 | |||
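As a side note on the new bit scheme: the classification above can be seen end to end in a few lines of standalone C. The concrete values below (SPTE_SPECIAL_MASK as bit 62, VMX_EPT_RWX_MASK as 0x7) are assumptions made for illustration only; just the mask/value comparison mirrors the helper in this hunk.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed values, for illustration only. */
    #define SPTE_SPECIAL_MASK (1ULL << 62)
    #define VMX_EPT_RWX_MASK  0x7ULL

    static const uint64_t acc_track_mask  = SPTE_SPECIAL_MASK | VMX_EPT_RWX_MASK;
    static const uint64_t acc_track_value = SPTE_SPECIAL_MASK;

    /* "Access tracked" means: special bit set, all of R/W/X clear. */
    static bool is_access_track(uint64_t spte)
    {
        return (spte & acc_track_mask) == acc_track_value;
    }

    int main(void)
    {
        uint64_t normal  = 0x123000ULL | 0x7ULL;            /* present, R/W/X set */
        uint64_t tracked = 0x123000ULL | SPTE_SPECIAL_MASK; /* marked not-present */

        printf("normal=%d tracked=%d\n",
               is_access_track(normal), is_access_track(tracked)); /* prints 0 1 */
        return 0;
    }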
190 | /* | 221 | /* |
191 | * the low bit of the generation number is always presumed to be zero. | 222 | * the low bit of the generation number is always presumed to be zero. |
192 | * This disables mmio caching during memslot updates. The concept is | 223 | * This disables mmio caching during memslot updates. The concept is |
@@ -284,17 +315,35 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) | |||
284 | } | 315 | } |
285 | 316 | ||
286 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 317 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
287 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask) | 318 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, |
319 | u64 acc_track_mask) | ||
288 | { | 320 | { |
321 | if (acc_track_mask != 0) | ||
322 | acc_track_mask |= SPTE_SPECIAL_MASK; | ||
323 | |||
289 | shadow_user_mask = user_mask; | 324 | shadow_user_mask = user_mask; |
290 | shadow_accessed_mask = accessed_mask; | 325 | shadow_accessed_mask = accessed_mask; |
291 | shadow_dirty_mask = dirty_mask; | 326 | shadow_dirty_mask = dirty_mask; |
292 | shadow_nx_mask = nx_mask; | 327 | shadow_nx_mask = nx_mask; |
293 | shadow_x_mask = x_mask; | 328 | shadow_x_mask = x_mask; |
294 | shadow_present_mask = p_mask; | 329 | shadow_present_mask = p_mask; |
330 | shadow_acc_track_mask = acc_track_mask; | ||
331 | WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0); | ||
295 | } | 332 | } |
296 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 333 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
297 | 334 | ||
335 | void kvm_mmu_clear_all_pte_masks(void) | ||
336 | { | ||
337 | shadow_user_mask = 0; | ||
338 | shadow_accessed_mask = 0; | ||
339 | shadow_dirty_mask = 0; | ||
340 | shadow_nx_mask = 0; | ||
341 | shadow_x_mask = 0; | ||
342 | shadow_mmio_mask = 0; | ||
343 | shadow_present_mask = 0; | ||
344 | shadow_acc_track_mask = 0; | ||
345 | } | ||
346 | |||
298 | static int is_cpuid_PSE36(void) | 347 | static int is_cpuid_PSE36(void) |
299 | { | 348 | { |
300 | return 1; | 349 | return 1; |
@@ -307,7 +356,7 @@ static int is_nx(struct kvm_vcpu *vcpu) | |||
307 | 356 | ||
308 | static int is_shadow_present_pte(u64 pte) | 357 | static int is_shadow_present_pte(u64 pte) |
309 | { | 358 | { |
310 | return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte); | 359 | return (pte != 0) && !is_mmio_spte(pte); |
311 | } | 360 | } |
312 | 361 | ||
313 | static int is_large_pte(u64 pte) | 362 | static int is_large_pte(u64 pte) |
@@ -324,6 +373,11 @@ static int is_last_spte(u64 pte, int level) | |||
324 | return 0; | 373 | return 0; |
325 | } | 374 | } |
326 | 375 | ||
376 | static bool is_executable_pte(u64 spte) | ||
377 | { | ||
378 | return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask; | ||
379 | } | ||
380 | |||
327 | static kvm_pfn_t spte_to_pfn(u64 pte) | 381 | static kvm_pfn_t spte_to_pfn(u64 pte) |
328 | { | 382 | { |
329 | return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 383 | return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
@@ -473,7 +527,7 @@ retry: | |||
473 | } | 527 | } |
474 | #endif | 528 | #endif |
475 | 529 | ||
476 | static bool spte_is_locklessly_modifiable(u64 spte) | 530 | static bool spte_can_locklessly_be_made_writable(u64 spte) |
477 | { | 531 | { |
478 | return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) == | 532 | return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) == |
479 | (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE); | 533 | (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE); |
@@ -481,36 +535,38 @@ static bool spte_is_locklessly_modifiable(u64 spte) | |||
481 | 535 | ||
482 | static bool spte_has_volatile_bits(u64 spte) | 536 | static bool spte_has_volatile_bits(u64 spte) |
483 | { | 537 | { |
538 | if (!is_shadow_present_pte(spte)) | ||
539 | return false; | ||
540 | |||
484 | /* | 541 | /* |
485 | * Always atomically update spte if it can be updated | 542 | * Always atomically update spte if it can be updated |
486 | * out of mmu-lock, it can ensure dirty bit is not lost, | 543 | * out of mmu-lock, it can ensure dirty bit is not lost, |
487 | * also, it can help us to get a stable is_writable_pte() | 544 | * also, it can help us to get a stable is_writable_pte() |
488 | * to ensure tlb flush is not missed. | 545 | * to ensure tlb flush is not missed. |
489 | */ | 546 | */ |
490 | if (spte_is_locklessly_modifiable(spte)) | 547 | if (spte_can_locklessly_be_made_writable(spte) || |
548 | is_access_track_spte(spte)) | ||
491 | return true; | 549 | return true; |
492 | 550 | ||
493 | if (!shadow_accessed_mask) | 551 | if (shadow_accessed_mask) { |
494 | return false; | 552 | if ((spte & shadow_accessed_mask) == 0 || |
495 | 553 | (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0)) | |
496 | if (!is_shadow_present_pte(spte)) | 554 | return true; |
497 | return false; | 555 | } |
498 | |||
499 | if ((spte & shadow_accessed_mask) && | ||
500 | (!is_writable_pte(spte) || (spte & shadow_dirty_mask))) | ||
501 | return false; | ||
502 | 556 | ||
503 | return true; | 557 | return false; |
504 | } | 558 | } |
505 | 559 | ||
506 | static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask) | 560 | static bool is_accessed_spte(u64 spte) |
507 | { | 561 | { |
508 | return (old_spte & bit_mask) && !(new_spte & bit_mask); | 562 | return shadow_accessed_mask ? spte & shadow_accessed_mask |
563 | : !is_access_track_spte(spte); | ||
509 | } | 564 | } |
510 | 565 | ||
511 | static bool spte_is_bit_changed(u64 old_spte, u64 new_spte, u64 bit_mask) | 566 | static bool is_dirty_spte(u64 spte) |
512 | { | 567 | { |
513 | return (old_spte & bit_mask) != (new_spte & bit_mask); | 568 | return shadow_dirty_mask ? spte & shadow_dirty_mask |
569 | : spte & PT_WRITABLE_MASK; | ||
514 | } | 570 | } |
515 | 571 | ||
516 | /* Rules for using mmu_spte_set: | 572 | /* Rules for using mmu_spte_set: |
@@ -525,25 +581,19 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte) | |||
525 | __set_spte(sptep, new_spte); | 581 | __set_spte(sptep, new_spte); |
526 | } | 582 | } |
527 | 583 | ||
528 | /* Rules for using mmu_spte_update: | 584 | /* |
529 | * Update the state bits, it means the mapped pfn is not changed. | 585 | * Update the SPTE (excluding the PFN), but do not track changes in its |
530 | * | 586 | * accessed/dirty status. |
531 | * Whenever we overwrite a writable spte with a read-only one we | ||
532 | * should flush remote TLBs. Otherwise rmap_write_protect | ||
533 | * will find a read-only spte, even though the writable spte | ||
534 | * might be cached on a CPU's TLB, the return value indicates this | ||
535 | * case. | ||
536 | */ | 587 | */ |
537 | static bool mmu_spte_update(u64 *sptep, u64 new_spte) | 588 | static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte) |
538 | { | 589 | { |
539 | u64 old_spte = *sptep; | 590 | u64 old_spte = *sptep; |
540 | bool ret = false; | ||
541 | 591 | ||
542 | WARN_ON(!is_shadow_present_pte(new_spte)); | 592 | WARN_ON(!is_shadow_present_pte(new_spte)); |
543 | 593 | ||
544 | if (!is_shadow_present_pte(old_spte)) { | 594 | if (!is_shadow_present_pte(old_spte)) { |
545 | mmu_spte_set(sptep, new_spte); | 595 | mmu_spte_set(sptep, new_spte); |
546 | return ret; | 596 | return old_spte; |
547 | } | 597 | } |
548 | 598 | ||
549 | if (!spte_has_volatile_bits(old_spte)) | 599 | if (!spte_has_volatile_bits(old_spte)) |
@@ -551,45 +601,62 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) | |||
551 | else | 601 | else |
552 | old_spte = __update_clear_spte_slow(sptep, new_spte); | 602 | old_spte = __update_clear_spte_slow(sptep, new_spte); |
553 | 603 | ||
604 | WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte)); | ||
605 | |||
606 | return old_spte; | ||
607 | } | ||
608 | |||
609 | /* Rules for using mmu_spte_update: | ||
610 | * Update the state bits, it means the mapped pfn is not changed. | ||
611 | * | ||
612 | * Whenever we overwrite a writable spte with a read-only one we | ||
613 | * should flush remote TLBs. Otherwise rmap_write_protect | ||
614 | * will find a read-only spte, even though the writable spte | ||
615 | * might be cached on a CPU's TLB, the return value indicates this | ||
616 | * case. | ||
617 | * | ||
618 | * Returns true if the TLB needs to be flushed | ||
619 | */ | ||
620 | static bool mmu_spte_update(u64 *sptep, u64 new_spte) | ||
621 | { | ||
622 | bool flush = false; | ||
623 | u64 old_spte = mmu_spte_update_no_track(sptep, new_spte); | ||
624 | |||
625 | if (!is_shadow_present_pte(old_spte)) | ||
626 | return false; | ||
627 | |||
554 | /* | 628 | /* |
555 | * For the spte updated out of mmu-lock is safe, since | 629 | * For the spte updated out of mmu-lock is safe, since |
556 | * we always atomically update it, see the comments in | 630 | * we always atomically update it, see the comments in |
557 | * spte_has_volatile_bits(). | 631 | * spte_has_volatile_bits(). |
558 | */ | 632 | */ |
559 | if (spte_is_locklessly_modifiable(old_spte) && | 633 | if (spte_can_locklessly_be_made_writable(old_spte) && |
560 | !is_writable_pte(new_spte)) | 634 | !is_writable_pte(new_spte)) |
561 | ret = true; | 635 | flush = true; |
562 | |||
563 | if (!shadow_accessed_mask) { | ||
564 | /* | ||
565 | * We don't set page dirty when dropping non-writable spte. | ||
566 | * So do it now if the new spte is becoming non-writable. | ||
567 | */ | ||
568 | if (ret) | ||
569 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); | ||
570 | return ret; | ||
571 | } | ||
572 | 636 | ||
573 | /* | 637 | /* |
574 | * Flush TLB when accessed/dirty bits are changed in the page tables, | 638 | * Flush TLB when accessed/dirty states are changed in the page tables, |
575 | * to guarantee consistency between TLB and page tables. | 639 | * to guarantee consistency between TLB and page tables. |
576 | */ | 640 | */ |
577 | if (spte_is_bit_changed(old_spte, new_spte, | ||
578 | shadow_accessed_mask | shadow_dirty_mask)) | ||
579 | ret = true; | ||
580 | 641 | ||
581 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) | 642 | if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) { |
643 | flush = true; | ||
582 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); | 644 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); |
583 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) | 645 | } |
646 | |||
647 | if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) { | ||
648 | flush = true; | ||
584 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); | 649 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); |
650 | } | ||
585 | 651 | ||
586 | return ret; | 652 | return flush; |
587 | } | 653 | } |
588 | 654 | ||
589 | /* | 655 | /* |
590 | * Rules for using mmu_spte_clear_track_bits: | 656 | * Rules for using mmu_spte_clear_track_bits: |
591 | * It sets the sptep from present to nonpresent, and track the | 657 | * It sets the sptep from present to nonpresent, and track the |
592 | * state bits, it is used to clear the last level sptep. | 658 | * state bits, it is used to clear the last level sptep. |
659 | * Returns non-zero if the PTE was previously valid. | ||
593 | */ | 660 | */ |
594 | static int mmu_spte_clear_track_bits(u64 *sptep) | 661 | static int mmu_spte_clear_track_bits(u64 *sptep) |
595 | { | 662 | { |
@@ -613,11 +680,12 @@ static int mmu_spte_clear_track_bits(u64 *sptep) | |||
613 | */ | 680 | */ |
614 | WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn))); | 681 | WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn))); |
615 | 682 | ||
616 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | 683 | if (is_accessed_spte(old_spte)) |
617 | kvm_set_pfn_accessed(pfn); | 684 | kvm_set_pfn_accessed(pfn); |
618 | if (old_spte & (shadow_dirty_mask ? shadow_dirty_mask : | 685 | |
619 | PT_WRITABLE_MASK)) | 686 | if (is_dirty_spte(old_spte)) |
620 | kvm_set_pfn_dirty(pfn); | 687 | kvm_set_pfn_dirty(pfn); |
688 | |||
621 | return 1; | 689 | return 1; |
622 | } | 690 | } |
623 | 691 | ||
@@ -636,6 +704,78 @@ static u64 mmu_spte_get_lockless(u64 *sptep) | |||
636 | return __get_spte_lockless(sptep); | 704 | return __get_spte_lockless(sptep); |
637 | } | 705 | } |
638 | 706 | ||
707 | static u64 mark_spte_for_access_track(u64 spte) | ||
708 | { | ||
709 | if (shadow_accessed_mask != 0) | ||
710 | return spte & ~shadow_accessed_mask; | ||
711 | |||
712 | if (shadow_acc_track_mask == 0 || is_access_track_spte(spte)) | ||
713 | return spte; | ||
714 | |||
715 | /* | ||
716 | * Making an Access Tracking PTE will result in removal of write access | ||
717 | * from the PTE. So, verify that we will be able to restore the write | ||
718 | * access in the fast page fault path later on. | ||
719 | */ | ||
720 | WARN_ONCE((spte & PT_WRITABLE_MASK) && | ||
721 | !spte_can_locklessly_be_made_writable(spte), | ||
722 | "kvm: Writable SPTE is not locklessly dirty-trackable\n"); | ||
723 | |||
724 | WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << | ||
725 | shadow_acc_track_saved_bits_shift), | ||
726 | "kvm: Access Tracking saved bit locations are not zero\n"); | ||
727 | |||
728 | spte |= (spte & shadow_acc_track_saved_bits_mask) << | ||
729 | shadow_acc_track_saved_bits_shift; | ||
730 | spte &= ~shadow_acc_track_mask; | ||
731 | spte |= shadow_acc_track_value; | ||
732 | |||
733 | return spte; | ||
734 | } | ||
735 | |||
736 | /* Restore an acc-track PTE back to a regular PTE */ | ||
737 | static u64 restore_acc_track_spte(u64 spte) | ||
738 | { | ||
739 | u64 new_spte = spte; | ||
740 | u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) | ||
741 | & shadow_acc_track_saved_bits_mask; | ||
742 | |||
743 | WARN_ON_ONCE(!is_access_track_spte(spte)); | ||
744 | |||
745 | new_spte &= ~shadow_acc_track_mask; | ||
746 | new_spte &= ~(shadow_acc_track_saved_bits_mask << | ||
747 | shadow_acc_track_saved_bits_shift); | ||
748 | new_spte |= saved_bits; | ||
749 | |||
750 | return new_spte; | ||
751 | } | ||
752 | |||
753 | /* Returns the Accessed status of the PTE and resets it at the same time. */ | ||
754 | static bool mmu_spte_age(u64 *sptep) | ||
755 | { | ||
756 | u64 spte = mmu_spte_get_lockless(sptep); | ||
757 | |||
758 | if (!is_accessed_spte(spte)) | ||
759 | return false; | ||
760 | |||
761 | if (shadow_accessed_mask) { | ||
762 | clear_bit((ffs(shadow_accessed_mask) - 1), | ||
763 | (unsigned long *)sptep); | ||
764 | } else { | ||
765 | /* | ||
766 | * Capture the dirty status of the page, so that it doesn't get | ||
767 | * lost when the SPTE is marked for access tracking. | ||
768 | */ | ||
769 | if (is_writable_pte(spte)) | ||
770 | kvm_set_pfn_dirty(spte_to_pfn(spte)); | ||
771 | |||
772 | spte = mark_spte_for_access_track(spte); | ||
773 | mmu_spte_update_no_track(sptep, spte); | ||
774 | } | ||
775 | |||
776 | return true; | ||
777 | } | ||
778 | |||
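A round-trip example of the save/clear/restore dance implemented above may help. It assumes PT64_SECOND_AVAIL_BITS_SHIFT is 52, the EPT R and X bits are 0x1 and 0x4 as defined earlier in this file, and SPTE_SPECIAL_MASK is bit 62 (an assumption); marking and then restoring an SPTE hands back the original R/X permissions.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed constants, for illustration only. */
    #define EPT_READ        0x1ULL
    #define EPT_EXEC        0x4ULL
    #define SAVED_MASK      (EPT_READ | EPT_EXEC)
    #define SAVED_SHIFT     52
    #define SPECIAL_BIT     (1ULL << 62)
    #define ACC_TRACK_MASK  (SPECIAL_BIT | 0x7ULL)

    static uint64_t mark_for_access_track(uint64_t spte)
    {
        spte |= (spte & SAVED_MASK) << SAVED_SHIFT; /* stash R/X in the high avail bits */
        spte &= ~ACC_TRACK_MASK;                    /* clear R/W/X                      */
        spte |= SPECIAL_BIT;                        /* tag as access-tracked            */
        return spte;
    }

    static uint64_t restore_access_track(uint64_t spte)
    {
        uint64_t saved = (spte >> SAVED_SHIFT) & SAVED_MASK;

        spte &= ~ACC_TRACK_MASK;                    /* drop the tag          */
        spte &= ~(SAVED_MASK << SAVED_SHIFT);       /* drop the stashed copy */
        return spte | saved;                        /* put R/X back          */
    }

    int main(void)
    {
        uint64_t orig     = 0x0000000123456000ULL | EPT_READ | EPT_EXEC;
        uint64_t marked   = mark_for_access_track(orig);
        uint64_t restored = restore_access_track(marked);

        assert((marked & 0x7ULL) == 0);  /* no longer R/W/X-present  */
        assert(restored == orig);        /* permissions recovered    */
        printf("round trip ok: %#llx\n", (unsigned long long)restored);
        return 0;
    }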
639 | static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) | 779 | static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) |
640 | { | 780 | { |
641 | /* | 781 | /* |
@@ -1212,7 +1352,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect) | |||
1212 | u64 spte = *sptep; | 1352 | u64 spte = *sptep; |
1213 | 1353 | ||
1214 | if (!is_writable_pte(spte) && | 1354 | if (!is_writable_pte(spte) && |
1215 | !(pt_protect && spte_is_locklessly_modifiable(spte))) | 1355 | !(pt_protect && spte_can_locklessly_be_made_writable(spte))) |
1216 | return false; | 1356 | return false; |
1217 | 1357 | ||
1218 | rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); | 1358 | rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); |
@@ -1420,7 +1560,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, | |||
1420 | restart: | 1560 | restart: |
1421 | for_each_rmap_spte(rmap_head, &iter, sptep) { | 1561 | for_each_rmap_spte(rmap_head, &iter, sptep) { |
1422 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", | 1562 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", |
1423 | sptep, *sptep, gfn, level); | 1563 | sptep, *sptep, gfn, level); |
1424 | 1564 | ||
1425 | need_flush = 1; | 1565 | need_flush = 1; |
1426 | 1566 | ||
@@ -1433,7 +1573,8 @@ restart: | |||
1433 | 1573 | ||
1434 | new_spte &= ~PT_WRITABLE_MASK; | 1574 | new_spte &= ~PT_WRITABLE_MASK; |
1435 | new_spte &= ~SPTE_HOST_WRITEABLE; | 1575 | new_spte &= ~SPTE_HOST_WRITEABLE; |
1436 | new_spte &= ~shadow_accessed_mask; | 1576 | |
1577 | new_spte = mark_spte_for_access_track(new_spte); | ||
1437 | 1578 | ||
1438 | mmu_spte_clear_track_bits(sptep); | 1579 | mmu_spte_clear_track_bits(sptep); |
1439 | mmu_spte_set(sptep, new_spte); | 1580 | mmu_spte_set(sptep, new_spte); |
@@ -1595,15 +1736,8 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, | |||
1595 | struct rmap_iterator uninitialized_var(iter); | 1736 | struct rmap_iterator uninitialized_var(iter); |
1596 | int young = 0; | 1737 | int young = 0; |
1597 | 1738 | ||
1598 | BUG_ON(!shadow_accessed_mask); | 1739 | for_each_rmap_spte(rmap_head, &iter, sptep) |
1599 | 1740 | young |= mmu_spte_age(sptep); | |
1600 | for_each_rmap_spte(rmap_head, &iter, sptep) { | ||
1601 | if (*sptep & shadow_accessed_mask) { | ||
1602 | young = 1; | ||
1603 | clear_bit((ffs(shadow_accessed_mask) - 1), | ||
1604 | (unsigned long *)sptep); | ||
1605 | } | ||
1606 | } | ||
1607 | 1741 | ||
1608 | trace_kvm_age_page(gfn, level, slot, young); | 1742 | trace_kvm_age_page(gfn, level, slot, young); |
1609 | return young; | 1743 | return young; |
@@ -1615,24 +1749,20 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, | |||
1615 | { | 1749 | { |
1616 | u64 *sptep; | 1750 | u64 *sptep; |
1617 | struct rmap_iterator iter; | 1751 | struct rmap_iterator iter; |
1618 | int young = 0; | ||
1619 | 1752 | ||
1620 | /* | 1753 | /* |
1621 | * If there's no access bit in the secondary pte set by the | 1754 | * If there's no access bit in the secondary pte set by the hardware and |
1622 | * hardware it's up to gup-fast/gup to set the access bit in | 1755 | * fast access tracking is also not enabled, it's up to gup-fast/gup to |
1623 | * the primary pte or in the page structure. | 1756 | * set the access bit in the primary pte or in the page structure. |
1624 | */ | 1757 | */ |
1625 | if (!shadow_accessed_mask) | 1758 | if (!shadow_accessed_mask && !shadow_acc_track_mask) |
1626 | goto out; | 1759 | goto out; |
1627 | 1760 | ||
1628 | for_each_rmap_spte(rmap_head, &iter, sptep) { | 1761 | for_each_rmap_spte(rmap_head, &iter, sptep) |
1629 | if (*sptep & shadow_accessed_mask) { | 1762 | if (is_accessed_spte(*sptep)) |
1630 | young = 1; | 1763 | return 1; |
1631 | break; | ||
1632 | } | ||
1633 | } | ||
1634 | out: | 1764 | out: |
1635 | return young; | 1765 | return 0; |
1636 | } | 1766 | } |
1637 | 1767 | ||
1638 | #define RMAP_RECYCLE_THRESHOLD 1000 | 1768 | #define RMAP_RECYCLE_THRESHOLD 1000 |
@@ -1660,7 +1790,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) | |||
1660 | * This has some overhead, but not as much as the cost of swapping | 1790 | * This has some overhead, but not as much as the cost of swapping |
1661 | * out actively used pages or breaking up actively used hugepages. | 1791 | * out actively used pages or breaking up actively used hugepages. |
1662 | */ | 1792 | */ |
1663 | if (!shadow_accessed_mask) | 1793 | if (!shadow_accessed_mask && !shadow_acc_track_mask) |
1664 | return kvm_handle_hva_range(kvm, start, end, 0, | 1794 | return kvm_handle_hva_range(kvm, start, end, 0, |
1665 | kvm_unmap_rmapp); | 1795 | kvm_unmap_rmapp); |
1666 | 1796 | ||
@@ -1713,7 +1843,7 @@ static void kvm_mmu_free_page(struct kvm_mmu_page *sp) | |||
1713 | 1843 | ||
1714 | static unsigned kvm_page_table_hashfn(gfn_t gfn) | 1844 | static unsigned kvm_page_table_hashfn(gfn_t gfn) |
1715 | { | 1845 | { |
1716 | return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1); | 1846 | return hash_64(gfn, KVM_MMU_HASH_SHIFT); |
1717 | } | 1847 | } |
1718 | 1848 | ||
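The motivation for switching to hash_64() is collision behaviour: the old function kept only the low KVM_MMU_HASH_SHIFT bits of the gfn, so gfns that differ only in higher bits always land in the same bucket. The sketch below demonstrates that with a stand-in multiplicative hash; the constant and the table size (assumed 2^12 buckets) are illustrative, not the kernel's exact parameters.

    #include <stdint.h>
    #include <stdio.h>

    #define HASH_BITS 12                       /* assumed table size: 4096 buckets */

    /* Old scheme: keep only the low bits of the gfn. */
    static unsigned hash_low_bits(uint64_t gfn)
    {
        return gfn & ((1u << HASH_BITS) - 1);
    }

    /* Stand-in for hash_64(): a multiplicative hash that mixes the high bits in. */
    static unsigned hash_multiplicative(uint64_t gfn)
    {
        return (unsigned)((gfn * 0x9E3779B97F4A7C15ULL) >> (64 - HASH_BITS));
    }

    int main(void)
    {
        /* gfns identical in their low 12 bits, spaced 1 GiB apart (4 KiB pages). */
        for (int i = 0; i < 4; i++) {
            uint64_t gfn = (uint64_t)i << 18;
            printf("gfn %#llx: low-bits bucket %u, mixed bucket %u\n",
                   (unsigned long long)gfn, hash_low_bits(gfn),
                   hash_multiplicative(gfn));
        }
        /* The low-bits bucket is 0 for every gfn; the mixed hash spreads them. */
        return 0;
    }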
1719 | static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, | 1849 | static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, |
@@ -1904,17 +2034,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
1904 | * since it has been deleted from active_mmu_pages but still can be found | 2034 | * since it has been deleted from active_mmu_pages but still can be found |
1905 | * at hast list. | 2035 | * at hast list. |
1906 | * | 2036 | * |
1907 | * for_each_gfn_valid_sp() has skipped that kind of pages. | 2037 | * for_each_valid_sp() has skipped that kind of pages. |
1908 | */ | 2038 | */ |
1909 | #define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \ | 2039 | #define for_each_valid_sp(_kvm, _sp, _gfn) \ |
1910 | hlist_for_each_entry(_sp, \ | 2040 | hlist_for_each_entry(_sp, \ |
1911 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ | 2041 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ |
1912 | if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \ | 2042 | if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \ |
1913 | || (_sp)->role.invalid) {} else | 2043 | } else |
1914 | 2044 | ||
1915 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ | 2045 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ |
1916 | for_each_gfn_valid_sp(_kvm, _sp, _gfn) \ | 2046 | for_each_valid_sp(_kvm, _sp, _gfn) \ |
1917 | if ((_sp)->role.direct) {} else | 2047 | if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else |
1918 | 2048 | ||
1919 | /* @sp->gfn should be write-protected at the call site */ | 2049 | /* @sp->gfn should be write-protected at the call site */ |
1920 | static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 2050 | static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
@@ -2116,6 +2246,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2116 | struct kvm_mmu_page *sp; | 2246 | struct kvm_mmu_page *sp; |
2117 | bool need_sync = false; | 2247 | bool need_sync = false; |
2118 | bool flush = false; | 2248 | bool flush = false; |
2249 | int collisions = 0; | ||
2119 | LIST_HEAD(invalid_list); | 2250 | LIST_HEAD(invalid_list); |
2120 | 2251 | ||
2121 | role = vcpu->arch.mmu.base_role; | 2252 | role = vcpu->arch.mmu.base_role; |
@@ -2130,7 +2261,12 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2130 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 2261 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
2131 | role.quadrant = quadrant; | 2262 | role.quadrant = quadrant; |
2132 | } | 2263 | } |
2133 | for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) { | 2264 | for_each_valid_sp(vcpu->kvm, sp, gfn) { |
2265 | if (sp->gfn != gfn) { | ||
2266 | collisions++; | ||
2267 | continue; | ||
2268 | } | ||
2269 | |||
2134 | if (!need_sync && sp->unsync) | 2270 | if (!need_sync && sp->unsync) |
2135 | need_sync = true; | 2271 | need_sync = true; |
2136 | 2272 | ||
@@ -2153,7 +2289,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2153 | 2289 | ||
2154 | __clear_sp_write_flooding_count(sp); | 2290 | __clear_sp_write_flooding_count(sp); |
2155 | trace_kvm_mmu_get_page(sp, false); | 2291 | trace_kvm_mmu_get_page(sp, false); |
2156 | return sp; | 2292 | goto out; |
2157 | } | 2293 | } |
2158 | 2294 | ||
2159 | ++vcpu->kvm->stat.mmu_cache_miss; | 2295 | ++vcpu->kvm->stat.mmu_cache_miss; |
@@ -2183,6 +2319,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2183 | trace_kvm_mmu_get_page(sp, true); | 2319 | trace_kvm_mmu_get_page(sp, true); |
2184 | 2320 | ||
2185 | kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); | 2321 | kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); |
2322 | out: | ||
2323 | if (collisions > vcpu->kvm->stat.max_mmu_page_hash_collisions) | ||
2324 | vcpu->kvm->stat.max_mmu_page_hash_collisions = collisions; | ||
2186 | return sp; | 2325 | return sp; |
2187 | } | 2326 | } |
2188 | 2327 | ||
@@ -2583,6 +2722,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2583 | spte |= shadow_dirty_mask; | 2722 | spte |= shadow_dirty_mask; |
2584 | } | 2723 | } |
2585 | 2724 | ||
2725 | if (speculative) | ||
2726 | spte = mark_spte_for_access_track(spte); | ||
2727 | |||
2586 | set_pte: | 2728 | set_pte: |
2587 | if (mmu_spte_update(sptep, spte)) | 2729 | if (mmu_spte_update(sptep, spte)) |
2588 | kvm_flush_remote_tlbs(vcpu->kvm); | 2730 | kvm_flush_remote_tlbs(vcpu->kvm); |
@@ -2636,7 +2778,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, | |||
2636 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); | 2778 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); |
2637 | pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", | 2779 | pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", |
2638 | is_large_pte(*sptep)? "2MB" : "4kB", | 2780 | is_large_pte(*sptep)? "2MB" : "4kB", |
2639 | *sptep & PT_PRESENT_MASK ?"RW":"R", gfn, | 2781 | *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn, |
2640 | *sptep, sptep); | 2782 | *sptep, sptep); |
2641 | if (!was_rmapped && is_large_pte(*sptep)) | 2783 | if (!was_rmapped && is_large_pte(*sptep)) |
2642 | ++vcpu->kvm->stat.lpages; | 2784 | ++vcpu->kvm->stat.lpages; |
@@ -2869,33 +3011,43 @@ static bool page_fault_can_be_fast(u32 error_code) | |||
2869 | if (unlikely(error_code & PFERR_RSVD_MASK)) | 3011 | if (unlikely(error_code & PFERR_RSVD_MASK)) |
2870 | return false; | 3012 | return false; |
2871 | 3013 | ||
3014 | /* See if the page fault is due to an NX violation */ | ||
3015 | if (unlikely(((error_code & (PFERR_FETCH_MASK | PFERR_PRESENT_MASK)) | ||
3016 | == (PFERR_FETCH_MASK | PFERR_PRESENT_MASK)))) | ||
3017 | return false; | ||
3018 | |||
2872 | /* | 3019 | /* |
2873 | * #PF can be fast only if the shadow page table is present and it | 3020 | * #PF can be fast if: |
2874 | * is caused by write-protect, that means we just need change the | 3021 | * 1. The shadow page table entry is not present, which could mean that |
2875 | * W bit of the spte which can be done out of mmu-lock. | 3022 | * the fault is potentially caused by access tracking (if enabled). |
3023 | * 2. The shadow page table entry is present and the fault | ||
3024 | * is caused by write-protect, that means we just need change the W | ||
3025 | * bit of the spte which can be done out of mmu-lock. | ||
3026 | * | ||
3027 | * However, if access tracking is disabled we know that a non-present | ||
3028 | * page must be a genuine page fault where we have to create a new SPTE. | ||
3029 | * So, if access tracking is disabled, we return true only for write | ||
3030 | * accesses to a present page. | ||
2876 | */ | 3031 | */ |
2877 | if (!(error_code & PFERR_PRESENT_MASK) || | ||
2878 | !(error_code & PFERR_WRITE_MASK)) | ||
2879 | return false; | ||
2880 | 3032 | ||
2881 | return true; | 3033 | return shadow_acc_track_mask != 0 || |
3034 | ((error_code & (PFERR_WRITE_MASK | PFERR_PRESENT_MASK)) | ||
3035 | == (PFERR_WRITE_MASK | PFERR_PRESENT_MASK)); | ||
2882 | } | 3036 | } |
2883 | 3037 | ||
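Written out over the x86 page-fault error code (P = bit 0, W = bit 1, RSVD = bit 3, I/D fetch = bit 4), the new logic reads: reserved-bit faults are never fast, NX violations (a fetch from a present page) are never fast, and otherwise the fast path is attempted either when access tracking is in use or when the fault is a write to a present page. A small sketch, with the bit values taken from the architectural error-code definition:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PF_P    (1u << 0)   /* page was present        */
    #define PF_W    (1u << 1)   /* fault was a write       */
    #define PF_RSVD (1u << 3)   /* reserved bit set in PTE */
    #define PF_ID   (1u << 4)   /* instruction fetch       */

    static bool can_be_fast(uint32_t err, bool acc_track_enabled)
    {
        if (err & PF_RSVD)
            return false;
        if ((err & (PF_ID | PF_P)) == (PF_ID | PF_P))   /* NX violation */
            return false;
        return acc_track_enabled ||
               (err & (PF_W | PF_P)) == (PF_W | PF_P);  /* write to present page */
    }

    int main(void)
    {
        printf("%d\n", can_be_fast(PF_W | PF_P, false)); /* 1: write-protect fault    */
        printf("%d\n", can_be_fast(0, false));           /* 0: not-present read       */
        printf("%d\n", can_be_fast(0, true));            /* 1: may be access-tracked  */
        printf("%d\n", can_be_fast(PF_ID | PF_P, true)); /* 0: NX violation           */
        return 0;
    }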
3038 | /* | ||
3039 | * Returns true if the SPTE was fixed successfully. Otherwise, | ||
3040 | * someone else modified the SPTE from its original value. | ||
3041 | */ | ||
2884 | static bool | 3042 | static bool |
2885 | fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 3043 | fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
2886 | u64 *sptep, u64 spte) | 3044 | u64 *sptep, u64 old_spte, u64 new_spte) |
2887 | { | 3045 | { |
2888 | gfn_t gfn; | 3046 | gfn_t gfn; |
2889 | 3047 | ||
2890 | WARN_ON(!sp->role.direct); | 3048 | WARN_ON(!sp->role.direct); |
2891 | 3049 | ||
2892 | /* | 3050 | /* |
2893 | * The gfn of direct spte is stable since it is calculated | ||
2894 | * by sp->gfn. | ||
2895 | */ | ||
2896 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); | ||
2897 | |||
2898 | /* | ||
2899 | * Theoretically we could also set dirty bit (and flush TLB) here in | 3051 | * Theoretically we could also set dirty bit (and flush TLB) here in |
2900 | * order to eliminate unnecessary PML logging. See comments in | 3052 | * order to eliminate unnecessary PML logging. See comments in |
2901 | * set_spte. But fast_page_fault is very unlikely to happen with PML | 3053 | * set_spte. But fast_page_fault is very unlikely to happen with PML |
@@ -2907,12 +3059,33 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
2907 | * | 3059 | * |
2908 | * Compare with set_spte where instead shadow_dirty_mask is set. | 3060 | * Compare with set_spte where instead shadow_dirty_mask is set. |
2909 | */ | 3061 | */ |
2910 | if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte) | 3062 | if (cmpxchg64(sptep, old_spte, new_spte) != old_spte) |
3063 | return false; | ||
3064 | |||
3065 | if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) { | ||
3066 | /* | ||
3067 | * The gfn of direct spte is stable since it is | ||
3068 | * calculated by sp->gfn. | ||
3069 | */ | ||
3070 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); | ||
2911 | kvm_vcpu_mark_page_dirty(vcpu, gfn); | 3071 | kvm_vcpu_mark_page_dirty(vcpu, gfn); |
3072 | } | ||
2912 | 3073 | ||
2913 | return true; | 3074 | return true; |
2914 | } | 3075 | } |
2915 | 3076 | ||
3077 | static bool is_access_allowed(u32 fault_err_code, u64 spte) | ||
3078 | { | ||
3079 | if (fault_err_code & PFERR_FETCH_MASK) | ||
3080 | return is_executable_pte(spte); | ||
3081 | |||
3082 | if (fault_err_code & PFERR_WRITE_MASK) | ||
3083 | return is_writable_pte(spte); | ||
3084 | |||
3085 | /* Fault was on Read access */ | ||
3086 | return spte & PT_PRESENT_MASK; | ||
3087 | } | ||
3088 | |||
2916 | /* | 3089 | /* |
2917 | * Return value: | 3090 | * Return value: |
2918 | * - true: let the vcpu to access on the same address again. | 3091 | * - true: let the vcpu to access on the same address again. |
@@ -2923,8 +3096,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
2923 | { | 3096 | { |
2924 | struct kvm_shadow_walk_iterator iterator; | 3097 | struct kvm_shadow_walk_iterator iterator; |
2925 | struct kvm_mmu_page *sp; | 3098 | struct kvm_mmu_page *sp; |
2926 | bool ret = false; | 3099 | bool fault_handled = false; |
2927 | u64 spte = 0ull; | 3100 | u64 spte = 0ull; |
3101 | uint retry_count = 0; | ||
2928 | 3102 | ||
2929 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 3103 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2930 | return false; | 3104 | return false; |
@@ -2933,66 +3107,93 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
2933 | return false; | 3107 | return false; |
2934 | 3108 | ||
2935 | walk_shadow_page_lockless_begin(vcpu); | 3109 | walk_shadow_page_lockless_begin(vcpu); |
2936 | for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) | 3110 | |
2937 | if (!is_shadow_present_pte(spte) || iterator.level < level) | 3111 | do { |
3112 | u64 new_spte; | ||
3113 | |||
3114 | for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) | ||
3115 | if (!is_shadow_present_pte(spte) || | ||
3116 | iterator.level < level) | ||
3117 | break; | ||
3118 | |||
3119 | sp = page_header(__pa(iterator.sptep)); | ||
3120 | if (!is_last_spte(spte, sp->role.level)) | ||
2938 | break; | 3121 | break; |
2939 | 3122 | ||
2940 | /* | 3123 | /* |
2941 | * If the mapping has been changed, let the vcpu fault on the | 3124 | * Check whether the memory access that caused the fault would |
2942 | * same address again. | 3125 | * still cause it if it were to be performed right now. If not, |
2943 | */ | 3126 | * then this is a spurious fault caused by TLB lazily flushed, |
2944 | if (!is_shadow_present_pte(spte)) { | 3127 | * or some other CPU has already fixed the PTE after the |
2945 | ret = true; | 3128 | * current CPU took the fault. |
2946 | goto exit; | 3129 | * |
2947 | } | 3130 | * Need not check the access of upper level table entries since |
3131 | * they are always ACC_ALL. | ||
3132 | */ | ||
3133 | if (is_access_allowed(error_code, spte)) { | ||
3134 | fault_handled = true; | ||
3135 | break; | ||
3136 | } | ||
2948 | 3137 | ||
2949 | sp = page_header(__pa(iterator.sptep)); | 3138 | new_spte = spte; |
2950 | if (!is_last_spte(spte, sp->role.level)) | ||
2951 | goto exit; | ||
2952 | 3139 | ||
2953 | /* | 3140 | if (is_access_track_spte(spte)) |
2954 | * Check if it is a spurious fault caused by TLB lazily flushed. | 3141 | new_spte = restore_acc_track_spte(new_spte); |
2955 | * | ||
2956 | * Need not check the access of upper level table entries since | ||
2957 | * they are always ACC_ALL. | ||
2958 | */ | ||
2959 | if (is_writable_pte(spte)) { | ||
2960 | ret = true; | ||
2961 | goto exit; | ||
2962 | } | ||
2963 | 3142 | ||
2964 | /* | 3143 | /* |
2965 | * Currently, to simplify the code, only the spte write-protected | 3144 | * Currently, to simplify the code, write-protection can |
2966 | * by dirty-log can be fast fixed. | 3145 | * be removed in the fast path only if the SPTE was |
2967 | */ | 3146 | * write-protected for dirty-logging or access tracking. |
2968 | if (!spte_is_locklessly_modifiable(spte)) | 3147 | */ |
2969 | goto exit; | 3148 | if ((error_code & PFERR_WRITE_MASK) && |
3149 | spte_can_locklessly_be_made_writable(spte)) | ||
3150 | { | ||
3151 | new_spte |= PT_WRITABLE_MASK; | ||
2970 | 3152 | ||
2971 | /* | 3153 | /* |
2972 | * Do not fix write-permission on the large spte since we only dirty | 3154 | * Do not fix write-permission on the large spte. Since |
2973 | * the first page into the dirty-bitmap in fast_pf_fix_direct_spte() | 3155 | * we only dirty the first page into the dirty-bitmap in |
2974 | * that means other pages are missed if its slot is dirty-logged. | 3156 | * fast_pf_fix_direct_spte(), other pages are missed |
2975 | * | 3157 | * if its slot has dirty logging enabled. |
2976 | * Instead, we let the slow page fault path create a normal spte to | 3158 | * |
2977 | * fix the access. | 3159 | * Instead, we let the slow page fault path create a |
2978 | * | 3160 | * normal spte to fix the access. |
2979 | * See the comments in kvm_arch_commit_memory_region(). | 3161 | * |
2980 | */ | 3162 | * See the comments in kvm_arch_commit_memory_region(). |
2981 | if (sp->role.level > PT_PAGE_TABLE_LEVEL) | 3163 | */ |
2982 | goto exit; | 3164 | if (sp->role.level > PT_PAGE_TABLE_LEVEL) |
3165 | break; | ||
3166 | } | ||
3167 | |||
3168 | /* Verify that the fault can be handled in the fast path */ | ||
3169 | if (new_spte == spte || | ||
3170 | !is_access_allowed(error_code, new_spte)) | ||
3171 | break; | ||
3172 | |||
3173 | /* | ||
3174 | * Currently, fast page fault only works for direct mapping | ||
3175 | * since the gfn is not stable for indirect shadow page. See | ||
3176 | * Documentation/virtual/kvm/locking.txt to get more detail. | ||
3177 | */ | ||
3178 | fault_handled = fast_pf_fix_direct_spte(vcpu, sp, | ||
3179 | iterator.sptep, spte, | ||
3180 | new_spte); | ||
3181 | if (fault_handled) | ||
3182 | break; | ||
3183 | |||
3184 | if (++retry_count > 4) { | ||
3185 | printk_once(KERN_WARNING | ||
3186 | "kvm: Fast #PF retrying more than 4 times.\n"); | ||
3187 | break; | ||
3188 | } | ||
3189 | |||
3190 | } while (true); | ||
2983 | 3191 | ||
2984 | /* | ||
2985 | * Currently, fast page fault only works for direct mapping since | ||
2986 | * the gfn is not stable for indirect shadow page. | ||
2987 | * See Documentation/virtual/kvm/locking.txt to get more detail. | ||
2988 | */ | ||
2989 | ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte); | ||
2990 | exit: | ||
2991 | trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, | 3192 | trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, |
2992 | spte, ret); | 3193 | spte, fault_handled); |
2993 | walk_shadow_page_lockless_end(vcpu); | 3194 | walk_shadow_page_lockless_end(vcpu); |
2994 | 3195 | ||
2995 | return ret; | 3196 | return fault_handled; |
2996 | } | 3197 | } |
2997 | 3198 | ||
2998 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 3199 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
@@ -5063,6 +5264,8 @@ static void mmu_destroy_caches(void) | |||
5063 | 5264 | ||
5064 | int kvm_mmu_module_init(void) | 5265 | int kvm_mmu_module_init(void) |
5065 | { | 5266 | { |
5267 | kvm_mmu_clear_all_pte_masks(); | ||
5268 | |||
5066 | pte_list_desc_cache = kmem_cache_create("pte_list_desc", | 5269 | pte_list_desc_cache = kmem_cache_create("pte_list_desc", |
5067 | sizeof(struct pte_list_desc), | 5270 | sizeof(struct pte_list_desc), |
5068 | 0, 0, NULL); | 5271 | 0, 0, NULL); |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 08a4d3ab3455..d1efe2c62b3f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -971,8 +971,8 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) | |||
971 | * a particular vCPU. | 971 | * a particular vCPU. |
972 | */ | 972 | */ |
973 | #define SVM_VM_DATA_HASH_BITS 8 | 973 | #define SVM_VM_DATA_HASH_BITS 8 |
974 | DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); | 974 | static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); |
975 | static spinlock_t svm_vm_data_hash_lock; | 975 | static DEFINE_SPINLOCK(svm_vm_data_hash_lock); |
976 | 976 | ||
977 | /* Note: | 977 | /* Note: |
978 | * This function is called from IOMMU driver to notify | 978 | * This function is called from IOMMU driver to notify |
@@ -1077,8 +1077,6 @@ static __init int svm_hardware_setup(void) | |||
1077 | } else { | 1077 | } else { |
1078 | pr_info("AVIC enabled\n"); | 1078 | pr_info("AVIC enabled\n"); |
1079 | 1079 | ||
1080 | hash_init(svm_vm_data_hash); | ||
1081 | spin_lock_init(&svm_vm_data_hash_lock); | ||
1082 | amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); | 1080 | amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); |
1083 | } | 1081 | } |
1084 | } | 1082 | } |
@@ -1159,7 +1157,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1159 | struct vmcb_control_area *control = &svm->vmcb->control; | 1157 | struct vmcb_control_area *control = &svm->vmcb->control; |
1160 | struct vmcb_save_area *save = &svm->vmcb->save; | 1158 | struct vmcb_save_area *save = &svm->vmcb->save; |
1161 | 1159 | ||
1162 | svm->vcpu.fpu_active = 1; | ||
1163 | svm->vcpu.arch.hflags = 0; | 1160 | svm->vcpu.arch.hflags = 0; |
1164 | 1161 | ||
1165 | set_cr_intercept(svm, INTERCEPT_CR0_READ); | 1162 | set_cr_intercept(svm, INTERCEPT_CR0_READ); |
@@ -1901,15 +1898,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
1901 | ulong gcr0 = svm->vcpu.arch.cr0; | 1898 | ulong gcr0 = svm->vcpu.arch.cr0; |
1902 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1899 | u64 *hcr0 = &svm->vmcb->save.cr0; |
1903 | 1900 | ||
1904 | if (!svm->vcpu.fpu_active) | 1901 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) |
1905 | *hcr0 |= SVM_CR0_SELECTIVE_MASK; | 1902 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); |
1906 | else | ||
1907 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | ||
1908 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); | ||
1909 | 1903 | ||
1910 | mark_dirty(svm->vmcb, VMCB_CR); | 1904 | mark_dirty(svm->vmcb, VMCB_CR); |
1911 | 1905 | ||
1912 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1906 | if (gcr0 == *hcr0) { |
1913 | clr_cr_intercept(svm, INTERCEPT_CR0_READ); | 1907 | clr_cr_intercept(svm, INTERCEPT_CR0_READ); |
1914 | clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); | 1908 | clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); |
1915 | } else { | 1909 | } else { |
@@ -1940,8 +1934,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1940 | if (!npt_enabled) | 1934 | if (!npt_enabled) |
1941 | cr0 |= X86_CR0_PG | X86_CR0_WP; | 1935 | cr0 |= X86_CR0_PG | X86_CR0_WP; |
1942 | 1936 | ||
1943 | if (!vcpu->fpu_active) | ||
1944 | cr0 |= X86_CR0_TS; | ||
1945 | /* | 1937 | /* |
1946 | * re-enable caching here because the QEMU bios | 1938 | * re-enable caching here because the QEMU bios |
1947 | * does not do it - this results in some delay at | 1939 | * does not do it - this results in some delay at |
@@ -2160,22 +2152,6 @@ static int ac_interception(struct vcpu_svm *svm) | |||
2160 | return 1; | 2152 | return 1; |
2161 | } | 2153 | } |
2162 | 2154 | ||
2163 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | ||
2164 | { | ||
2165 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2166 | |||
2167 | clr_exception_intercept(svm, NM_VECTOR); | ||
2168 | |||
2169 | svm->vcpu.fpu_active = 1; | ||
2170 | update_cr0_intercept(svm); | ||
2171 | } | ||
2172 | |||
2173 | static int nm_interception(struct vcpu_svm *svm) | ||
2174 | { | ||
2175 | svm_fpu_activate(&svm->vcpu); | ||
2176 | return 1; | ||
2177 | } | ||
2178 | |||
2179 | static bool is_erratum_383(void) | 2155 | static bool is_erratum_383(void) |
2180 | { | 2156 | { |
2181 | int err, i; | 2157 | int err, i; |
@@ -2573,9 +2549,6 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
2573 | if (!npt_enabled && svm->apf_reason == 0) | 2549 | if (!npt_enabled && svm->apf_reason == 0) |
2574 | return NESTED_EXIT_HOST; | 2550 | return NESTED_EXIT_HOST; |
2575 | break; | 2551 | break; |
2576 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | ||
2577 | nm_interception(svm); | ||
2578 | break; | ||
2579 | default: | 2552 | default: |
2580 | break; | 2553 | break; |
2581 | } | 2554 | } |
@@ -4020,7 +3993,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
4020 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 3993 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
4021 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 3994 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
4022 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 3995 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
4023 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | ||
4024 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 3996 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
4025 | [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, | 3997 | [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, |
4026 | [SVM_EXIT_INTR] = intr_interception, | 3998 | [SVM_EXIT_INTR] = intr_interception, |
@@ -4182,6 +4154,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
4182 | 4154 | ||
4183 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); | 4155 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); |
4184 | 4156 | ||
4157 | vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF); | ||
4158 | |||
4185 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) | 4159 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) |
4186 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 4160 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
4187 | if (npt_enabled) | 4161 | if (npt_enabled) |
@@ -4357,11 +4331,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | |||
4357 | return; | 4331 | return; |
4358 | } | 4332 | } |
4359 | 4333 | ||
4360 | static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
4361 | { | ||
4362 | return; | ||
4363 | } | ||
4364 | |||
4365 | static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) | 4334 | static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) |
4366 | { | 4335 | { |
4367 | kvm_lapic_set_irr(vec, vcpu->arch.apic); | 4336 | kvm_lapic_set_irr(vec, vcpu->arch.apic); |
@@ -5077,14 +5046,6 @@ static bool svm_has_wbinvd_exit(void) | |||
5077 | return true; | 5046 | return true; |
5078 | } | 5047 | } |
5079 | 5048 | ||
5080 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | ||
5081 | { | ||
5082 | struct vcpu_svm *svm = to_svm(vcpu); | ||
5083 | |||
5084 | set_exception_intercept(svm, NM_VECTOR); | ||
5085 | update_cr0_intercept(svm); | ||
5086 | } | ||
5087 | |||
5088 | #define PRE_EX(exit) { .exit_code = (exit), \ | 5049 | #define PRE_EX(exit) { .exit_code = (exit), \ |
5089 | .stage = X86_ICPT_PRE_EXCEPT, } | 5050 | .stage = X86_ICPT_PRE_EXCEPT, } |
5090 | #define POST_EX(exit) { .exit_code = (exit), \ | 5051 | #define POST_EX(exit) { .exit_code = (exit), \ |
@@ -5345,9 +5306,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | |||
5345 | 5306 | ||
5346 | .get_pkru = svm_get_pkru, | 5307 | .get_pkru = svm_get_pkru, |
5347 | 5308 | ||
5348 | .fpu_activate = svm_fpu_activate, | ||
5349 | .fpu_deactivate = svm_fpu_deactivate, | ||
5350 | |||
5351 | .tlb_flush = svm_flush_tlb, | 5309 | .tlb_flush = svm_flush_tlb, |
5352 | 5310 | ||
5353 | .run = svm_vcpu_run, | 5311 | .run = svm_vcpu_run, |
@@ -5371,7 +5329,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | |||
5371 | .get_enable_apicv = svm_get_enable_apicv, | 5329 | .get_enable_apicv = svm_get_enable_apicv, |
5372 | .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, | 5330 | .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, |
5373 | .load_eoi_exitmap = svm_load_eoi_exitmap, | 5331 | .load_eoi_exitmap = svm_load_eoi_exitmap, |
5374 | .sync_pir_to_irr = svm_sync_pir_to_irr, | ||
5375 | .hwapic_irr_update = svm_hwapic_irr_update, | 5332 | .hwapic_irr_update = svm_hwapic_irr_update, |
5376 | .hwapic_isr_update = svm_hwapic_isr_update, | 5333 | .hwapic_isr_update = svm_hwapic_isr_update, |
5377 | .apicv_post_state_restore = avic_post_state_restore, | 5334 | .apicv_post_state_restore = avic_post_state_restore, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a236decb81e4..ef4ba71dbb66 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -1856,7 +1856,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
1856 | u32 eb; | 1856 | u32 eb; |
1857 | 1857 | ||
1858 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | | 1858 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | |
1859 | (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); | 1859 | (1u << DB_VECTOR) | (1u << AC_VECTOR); |
1860 | if ((vcpu->guest_debug & | 1860 | if ((vcpu->guest_debug & |
1861 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == | 1861 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == |
1862 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) | 1862 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) |
@@ -1865,8 +1865,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
1865 | eb = ~0; | 1865 | eb = ~0; |
1866 | if (enable_ept) | 1866 | if (enable_ept) |
1867 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 1867 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
1868 | if (vcpu->fpu_active) | ||
1869 | eb &= ~(1u << NM_VECTOR); | ||
1870 | 1868 | ||
1871 | /* When we are running a nested L2 guest and L1 specified for it a | 1869 | /* When we are running a nested L2 guest and L1 specified for it a |
1872 | * certain exception bitmap, we must trap the same exceptions and pass | 1870 | * certain exception bitmap, we must trap the same exceptions and pass |
@@ -1992,19 +1990,6 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
1992 | m->host[i].value = host_val; | 1990 | m->host[i].value = host_val; |
1993 | } | 1991 | } |
1994 | 1992 | ||
1995 | static void reload_tss(void) | ||
1996 | { | ||
1997 | /* | ||
1998 | * VT restores TR but not its size. Useless. | ||
1999 | */ | ||
2000 | struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); | ||
2001 | struct desc_struct *descs; | ||
2002 | |||
2003 | descs = (void *)gdt->address; | ||
2004 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | ||
2005 | load_TR_desc(); | ||
2006 | } | ||
2007 | |||
2008 | static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | 1993 | static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) |
2009 | { | 1994 | { |
2010 | u64 guest_efer = vmx->vcpu.arch.efer; | 1995 | u64 guest_efer = vmx->vcpu.arch.efer; |
@@ -2059,41 +2044,36 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
2059 | } | 2044 | } |
2060 | } | 2045 | } |
2061 | 2046 | ||
2047 | #ifdef CONFIG_X86_32 | ||
2048 | /* | ||
2049 | * On 32-bit kernels, VM exits still load the FS and GS bases from the | ||
2050 | * VMCS rather than the segment table. KVM uses this helper to figure | ||
2051 | * out the current bases to poke them into the VMCS before entry. | ||
2052 | */ | ||
2062 | static unsigned long segment_base(u16 selector) | 2053 | static unsigned long segment_base(u16 selector) |
2063 | { | 2054 | { |
2064 | struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); | 2055 | struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); |
2065 | struct desc_struct *d; | 2056 | struct desc_struct *d; |
2066 | unsigned long table_base; | 2057 | struct desc_struct *table; |
2067 | unsigned long v; | 2058 | unsigned long v; |
2068 | 2059 | ||
2069 | if (!(selector & ~3)) | 2060 | if (!(selector & ~SEGMENT_RPL_MASK)) |
2070 | return 0; | 2061 | return 0; |
2071 | 2062 | ||
2072 | table_base = gdt->address; | 2063 | table = (struct desc_struct *)gdt->address; |
2073 | 2064 | ||
2074 | if (selector & 4) { /* from ldt */ | 2065 | if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { |
2075 | u16 ldt_selector = kvm_read_ldt(); | 2066 | u16 ldt_selector = kvm_read_ldt(); |
2076 | 2067 | ||
2077 | if (!(ldt_selector & ~3)) | 2068 | if (!(ldt_selector & ~SEGMENT_RPL_MASK)) |
2078 | return 0; | 2069 | return 0; |
2079 | 2070 | ||
2080 | table_base = segment_base(ldt_selector); | 2071 | table = (struct desc_struct *)segment_base(ldt_selector); |
2081 | } | 2072 | } |
2082 | d = (struct desc_struct *)(table_base + (selector & ~7)); | 2073 | v = get_desc_base(&table[selector >> 3]); |
2083 | v = get_desc_base(d); | ||
2084 | #ifdef CONFIG_X86_64 | ||
2085 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
2086 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
2087 | #endif | ||
2088 | return v; | 2074 | return v; |
2089 | } | 2075 | } |
2090 | 2076 | #endif | |
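For readers unfamiliar with selector layout, the indexing in the rewritten segment_base() follows directly from the x86 selector format: bits 0-1 hold the RPL, bit 2 chooses GDT vs. LDT, and bits 3-15 are the descriptor index, which is why table[selector >> 3] lands on the right 8-byte descriptor slot. A small decoding sketch (userspace only, no descriptor tables touched; the mask names are local stand-ins for the kernel's SEGMENT_* macros):

    #include <stdint.h>
    #include <stdio.h>

    #define SEL_RPL_MASK 0x3u   /* requested privilege level         */
    #define SEL_TI_MASK  0x4u   /* table indicator: 0 = GDT, 1 = LDT */

    static void decode_selector(uint16_t sel)
    {
        printf("selector %#x: index %u, table %s, RPL %u\n",
               sel, sel >> 3, (sel & SEL_TI_MASK) ? "LDT" : "GDT",
               sel & SEL_RPL_MASK);
    }

    int main(void)
    {
        decode_selector(0x10);   /* index 2 in the GDT, RPL 0 */
        decode_selector(0x2b);   /* index 5 in the GDT, RPL 3 */
        return 0;
    }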
2091 | static inline unsigned long kvm_read_tr_base(void) | ||
2092 | { | ||
2093 | u16 tr; | ||
2094 | asm("str %0" : "=g"(tr)); | ||
2095 | return segment_base(tr); | ||
2096 | } | ||
2097 | 2077 | ||
2098 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 2078 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
2099 | { | 2079 | { |
@@ -2179,7 +2159,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
2179 | loadsegment(es, vmx->host_state.es_sel); | 2159 | loadsegment(es, vmx->host_state.es_sel); |
2180 | } | 2160 | } |
2181 | #endif | 2161 | #endif |
2182 | reload_tss(); | 2162 | invalidate_tss_limit(); |
2183 | #ifdef CONFIG_X86_64 | 2163 | #ifdef CONFIG_X86_64 |
2184 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 2164 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
2185 | #endif | 2165 | #endif |
@@ -2294,10 +2274,19 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2294 | 2274 | ||
2295 | /* | 2275 | /* |
2296 | * Linux uses per-cpu TSS and GDT, so set these when switching | 2276 | * Linux uses per-cpu TSS and GDT, so set these when switching |
2297 | * processors. | 2277 | * processors. See 22.2.4. |
2298 | */ | 2278 | */ |
2299 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ | 2279 | vmcs_writel(HOST_TR_BASE, |
2300 | vmcs_writel(HOST_GDTR_BASE, gdt->address); /* 22.2.4 */ | 2280 | (unsigned long)this_cpu_ptr(&cpu_tss)); |
2281 | vmcs_writel(HOST_GDTR_BASE, gdt->address); | ||
2282 | |||
2283 | /* | ||
2284 | * VM exits change the host TR limit to 0x67 after a VM | ||
2285 | * exit. This is okay, since 0x67 covers everything except | ||
2286 | * the IO bitmap, and we have code to handle the IO bitmap | ||
2287 | * being lost after a VM exit. | ||
2288 | */ | ||
2289 | BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67); | ||
2301 | 2290 | ||
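For context on the 0x67 constant: the 64-bit hardware TSS is 104 (0x68) bytes, so a segment limit of 0x67 covers exactly that structure and nothing beyond it, and the BUILD_BUG_ON above pins the I/O bitmap to the very next byte. A short arithmetic sketch, with the 104-byte size stated here as background rather than taken from this hunk:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        const unsigned hw_tss_size = 104;             /* 0x68: 64-bit hardware TSS */
        const unsigned tr_limit_after_vmexit = 0x67;

        /* A segment limit is the last valid byte offset, hence size - 1. */
        assert(tr_limit_after_vmexit == hw_tss_size - 1);
        printf("TR limit after VM exit covers bytes 0..%#x\n",
               tr_limit_after_vmexit);
        return 0;
    }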
2302 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 2291 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
2303 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 2292 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
@@ -2340,25 +2329,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | |||
2340 | } | 2329 | } |
2341 | } | 2330 | } |
2342 | 2331 | ||
2343 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | ||
2344 | { | ||
2345 | ulong cr0; | ||
2346 | |||
2347 | if (vcpu->fpu_active) | ||
2348 | return; | ||
2349 | vcpu->fpu_active = 1; | ||
2350 | cr0 = vmcs_readl(GUEST_CR0); | ||
2351 | cr0 &= ~(X86_CR0_TS | X86_CR0_MP); | ||
2352 | cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP); | ||
2353 | vmcs_writel(GUEST_CR0, cr0); | ||
2354 | update_exception_bitmap(vcpu); | ||
2355 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | ||
2356 | if (is_guest_mode(vcpu)) | ||
2357 | vcpu->arch.cr0_guest_owned_bits &= | ||
2358 | ~get_vmcs12(vcpu)->cr0_guest_host_mask; | ||
2359 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
2360 | } | ||
2361 | |||
2362 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | 2332 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); |
2363 | 2333 | ||
2364 | /* | 2334 | /* |
@@ -2377,33 +2347,6 @@ static inline unsigned long nested_read_cr4(struct vmcs12 *fields) | |||
2377 | (fields->cr4_read_shadow & fields->cr4_guest_host_mask); | 2347 | (fields->cr4_read_shadow & fields->cr4_guest_host_mask); |
2378 | } | 2348 | } |
2379 | 2349 | ||
2380 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | ||
2381 | { | ||
2382 | /* Note that there is no vcpu->fpu_active = 0 here. The caller must | ||
2383 | * set this *before* calling this function. | ||
2384 | */ | ||
2385 | vmx_decache_cr0_guest_bits(vcpu); | ||
2386 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); | ||
2387 | update_exception_bitmap(vcpu); | ||
2388 | vcpu->arch.cr0_guest_owned_bits = 0; | ||
2389 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
2390 | if (is_guest_mode(vcpu)) { | ||
2391 | /* | ||
2392 | * L1's specified read shadow might not contain the TS bit, | ||
2393 | * so now that we turned on shadowing of this bit, we need to | ||
2394 | * set this bit of the shadow. Like in nested_vmx_run we need | ||
2395 | * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet | ||
2396 | * up-to-date here because we just decached cr0.TS (and we'll | ||
2397 | * only update vmcs12->guest_cr0 on nested exit). | ||
2398 | */ | ||
2399 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
2400 | vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) | | ||
2401 | (vcpu->arch.cr0 & X86_CR0_TS); | ||
2402 | vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); | ||
2403 | } else | ||
2404 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
2405 | } | ||
2406 | |||
2407 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 2350 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
2408 | { | 2351 | { |
2409 | unsigned long rflags, save_rflags; | 2352 | unsigned long rflags, save_rflags; |
@@ -3962,7 +3905,7 @@ static void fix_rmode_seg(int seg, struct kvm_segment *save) | |||
3962 | } | 3905 | } |
3963 | 3906 | ||
3964 | vmcs_write16(sf->selector, var.selector); | 3907 | vmcs_write16(sf->selector, var.selector); |
3965 | vmcs_write32(sf->base, var.base); | 3908 | vmcs_writel(sf->base, var.base); |
3966 | vmcs_write32(sf->limit, var.limit); | 3909 | vmcs_write32(sf->limit, var.limit); |
3967 | vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); | 3910 | vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); |
3968 | } | 3911 | } |
@@ -4232,9 +4175,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
4232 | if (enable_ept) | 4175 | if (enable_ept) |
4233 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 4176 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
4234 | 4177 | ||
4235 | if (!vcpu->fpu_active) | ||
4236 | hw_cr0 |= X86_CR0_TS | X86_CR0_MP; | ||
4237 | |||
4238 | vmcs_writel(CR0_READ_SHADOW, cr0); | 4178 | vmcs_writel(CR0_READ_SHADOW, cr0); |
4239 | vmcs_writel(GUEST_CR0, hw_cr0); | 4179 | vmcs_writel(GUEST_CR0, hw_cr0); |
4240 | vcpu->arch.cr0 = cr0; | 4180 | vcpu->arch.cr0 = cr0; |
@@ -4953,7 +4893,7 @@ static bool vmx_get_enable_apicv(void) | |||
4953 | return enable_apicv; | 4893 | return enable_apicv; |
4954 | } | 4894 | } |
4955 | 4895 | ||
4956 | static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | 4896 | static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) |
4957 | { | 4897 | { |
4958 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4898 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4959 | int max_irr; | 4899 | int max_irr; |
@@ -4964,19 +4904,15 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | |||
4964 | vmx->nested.pi_pending) { | 4904 | vmx->nested.pi_pending) { |
4965 | vmx->nested.pi_pending = false; | 4905 | vmx->nested.pi_pending = false; |
4966 | if (!pi_test_and_clear_on(vmx->nested.pi_desc)) | 4906 | if (!pi_test_and_clear_on(vmx->nested.pi_desc)) |
4967 | return 0; | 4907 | return; |
4968 | 4908 | ||
4969 | max_irr = find_last_bit( | 4909 | max_irr = find_last_bit( |
4970 | (unsigned long *)vmx->nested.pi_desc->pir, 256); | 4910 | (unsigned long *)vmx->nested.pi_desc->pir, 256); |
4971 | 4911 | ||
4972 | if (max_irr == 256) | 4912 | if (max_irr == 256) |
4973 | return 0; | 4913 | return; |
4974 | 4914 | ||
4975 | vapic_page = kmap(vmx->nested.virtual_apic_page); | 4915 | vapic_page = kmap(vmx->nested.virtual_apic_page); |
4976 | if (!vapic_page) { | ||
4977 | WARN_ON(1); | ||
4978 | return -ENOMEM; | ||
4979 | } | ||
4980 | __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); | 4916 | __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); |
4981 | kunmap(vmx->nested.virtual_apic_page); | 4917 | kunmap(vmx->nested.virtual_apic_page); |
4982 | 4918 | ||
@@ -4987,7 +4923,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | |||
4987 | vmcs_write16(GUEST_INTR_STATUS, status); | 4923 | vmcs_write16(GUEST_INTR_STATUS, status); |
4988 | } | 4924 | } |
4989 | } | 4925 | } |
4990 | return 0; | ||
4991 | } | 4926 | } |
4992 | 4927 | ||
4993 | static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) | 4928 | static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) |
@@ -5056,26 +4991,12 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) | |||
5056 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) | 4991 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) |
5057 | return; | 4992 | return; |
5058 | 4993 | ||
5059 | r = pi_test_and_set_on(&vmx->pi_desc); | 4994 | /* If a previous notification has sent the IPI, nothing to do. */ |
5060 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 4995 | if (pi_test_and_set_on(&vmx->pi_desc)) |
5061 | if (r || !kvm_vcpu_trigger_posted_interrupt(vcpu)) | ||
5062 | kvm_vcpu_kick(vcpu); | ||
5063 | } | ||
5064 | |||
5065 | static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
5066 | { | ||
5067 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5068 | |||
5069 | if (!pi_test_on(&vmx->pi_desc)) | ||
5070 | return; | 4996 | return; |
5071 | 4997 | ||
5072 | pi_clear_on(&vmx->pi_desc); | 4998 | if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) |
5073 | /* | 4999 | kvm_vcpu_kick(vcpu); |
5074 | * IOMMU can write to PIR.ON, so the barrier matters even on UP. | ||
5075 | * But on x86 this is just a compiler barrier anyway. | ||
5076 | */ | ||
5077 | smp_mb__after_atomic(); | ||
5078 | kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); | ||
5079 | } | 5000 | } |
5080 | 5001 | ||
5081 | /* | 5002 | /* |
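
The reworked vmx_deliver_posted_interrupt() above only sends the notification IPI when it is the caller that flips the descriptor's ON bit from clear to set; if the bit was already set, the IPI triggered by the earlier notification will also pick up the newly posted vector. A small sketch of that gating with C11 atomics (the names are illustrative, not the kernel's pi_desc helpers):

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag pi_on = ATOMIC_FLAG_INIT;    /* stand-in for the PIR ON bit */

static void send_notification_ipi(void)
{
	printf("notification IPI sent\n");
}

static void deliver(int vector)
{
	(void)vector;                   /* setting the PIR bit would happen here */

	/* if a previous notification already set ON, nothing more to do */
	if (atomic_flag_test_and_set(&pi_on))
		return;
	send_notification_ipi();
}

int main(void)
{
	deliver(32);    /* first delivery sends the IPI */
	deliver(33);    /* second one rides on the pending notification */
	return 0;
}
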
@@ -5236,10 +5157,8 @@ static void ept_set_mmio_spte_mask(void) | |||
5236 | /* | 5157 | /* |
5237 | * EPT Misconfigurations can be generated if the value of bits 2:0 | 5158 | * EPT Misconfigurations can be generated if the value of bits 2:0 |
5238 | * of an EPT paging-structure entry is 110b (write/execute). | 5159 | * of an EPT paging-structure entry is 110b (write/execute). |
5239 | * Also, magic bits (0x3ull << 62) is set to quickly identify mmio | ||
5240 | * spte. | ||
5241 | */ | 5160 | */ |
5242 | kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull); | 5161 | kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE); |
5243 | } | 5162 | } |
5244 | 5163 | ||
5245 | #define VMX_XSS_EXIT_BITMAP 0 | 5164 | #define VMX_XSS_EXIT_BITMAP 0 |
@@ -5342,7 +5261,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
5342 | /* 22.2.1, 20.8.1 */ | 5261 | /* 22.2.1, 20.8.1 */ |
5343 | vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); | 5262 | vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); |
5344 | 5263 | ||
5345 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 5264 | vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; |
5265 | vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); | ||
5266 | |||
5346 | set_cr4_guest_host_mask(vmx); | 5267 | set_cr4_guest_host_mask(vmx); |
5347 | 5268 | ||
5348 | if (vmx_xsaves_supported()) | 5269 | if (vmx_xsaves_supported()) |
@@ -5446,7 +5367,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
5446 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ | 5367 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ |
5447 | vmx_set_cr4(vcpu, 0); | 5368 | vmx_set_cr4(vcpu, 0); |
5448 | vmx_set_efer(vcpu, 0); | 5369 | vmx_set_efer(vcpu, 0); |
5449 | vmx_fpu_activate(vcpu); | 5370 | |
5450 | update_exception_bitmap(vcpu); | 5371 | update_exception_bitmap(vcpu); |
5451 | 5372 | ||
5452 | vpid_sync_context(vmx->vpid); | 5373 | vpid_sync_context(vmx->vpid); |
@@ -5480,26 +5401,20 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) | |||
5480 | 5401 | ||
5481 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 5402 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
5482 | { | 5403 | { |
5483 | u32 cpu_based_vm_exec_control; | 5404 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, |
5484 | 5405 | CPU_BASED_VIRTUAL_INTR_PENDING); | |
5485 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
5486 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | ||
5487 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
5488 | } | 5406 | } |
5489 | 5407 | ||
5490 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 5408 | static void enable_nmi_window(struct kvm_vcpu *vcpu) |
5491 | { | 5409 | { |
5492 | u32 cpu_based_vm_exec_control; | ||
5493 | |||
5494 | if (!cpu_has_virtual_nmis() || | 5410 | if (!cpu_has_virtual_nmis() || |
5495 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { | 5411 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { |
5496 | enable_irq_window(vcpu); | 5412 | enable_irq_window(vcpu); |
5497 | return; | 5413 | return; |
5498 | } | 5414 | } |
5499 | 5415 | ||
5500 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 5416 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, |
5501 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | 5417 | CPU_BASED_VIRTUAL_NMI_PENDING); |
5502 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
5503 | } | 5418 | } |
5504 | 5419 | ||
5505 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) | 5420 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
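
enable_irq_window() and enable_nmi_window() above now rely on vmcs_set_bits() instead of open-coding a read-modify-write of CPU_BASED_VM_EXEC_CONTROL. A sketch of what such helpers reduce to when layered over 32-bit read/write accessors; the stubs below are placeholders for the real VMREAD/VMWRITE wrappers and the bit value is illustrative only:

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_exec_control;      /* placeholder backing store */

static uint32_t vmcs_read32_stub(void)            { return fake_exec_control; }
static void     vmcs_write32_stub(uint32_t value) { fake_exec_control = value; }

/* read-modify-write helpers in the style of vmcs_set_bits()/vmcs_clear_bits() */
static void vmcs_set_bits_stub(uint32_t mask)
{
	vmcs_write32_stub(vmcs_read32_stub() | mask);
}

static void vmcs_clear_bits_stub(uint32_t mask)
{
	vmcs_write32_stub(vmcs_read32_stub() & ~mask);
}

#define VIRTUAL_INTR_PENDING (1u << 2)  /* illustrative bit position */

int main(void)
{
	vmcs_set_bits_stub(VIRTUAL_INTR_PENDING);
	printf("after set:   %#x\n", (unsigned)fake_exec_control);
	vmcs_clear_bits_stub(VIRTUAL_INTR_PENDING);
	printf("after clear: %#x\n", (unsigned)fake_exec_control);
	return 0;
}
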
@@ -5725,11 +5640,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
5725 | if (is_nmi(intr_info)) | 5640 | if (is_nmi(intr_info)) |
5726 | return 1; /* already handled by vmx_vcpu_run() */ | 5641 | return 1; /* already handled by vmx_vcpu_run() */ |
5727 | 5642 | ||
5728 | if (is_no_device(intr_info)) { | ||
5729 | vmx_fpu_activate(vcpu); | ||
5730 | return 1; | ||
5731 | } | ||
5732 | |||
5733 | if (is_invalid_opcode(intr_info)) { | 5643 | if (is_invalid_opcode(intr_info)) { |
5734 | if (is_guest_mode(vcpu)) { | 5644 | if (is_guest_mode(vcpu)) { |
5735 | kvm_queue_exception(vcpu, UD_VECTOR); | 5645 | kvm_queue_exception(vcpu, UD_VECTOR); |
@@ -5919,22 +5829,6 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) | |||
5919 | return kvm_set_cr4(vcpu, val); | 5829 | return kvm_set_cr4(vcpu, val); |
5920 | } | 5830 | } |
5921 | 5831 | ||
5922 | /* called to set cr0 as appropriate for clts instruction exit. */ | ||
5923 | static void handle_clts(struct kvm_vcpu *vcpu) | ||
5924 | { | ||
5925 | if (is_guest_mode(vcpu)) { | ||
5926 | /* | ||
5927 | * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS | ||
5928 | * but we did (!fpu_active). We need to keep GUEST_CR0.TS on, | ||
5929 | * just pretend it's off (also in arch.cr0 for fpu_activate). | ||
5930 | */ | ||
5931 | vmcs_writel(CR0_READ_SHADOW, | ||
5932 | vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS); | ||
5933 | vcpu->arch.cr0 &= ~X86_CR0_TS; | ||
5934 | } else | ||
5935 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | ||
5936 | } | ||
5937 | |||
5938 | static int handle_cr(struct kvm_vcpu *vcpu) | 5832 | static int handle_cr(struct kvm_vcpu *vcpu) |
5939 | { | 5833 | { |
5940 | unsigned long exit_qualification, val; | 5834 | unsigned long exit_qualification, val; |
@@ -5980,9 +5874,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
5980 | } | 5874 | } |
5981 | break; | 5875 | break; |
5982 | case 2: /* clts */ | 5876 | case 2: /* clts */ |
5983 | handle_clts(vcpu); | 5877 | WARN_ONCE(1, "Guest should always own CR0.TS"); |
5878 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | ||
5984 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); | 5879 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); |
5985 | vmx_fpu_activate(vcpu); | ||
5986 | return kvm_skip_emulated_instruction(vcpu); | 5880 | return kvm_skip_emulated_instruction(vcpu); |
5987 | case 1: /*mov from cr*/ | 5881 | case 1: /*mov from cr*/ |
5988 | switch (cr) { | 5882 | switch (cr) { |
@@ -6152,18 +6046,14 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) | |||
6152 | 6046 | ||
6153 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) | 6047 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) |
6154 | { | 6048 | { |
6155 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6049 | kvm_apic_update_ppr(vcpu); |
6156 | return 1; | 6050 | return 1; |
6157 | } | 6051 | } |
6158 | 6052 | ||
6159 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) | 6053 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) |
6160 | { | 6054 | { |
6161 | u32 cpu_based_vm_exec_control; | 6055 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, |
6162 | 6056 | CPU_BASED_VIRTUAL_INTR_PENDING); | |
6163 | /* clear pending irq */ | ||
6164 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
6165 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | ||
6166 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
6167 | 6057 | ||
6168 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6058 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6169 | 6059 | ||
@@ -6374,15 +6264,22 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
6374 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 6264 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
6375 | trace_kvm_page_fault(gpa, exit_qualification); | 6265 | trace_kvm_page_fault(gpa, exit_qualification); |
6376 | 6266 | ||
6377 | /* it is a read fault? */ | 6267 | /* Is it a read fault? */ |
6378 | error_code = (exit_qualification << 2) & PFERR_USER_MASK; | 6268 | error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) |
6379 | /* it is a write fault? */ | 6269 | ? PFERR_USER_MASK : 0; |
6380 | error_code |= exit_qualification & PFERR_WRITE_MASK; | 6270 | /* Is it a write fault? */ |
6381 | /* It is a fetch fault? */ | 6271 | error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) |
6382 | error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK; | 6272 | ? PFERR_WRITE_MASK : 0; |
6383 | /* ept page table is present? */ | 6273 | /* Is it a fetch fault? */ |
6384 | error_code |= (exit_qualification & 0x38) != 0; | 6274 | error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) |
6385 | 6275 | ? PFERR_FETCH_MASK : 0; | |
6276 | /* ept page table entry is present? */ | ||
6277 | error_code |= (exit_qualification & | ||
6278 | (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | | ||
6279 | EPT_VIOLATION_EXECUTABLE)) | ||
6280 | ? PFERR_PRESENT_MASK : 0; | ||
6281 | |||
6282 | vcpu->arch.gpa_available = true; | ||
6386 | vcpu->arch.exit_qualification = exit_qualification; | 6283 | vcpu->arch.exit_qualification = exit_qualification; |
6387 | 6284 | ||
6388 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | 6285 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); |
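
The handler above now maps named EPT_VIOLATION_* exit-qualification bits onto PFERR_* flags instead of shifting magic numbers, and reports PFERR_PRESENT_MASK when the faulting EPT entry had any of its R/W/X permissions set. A stand-alone sketch of the same decoding; the bit positions follow the usual exit-qualification layout (bits 0-2 access type, bits 3-5 entry permissions) but are defined locally here rather than taken from the kernel headers:

#include <stdint.h>
#include <stdio.h>

/* exit-qualification bits (locally defined for illustration) */
#define EPTV_ACC_READ    (1u << 0)
#define EPTV_ACC_WRITE   (1u << 1)
#define EPTV_ACC_INSTR   (1u << 2)
#define EPTV_READABLE    (1u << 3)
#define EPTV_WRITABLE    (1u << 4)
#define EPTV_EXECUTABLE  (1u << 5)

/* page-fault error-code bits (locally defined for illustration) */
#define PF_PRESENT  (1u << 0)
#define PF_WRITE    (1u << 1)
#define PF_USER     (1u << 2)
#define PF_FETCH    (1u << 4)

static uint32_t decode_ept_violation(uint64_t exit_qual)
{
	uint32_t error_code = 0;

	error_code |= (exit_qual & EPTV_ACC_READ)  ? PF_USER  : 0;
	error_code |= (exit_qual & EPTV_ACC_WRITE) ? PF_WRITE : 0;
	error_code |= (exit_qual & EPTV_ACC_INSTR) ? PF_FETCH : 0;
	/* any R/W/X permission bit means the EPT entry was present */
	error_code |= (exit_qual & (EPTV_READABLE | EPTV_WRITABLE |
				    EPTV_EXECUTABLE)) ? PF_PRESENT : 0;
	return error_code;
}

int main(void)
{
	/* write to a readable/writable page: expect WRITE | PRESENT = 0x3 */
	printf("%#x\n", (unsigned)decode_ept_violation(EPTV_ACC_WRITE |
						       EPTV_READABLE |
						       EPTV_WRITABLE));
	return 0;
}
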
@@ -6400,6 +6297,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
6400 | } | 6297 | } |
6401 | 6298 | ||
6402 | ret = handle_mmio_page_fault(vcpu, gpa, true); | 6299 | ret = handle_mmio_page_fault(vcpu, gpa, true); |
6300 | vcpu->arch.gpa_available = true; | ||
6403 | if (likely(ret == RET_MMIO_PF_EMULATE)) | 6301 | if (likely(ret == RET_MMIO_PF_EMULATE)) |
6404 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == | 6302 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == |
6405 | EMULATE_DONE; | 6303 | EMULATE_DONE; |
@@ -6421,12 +6319,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
6421 | 6319 | ||
6422 | static int handle_nmi_window(struct kvm_vcpu *vcpu) | 6320 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
6423 | { | 6321 | { |
6424 | u32 cpu_based_vm_exec_control; | 6322 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, |
6425 | 6323 | CPU_BASED_VIRTUAL_NMI_PENDING); | |
6426 | /* clear pending NMI */ | ||
6427 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
6428 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; | ||
6429 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
6430 | ++vcpu->stat.nmi_window_exits; | 6324 | ++vcpu->stat.nmi_window_exits; |
6431 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6325 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6432 | 6326 | ||
@@ -6572,6 +6466,19 @@ static void wakeup_handler(void) | |||
6572 | spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | 6466 | spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); |
6573 | } | 6467 | } |
6574 | 6468 | ||
6469 | void vmx_enable_tdp(void) | ||
6470 | { | ||
6471 | kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, | ||
6472 | enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, | ||
6473 | enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, | ||
6474 | 0ull, VMX_EPT_EXECUTABLE_MASK, | ||
6475 | cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, | ||
6476 | enable_ept_ad_bits ? 0ull : VMX_EPT_RWX_MASK); | ||
6477 | |||
6478 | ept_set_mmio_spte_mask(); | ||
6479 | kvm_enable_tdp(); | ||
6480 | } | ||
6481 | |||
6575 | static __init int hardware_setup(void) | 6482 | static __init int hardware_setup(void) |
6576 | { | 6483 | { |
6577 | int r = -ENOMEM, i, msr; | 6484 | int r = -ENOMEM, i, msr; |
@@ -6651,8 +6558,10 @@ static __init int hardware_setup(void) | |||
6651 | if (!cpu_has_vmx_ple()) | 6558 | if (!cpu_has_vmx_ple()) |
6652 | ple_gap = 0; | 6559 | ple_gap = 0; |
6653 | 6560 | ||
6654 | if (!cpu_has_vmx_apicv()) | 6561 | if (!cpu_has_vmx_apicv()) { |
6655 | enable_apicv = 0; | 6562 | enable_apicv = 0; |
6563 | kvm_x86_ops->sync_pir_to_irr = NULL; | ||
6564 | } | ||
6656 | 6565 | ||
6657 | if (cpu_has_vmx_tsc_scaling()) { | 6566 | if (cpu_has_vmx_tsc_scaling()) { |
6658 | kvm_has_tsc_control = true; | 6567 | kvm_has_tsc_control = true; |
@@ -6697,16 +6606,9 @@ static __init int hardware_setup(void) | |||
6697 | /* SELF-IPI */ | 6606 | /* SELF-IPI */ |
6698 | vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); | 6607 | vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); |
6699 | 6608 | ||
6700 | if (enable_ept) { | 6609 | if (enable_ept) |
6701 | kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, | 6610 | vmx_enable_tdp(); |
6702 | (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, | 6611 | else |
6703 | (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, | ||
6704 | 0ull, VMX_EPT_EXECUTABLE_MASK, | ||
6705 | cpu_has_vmx_ept_execute_only() ? | ||
6706 | 0ull : VMX_EPT_READABLE_MASK); | ||
6707 | ept_set_mmio_spte_mask(); | ||
6708 | kvm_enable_tdp(); | ||
6709 | } else | ||
6710 | kvm_disable_tdp(); | 6612 | kvm_disable_tdp(); |
6711 | 6613 | ||
6712 | update_ple_window_actual_max(); | 6614 | update_ple_window_actual_max(); |
@@ -7085,13 +6987,18 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, | |||
7085 | } | 6987 | } |
7086 | 6988 | ||
7087 | page = nested_get_page(vcpu, vmptr); | 6989 | page = nested_get_page(vcpu, vmptr); |
7088 | if (page == NULL || | 6990 | if (page == NULL) { |
7089 | *(u32 *)kmap(page) != VMCS12_REVISION) { | ||
7090 | nested_vmx_failInvalid(vcpu); | 6991 | nested_vmx_failInvalid(vcpu); |
6992 | return kvm_skip_emulated_instruction(vcpu); | ||
6993 | } | ||
6994 | if (*(u32 *)kmap(page) != VMCS12_REVISION) { | ||
7091 | kunmap(page); | 6995 | kunmap(page); |
6996 | nested_release_page_clean(page); | ||
6997 | nested_vmx_failInvalid(vcpu); | ||
7092 | return kvm_skip_emulated_instruction(vcpu); | 6998 | return kvm_skip_emulated_instruction(vcpu); |
7093 | } | 6999 | } |
7094 | kunmap(page); | 7000 | kunmap(page); |
7001 | nested_release_page_clean(page); | ||
7095 | vmx->nested.vmxon_ptr = vmptr; | 7002 | vmx->nested.vmxon_ptr = vmptr; |
7096 | break; | 7003 | break; |
7097 | case EXIT_REASON_VMCLEAR: | 7004 | case EXIT_REASON_VMCLEAR: |
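
The rework above splits the NULL check from the revision check so that the page reference taken by nested_get_page() is dropped on the mismatch path and on the success path alike; that is the leak referred to by "kvm: fix page struct leak in handle_vmon" in the shortlog. A generic user-space sketch of the acquire/map/check/release shape, with malloc standing in for the page reference (names here are illustrative):

#include <stdint.h>
#include <stdlib.h>

#define VMCS12_REV 1u

/* stand-ins for nested_get_page()/kmap()/kunmap()/nested_release_page_clean() */
static uint32_t *get_page_ref(uint32_t guest_value)
{
	uint32_t *p = malloc(sizeof(*p));
	if (p)
		*p = guest_value;
	return p;
}
static uint32_t *map_page(uint32_t *p) { return p; }
static void unmap_page(uint32_t *p)    { (void)p; }
static void put_page_ref(uint32_t *p)  { free(p); }

/* returns 0 if the revision id matches; the reference is dropped on every path */
static int check_vmptr(uint32_t guest_value)
{
	uint32_t *page = get_page_ref(guest_value);

	if (!page)
		return -1;              /* nothing acquired, nothing to release */

	if (*map_page(page) != VMCS12_REV) {
		unmap_page(page);
		put_page_ref(page);     /* release on the mismatch path ... */
		return -1;
	}
	unmap_page(page);
	put_page_ref(page);             /* ... and on the success path too */
	return 0;
}

int main(void)
{
	return check_vmptr(VMCS12_REV);
}
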
@@ -7129,6 +7036,53 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, | |||
7129 | return 0; | 7036 | return 0; |
7130 | } | 7037 | } |
7131 | 7038 | ||
7039 | static int enter_vmx_operation(struct kvm_vcpu *vcpu) | ||
7040 | { | ||
7041 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
7042 | struct vmcs *shadow_vmcs; | ||
7043 | |||
7044 | if (cpu_has_vmx_msr_bitmap()) { | ||
7045 | vmx->nested.msr_bitmap = | ||
7046 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
7047 | if (!vmx->nested.msr_bitmap) | ||
7048 | goto out_msr_bitmap; | ||
7049 | } | ||
7050 | |||
7051 | vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); | ||
7052 | if (!vmx->nested.cached_vmcs12) | ||
7053 | goto out_cached_vmcs12; | ||
7054 | |||
7055 | if (enable_shadow_vmcs) { | ||
7056 | shadow_vmcs = alloc_vmcs(); | ||
7057 | if (!shadow_vmcs) | ||
7058 | goto out_shadow_vmcs; | ||
7059 | /* mark vmcs as shadow */ | ||
7060 | shadow_vmcs->revision_id |= (1u << 31); | ||
7061 | /* init shadow vmcs */ | ||
7062 | vmcs_clear(shadow_vmcs); | ||
7063 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; | ||
7064 | } | ||
7065 | |||
7066 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | ||
7067 | vmx->nested.vmcs02_num = 0; | ||
7068 | |||
7069 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, | ||
7070 | HRTIMER_MODE_REL_PINNED); | ||
7071 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; | ||
7072 | |||
7073 | vmx->nested.vmxon = true; | ||
7074 | return 0; | ||
7075 | |||
7076 | out_shadow_vmcs: | ||
7077 | kfree(vmx->nested.cached_vmcs12); | ||
7078 | |||
7079 | out_cached_vmcs12: | ||
7080 | free_page((unsigned long)vmx->nested.msr_bitmap); | ||
7081 | |||
7082 | out_msr_bitmap: | ||
7083 | return -ENOMEM; | ||
7084 | } | ||
7085 | |||
7132 | /* | 7086 | /* |
7133 | * Emulate the VMXON instruction. | 7087 | * Emulate the VMXON instruction. |
7134 | * Currently, we just remember that VMX is active, and do not save or even | 7088 | * Currently, we just remember that VMX is active, and do not save or even |
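
enter_vmx_operation() above gathers the allocations that handle_vmon() used to perform inline and unwinds them with the kernel's usual goto-on-error style: each label frees only what was successfully allocated before the failure point. A compact sketch of that staged-cleanup pattern with plain malloc/free (the three resources are illustrative, not the real msr_bitmap/cached_vmcs12/shadow_vmcs objects):

#include <stdlib.h>

struct ctx {
	void *bitmap;
	void *cache;
	void *shadow;
};

/* allocate three resources; on failure, release only what already succeeded */
static int setup(struct ctx *c)
{
	c->bitmap = malloc(4096);
	if (!c->bitmap)
		goto out_bitmap;

	c->cache = malloc(4096);
	if (!c->cache)
		goto out_cache;

	c->shadow = malloc(4096);
	if (!c->shadow)
		goto out_shadow;

	return 0;

out_shadow:
	free(c->cache);
out_cache:
	free(c->bitmap);
out_bitmap:
	return -1;
}

int main(void)
{
	struct ctx c = { 0 };

	if (setup(&c))
		return 1;
	free(c.shadow);
	free(c.cache);
	free(c.bitmap);
	return 0;
}
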
@@ -7139,9 +7093,9 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, | |||
7139 | */ | 7093 | */ |
7140 | static int handle_vmon(struct kvm_vcpu *vcpu) | 7094 | static int handle_vmon(struct kvm_vcpu *vcpu) |
7141 | { | 7095 | { |
7096 | int ret; | ||
7142 | struct kvm_segment cs; | 7097 | struct kvm_segment cs; |
7143 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7098 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7144 | struct vmcs *shadow_vmcs; | ||
7145 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | 7099 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED |
7146 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | 7100 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
7147 | 7101 | ||
@@ -7168,9 +7122,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
7168 | return 1; | 7122 | return 1; |
7169 | } | 7123 | } |
7170 | 7124 | ||
7171 | if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) | ||
7172 | return 1; | ||
7173 | |||
7174 | if (vmx->nested.vmxon) { | 7125 | if (vmx->nested.vmxon) { |
7175 | nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); | 7126 | nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); |
7176 | return kvm_skip_emulated_instruction(vcpu); | 7127 | return kvm_skip_emulated_instruction(vcpu); |
@@ -7182,48 +7133,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
7182 | return 1; | 7133 | return 1; |
7183 | } | 7134 | } |
7184 | 7135 | ||
7185 | if (cpu_has_vmx_msr_bitmap()) { | 7136 | if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) |
7186 | vmx->nested.msr_bitmap = | 7137 | return 1; |
7187 | (unsigned long *)__get_free_page(GFP_KERNEL); | 7138 | |
7188 | if (!vmx->nested.msr_bitmap) | 7139 | ret = enter_vmx_operation(vcpu); |
7189 | goto out_msr_bitmap; | 7140 | if (ret) |
7190 | } | 7141 | return ret; |
7191 | |||
7192 | vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); | ||
7193 | if (!vmx->nested.cached_vmcs12) | ||
7194 | goto out_cached_vmcs12; | ||
7195 | |||
7196 | if (enable_shadow_vmcs) { | ||
7197 | shadow_vmcs = alloc_vmcs(); | ||
7198 | if (!shadow_vmcs) | ||
7199 | goto out_shadow_vmcs; | ||
7200 | /* mark vmcs as shadow */ | ||
7201 | shadow_vmcs->revision_id |= (1u << 31); | ||
7202 | /* init shadow vmcs */ | ||
7203 | vmcs_clear(shadow_vmcs); | ||
7204 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; | ||
7205 | } | ||
7206 | |||
7207 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | ||
7208 | vmx->nested.vmcs02_num = 0; | ||
7209 | |||
7210 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, | ||
7211 | HRTIMER_MODE_REL_PINNED); | ||
7212 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; | ||
7213 | |||
7214 | vmx->nested.vmxon = true; | ||
7215 | 7142 | ||
7216 | nested_vmx_succeed(vcpu); | 7143 | nested_vmx_succeed(vcpu); |
7217 | return kvm_skip_emulated_instruction(vcpu); | 7144 | return kvm_skip_emulated_instruction(vcpu); |
7218 | |||
7219 | out_shadow_vmcs: | ||
7220 | kfree(vmx->nested.cached_vmcs12); | ||
7221 | |||
7222 | out_cached_vmcs12: | ||
7223 | free_page((unsigned long)vmx->nested.msr_bitmap); | ||
7224 | |||
7225 | out_msr_bitmap: | ||
7226 | return -ENOMEM; | ||
7227 | } | 7145 | } |
7228 | 7146 | ||
7229 | /* | 7147 | /* |
@@ -7672,6 +7590,18 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
7672 | return kvm_skip_emulated_instruction(vcpu); | 7590 | return kvm_skip_emulated_instruction(vcpu); |
7673 | } | 7591 | } |
7674 | 7592 | ||
7593 | static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) | ||
7594 | { | ||
7595 | vmx->nested.current_vmptr = vmptr; | ||
7596 | if (enable_shadow_vmcs) { | ||
7597 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | ||
7598 | SECONDARY_EXEC_SHADOW_VMCS); | ||
7599 | vmcs_write64(VMCS_LINK_POINTER, | ||
7600 | __pa(vmx->vmcs01.shadow_vmcs)); | ||
7601 | vmx->nested.sync_shadow_vmcs = true; | ||
7602 | } | ||
7603 | } | ||
7604 | |||
7675 | /* Emulate the VMPTRLD instruction */ | 7605 | /* Emulate the VMPTRLD instruction */ |
7676 | static int handle_vmptrld(struct kvm_vcpu *vcpu) | 7606 | static int handle_vmptrld(struct kvm_vcpu *vcpu) |
7677 | { | 7607 | { |
@@ -7702,7 +7632,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
7702 | } | 7632 | } |
7703 | 7633 | ||
7704 | nested_release_vmcs12(vmx); | 7634 | nested_release_vmcs12(vmx); |
7705 | vmx->nested.current_vmptr = vmptr; | ||
7706 | vmx->nested.current_vmcs12 = new_vmcs12; | 7635 | vmx->nested.current_vmcs12 = new_vmcs12; |
7707 | vmx->nested.current_vmcs12_page = page; | 7636 | vmx->nested.current_vmcs12_page = page; |
7708 | /* | 7637 | /* |
@@ -7711,14 +7640,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
7711 | */ | 7640 | */ |
7712 | memcpy(vmx->nested.cached_vmcs12, | 7641 | memcpy(vmx->nested.cached_vmcs12, |
7713 | vmx->nested.current_vmcs12, VMCS12_SIZE); | 7642 | vmx->nested.current_vmcs12, VMCS12_SIZE); |
7714 | 7643 | set_current_vmptr(vmx, vmptr); | |
7715 | if (enable_shadow_vmcs) { | ||
7716 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | ||
7717 | SECONDARY_EXEC_SHADOW_VMCS); | ||
7718 | vmcs_write64(VMCS_LINK_POINTER, | ||
7719 | __pa(vmx->vmcs01.shadow_vmcs)); | ||
7720 | vmx->nested.sync_shadow_vmcs = true; | ||
7721 | } | ||
7722 | } | 7644 | } |
7723 | 7645 | ||
7724 | nested_vmx_succeed(vcpu); | 7646 | nested_vmx_succeed(vcpu); |
@@ -8191,8 +8113,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
8191 | case EXIT_REASON_TASK_SWITCH: | 8113 | case EXIT_REASON_TASK_SWITCH: |
8192 | return true; | 8114 | return true; |
8193 | case EXIT_REASON_CPUID: | 8115 | case EXIT_REASON_CPUID: |
8194 | if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) | ||
8195 | return false; | ||
8196 | return true; | 8116 | return true; |
8197 | case EXIT_REASON_HLT: | 8117 | case EXIT_REASON_HLT: |
8198 | return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); | 8118 | return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); |
@@ -8350,7 +8270,7 @@ static void kvm_flush_pml_buffers(struct kvm *kvm) | |||
8350 | static void vmx_dump_sel(char *name, uint32_t sel) | 8270 | static void vmx_dump_sel(char *name, uint32_t sel) |
8351 | { | 8271 | { |
8352 | pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", | 8272 | pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", |
8353 | name, vmcs_read32(sel), | 8273 | name, vmcs_read16(sel), |
8354 | vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), | 8274 | vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), |
8355 | vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), | 8275 | vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), |
8356 | vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); | 8276 | vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); |
@@ -8514,6 +8434,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
8514 | u32 vectoring_info = vmx->idt_vectoring_info; | 8434 | u32 vectoring_info = vmx->idt_vectoring_info; |
8515 | 8435 | ||
8516 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); | 8436 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); |
8437 | vcpu->arch.gpa_available = false; | ||
8517 | 8438 | ||
8518 | /* | 8439 | /* |
8519 | * Flush logged GPAs PML buffer, this will make dirty_bitmap more | 8440 | * Flush logged GPAs PML buffer, this will make dirty_bitmap more |
@@ -8732,6 +8653,27 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | |||
8732 | } | 8653 | } |
8733 | } | 8654 | } |
8734 | 8655 | ||
8656 | static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
8657 | { | ||
8658 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
8659 | int max_irr; | ||
8660 | |||
8661 | WARN_ON(!vcpu->arch.apicv_active); | ||
8662 | if (pi_test_on(&vmx->pi_desc)) { | ||
8663 | pi_clear_on(&vmx->pi_desc); | ||
8664 | /* | ||
8665 | * IOMMU can write to PIR.ON, so the barrier matters even on UP. | ||
8666 | * But on x86 this is just a compiler barrier anyway. | ||
8667 | */ | ||
8668 | smp_mb__after_atomic(); | ||
8669 | max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); | ||
8670 | } else { | ||
8671 | max_irr = kvm_lapic_find_highest_irr(vcpu); | ||
8672 | } | ||
8673 | vmx_hwapic_irr_update(vcpu, max_irr); | ||
8674 | return max_irr; | ||
8675 | } | ||
8676 | |||
8735 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 8677 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
8736 | { | 8678 | { |
8737 | if (!kvm_vcpu_apicv_active(vcpu)) | 8679 | if (!kvm_vcpu_apicv_active(vcpu)) |
@@ -8743,6 +8685,14 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | |||
8743 | vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); | 8685 | vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); |
8744 | } | 8686 | } |
8745 | 8687 | ||
8688 | static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) | ||
8689 | { | ||
8690 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
8691 | |||
8692 | pi_clear_on(&vmx->pi_desc); | ||
8693 | memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); | ||
8694 | } | ||
8695 | |||
8746 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | 8696 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) |
8747 | { | 8697 | { |
8748 | u32 exit_intr_info; | 8698 | u32 exit_intr_info; |
@@ -9588,17 +9538,16 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | |||
9588 | kvm_inject_page_fault(vcpu, fault); | 9538 | kvm_inject_page_fault(vcpu, fault); |
9589 | } | 9539 | } |
9590 | 9540 | ||
9591 | static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | 9541 | static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, |
9542 | struct vmcs12 *vmcs12); | ||
9543 | |||
9544 | static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | ||
9592 | struct vmcs12 *vmcs12) | 9545 | struct vmcs12 *vmcs12) |
9593 | { | 9546 | { |
9594 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9547 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
9595 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 9548 | u64 hpa; |
9596 | 9549 | ||
9597 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | 9550 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { |
9598 | if (!PAGE_ALIGNED(vmcs12->apic_access_addr) || | ||
9599 | vmcs12->apic_access_addr >> maxphyaddr) | ||
9600 | return false; | ||
9601 | |||
9602 | /* | 9551 | /* |
9603 | * Translate L1 physical address to host physical | 9552 | * Translate L1 physical address to host physical |
9604 | * address for vmcs02. Keep the page pinned, so this | 9553 | * address for vmcs02. Keep the page pinned, so this |
@@ -9609,59 +9558,80 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
9609 | nested_release_page(vmx->nested.apic_access_page); | 9558 | nested_release_page(vmx->nested.apic_access_page); |
9610 | vmx->nested.apic_access_page = | 9559 | vmx->nested.apic_access_page = |
9611 | nested_get_page(vcpu, vmcs12->apic_access_addr); | 9560 | nested_get_page(vcpu, vmcs12->apic_access_addr); |
9561 | /* | ||
9562 | * If translation failed, no matter: This feature asks | ||
9563 | * to exit when accessing the given address, and if it | ||
9564 | * can never be accessed, this feature won't do | ||
9565 | * anything anyway. | ||
9566 | */ | ||
9567 | if (vmx->nested.apic_access_page) { | ||
9568 | hpa = page_to_phys(vmx->nested.apic_access_page); | ||
9569 | vmcs_write64(APIC_ACCESS_ADDR, hpa); | ||
9570 | } else { | ||
9571 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, | ||
9572 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | ||
9573 | } | ||
9574 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && | ||
9575 | cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { | ||
9576 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | ||
9577 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | ||
9578 | kvm_vcpu_reload_apic_access_page(vcpu); | ||
9612 | } | 9579 | } |
9613 | 9580 | ||
9614 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | 9581 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { |
9615 | if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) || | ||
9616 | vmcs12->virtual_apic_page_addr >> maxphyaddr) | ||
9617 | return false; | ||
9618 | |||
9619 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ | 9582 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ |
9620 | nested_release_page(vmx->nested.virtual_apic_page); | 9583 | nested_release_page(vmx->nested.virtual_apic_page); |
9621 | vmx->nested.virtual_apic_page = | 9584 | vmx->nested.virtual_apic_page = |
9622 | nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); | 9585 | nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); |
9623 | 9586 | ||
9624 | /* | 9587 | /* |
9625 | * Failing the vm entry is _not_ what the processor does | 9588 | * If translation failed, VM entry will fail because |
9626 | * but it's basically the only possibility we have. | 9589 | * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. |
9627 | * We could still enter the guest if CR8 load exits are | 9590 | * Failing the vm entry is _not_ what the processor |
9628 | * enabled, CR8 store exits are enabled, and virtualize APIC | 9591 | * does but it's basically the only possibility we |
9629 | * access is disabled; in this case the processor would never | 9592 | * have. We could still enter the guest if CR8 load |
9630 | * use the TPR shadow and we could simply clear the bit from | 9593 | * exits are enabled, CR8 store exits are enabled, and |
9631 | * the execution control. But such a configuration is useless, | 9594 | * virtualize APIC access is disabled; in this case |
9632 | * so let's keep the code simple. | 9595 | * the processor would never use the TPR shadow and we |
9596 | * could simply clear the bit from the execution | ||
9597 | * control. But such a configuration is useless, so | ||
9598 | * let's keep the code simple. | ||
9633 | */ | 9599 | */ |
9634 | if (!vmx->nested.virtual_apic_page) | 9600 | if (vmx->nested.virtual_apic_page) { |
9635 | return false; | 9601 | hpa = page_to_phys(vmx->nested.virtual_apic_page); |
9602 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); | ||
9603 | } | ||
9636 | } | 9604 | } |
9637 | 9605 | ||
9638 | if (nested_cpu_has_posted_intr(vmcs12)) { | 9606 | if (nested_cpu_has_posted_intr(vmcs12)) { |
9639 | if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) || | ||
9640 | vmcs12->posted_intr_desc_addr >> maxphyaddr) | ||
9641 | return false; | ||
9642 | |||
9643 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ | 9607 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ |
9644 | kunmap(vmx->nested.pi_desc_page); | 9608 | kunmap(vmx->nested.pi_desc_page); |
9645 | nested_release_page(vmx->nested.pi_desc_page); | 9609 | nested_release_page(vmx->nested.pi_desc_page); |
9646 | } | 9610 | } |
9647 | vmx->nested.pi_desc_page = | 9611 | vmx->nested.pi_desc_page = |
9648 | nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); | 9612 | nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); |
9649 | if (!vmx->nested.pi_desc_page) | ||
9650 | return false; | ||
9651 | |||
9652 | vmx->nested.pi_desc = | 9613 | vmx->nested.pi_desc = |
9653 | (struct pi_desc *)kmap(vmx->nested.pi_desc_page); | 9614 | (struct pi_desc *)kmap(vmx->nested.pi_desc_page); |
9654 | if (!vmx->nested.pi_desc) { | 9615 | if (!vmx->nested.pi_desc) { |
9655 | nested_release_page_clean(vmx->nested.pi_desc_page); | 9616 | nested_release_page_clean(vmx->nested.pi_desc_page); |
9656 | return false; | 9617 | return; |
9657 | } | 9618 | } |
9658 | vmx->nested.pi_desc = | 9619 | vmx->nested.pi_desc = |
9659 | (struct pi_desc *)((void *)vmx->nested.pi_desc + | 9620 | (struct pi_desc *)((void *)vmx->nested.pi_desc + |
9660 | (unsigned long)(vmcs12->posted_intr_desc_addr & | 9621 | (unsigned long)(vmcs12->posted_intr_desc_addr & |
9661 | (PAGE_SIZE - 1))); | 9622 | (PAGE_SIZE - 1))); |
9623 | vmcs_write64(POSTED_INTR_DESC_ADDR, | ||
9624 | page_to_phys(vmx->nested.pi_desc_page) + | ||
9625 | (unsigned long)(vmcs12->posted_intr_desc_addr & | ||
9626 | (PAGE_SIZE - 1))); | ||
9662 | } | 9627 | } |
9663 | 9628 | if (cpu_has_vmx_msr_bitmap() && | |
9664 | return true; | 9629 | nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) && |
9630 | nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) | ||
9631 | ; | ||
9632 | else | ||
9633 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, | ||
9634 | CPU_BASED_USE_MSR_BITMAPS); | ||
9665 | } | 9635 | } |
9666 | 9636 | ||
9667 | static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) | 9637 | static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) |
@@ -9730,11 +9700,6 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | |||
9730 | return false; | 9700 | return false; |
9731 | } | 9701 | } |
9732 | msr_bitmap_l1 = (unsigned long *)kmap(page); | 9702 | msr_bitmap_l1 = (unsigned long *)kmap(page); |
9733 | if (!msr_bitmap_l1) { | ||
9734 | nested_release_page_clean(page); | ||
9735 | WARN_ON(1); | ||
9736 | return false; | ||
9737 | } | ||
9738 | 9703 | ||
9739 | memset(msr_bitmap_l0, 0xff, PAGE_SIZE); | 9704 | memset(msr_bitmap_l0, 0xff, PAGE_SIZE); |
9740 | 9705 | ||
@@ -9982,7 +9947,7 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) | |||
9982 | * is assigned to entry_failure_code on failure. | 9947 | * is assigned to entry_failure_code on failure. |
9983 | */ | 9948 | */ |
9984 | static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, | 9949 | static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, |
9985 | unsigned long *entry_failure_code) | 9950 | u32 *entry_failure_code) |
9986 | { | 9951 | { |
9987 | if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { | 9952 | if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { |
9988 | if (!nested_cr3_valid(vcpu, cr3)) { | 9953 | if (!nested_cr3_valid(vcpu, cr3)) { |
@@ -10022,7 +9987,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne | |||
10022 | * is assigned to entry_failure_code on failure. | 9987 | * is assigned to entry_failure_code on failure. |
10023 | */ | 9988 | */ |
10024 | static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | 9989 | static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, |
10025 | unsigned long *entry_failure_code) | 9990 | bool from_vmentry, u32 *entry_failure_code) |
10026 | { | 9991 | { |
10027 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9992 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
10028 | u32 exec_control; | 9993 | u32 exec_control; |
@@ -10065,21 +10030,26 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10065 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); | 10030 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); |
10066 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); | 10031 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); |
10067 | 10032 | ||
10068 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { | 10033 | if (from_vmentry && |
10034 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { | ||
10069 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); | 10035 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
10070 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); | 10036 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); |
10071 | } else { | 10037 | } else { |
10072 | kvm_set_dr(vcpu, 7, vcpu->arch.dr7); | 10038 | kvm_set_dr(vcpu, 7, vcpu->arch.dr7); |
10073 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); | 10039 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); |
10074 | } | 10040 | } |
10075 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 10041 | if (from_vmentry) { |
10076 | vmcs12->vm_entry_intr_info_field); | 10042 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
10077 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | 10043 | vmcs12->vm_entry_intr_info_field); |
10078 | vmcs12->vm_entry_exception_error_code); | 10044 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, |
10079 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | 10045 | vmcs12->vm_entry_exception_error_code); |
10080 | vmcs12->vm_entry_instruction_len); | 10046 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, |
10081 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 10047 | vmcs12->vm_entry_instruction_len); |
10082 | vmcs12->guest_interruptibility_info); | 10048 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, |
10049 | vmcs12->guest_interruptibility_info); | ||
10050 | } else { | ||
10051 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); | ||
10052 | } | ||
10083 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 10053 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
10084 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); | 10054 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
10085 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 10055 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
@@ -10108,12 +10078,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10108 | vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; | 10078 | vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; |
10109 | vmx->nested.pi_pending = false; | 10079 | vmx->nested.pi_pending = false; |
10110 | vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); | 10080 | vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); |
10111 | vmcs_write64(POSTED_INTR_DESC_ADDR, | 10081 | } else { |
10112 | page_to_phys(vmx->nested.pi_desc_page) + | ||
10113 | (unsigned long)(vmcs12->posted_intr_desc_addr & | ||
10114 | (PAGE_SIZE - 1))); | ||
10115 | } else | ||
10116 | exec_control &= ~PIN_BASED_POSTED_INTR; | 10082 | exec_control &= ~PIN_BASED_POSTED_INTR; |
10083 | } | ||
10117 | 10084 | ||
10118 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); | 10085 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); |
10119 | 10086 | ||
@@ -10158,26 +10125,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10158 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) | 10125 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) |
10159 | exec_control |= vmcs12->secondary_vm_exec_control; | 10126 | exec_control |= vmcs12->secondary_vm_exec_control; |
10160 | 10127 | ||
10161 | if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { | ||
10162 | /* | ||
10163 | * If translation failed, no matter: This feature asks | ||
10164 | * to exit when accessing the given address, and if it | ||
10165 | * can never be accessed, this feature won't do | ||
10166 | * anything anyway. | ||
10167 | */ | ||
10168 | if (!vmx->nested.apic_access_page) | ||
10169 | exec_control &= | ||
10170 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
10171 | else | ||
10172 | vmcs_write64(APIC_ACCESS_ADDR, | ||
10173 | page_to_phys(vmx->nested.apic_access_page)); | ||
10174 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && | ||
10175 | cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { | ||
10176 | exec_control |= | ||
10177 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
10178 | kvm_vcpu_reload_apic_access_page(vcpu); | ||
10179 | } | ||
10180 | |||
10181 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { | 10128 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { |
10182 | vmcs_write64(EOI_EXIT_BITMAP0, | 10129 | vmcs_write64(EOI_EXIT_BITMAP0, |
10183 | vmcs12->eoi_exit_bitmap0); | 10130 | vmcs12->eoi_exit_bitmap0); |
@@ -10192,6 +10139,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10192 | } | 10139 | } |
10193 | 10140 | ||
10194 | nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; | 10141 | nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; |
10142 | |||
10143 | /* | ||
10144 | * Write an illegal value to APIC_ACCESS_ADDR. Later, | ||
10145 | * nested_get_vmcs12_pages will either fix it up or | ||
10146 | * remove the VM execution control. | ||
10147 | */ | ||
10148 | if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) | ||
10149 | vmcs_write64(APIC_ACCESS_ADDR, -1ull); | ||
10150 | |||
10195 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 10151 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
10196 | } | 10152 | } |
10197 | 10153 | ||
@@ -10228,19 +10184,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10228 | exec_control &= ~CPU_BASED_TPR_SHADOW; | 10184 | exec_control &= ~CPU_BASED_TPR_SHADOW; |
10229 | exec_control |= vmcs12->cpu_based_vm_exec_control; | 10185 | exec_control |= vmcs12->cpu_based_vm_exec_control; |
10230 | 10186 | ||
10187 | /* | ||
10188 | * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if | ||
10189 | * nested_get_vmcs12_pages can't fix it up, the illegal value | ||
10190 | * will result in a VM entry failure. | ||
10191 | */ | ||
10231 | if (exec_control & CPU_BASED_TPR_SHADOW) { | 10192 | if (exec_control & CPU_BASED_TPR_SHADOW) { |
10232 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | 10193 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); |
10233 | page_to_phys(vmx->nested.virtual_apic_page)); | ||
10234 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); | 10194 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); |
10235 | } | 10195 | } |
10236 | 10196 | ||
10237 | if (cpu_has_vmx_msr_bitmap() && | ||
10238 | exec_control & CPU_BASED_USE_MSR_BITMAPS && | ||
10239 | nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) | ||
10240 | ; /* MSR_BITMAP will be set by following vmx_set_efer. */ | ||
10241 | else | ||
10242 | exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; | ||
10243 | |||
10244 | /* | 10197 | /* |
10245 | * Merging of IO bitmap not currently supported. | 10198 | * Merging of IO bitmap not currently supported. |
10246 | * Rather, exit every time. | 10199 | * Rather, exit every time. |
@@ -10272,16 +10225,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10272 | ~VM_ENTRY_IA32E_MODE) | | 10225 | ~VM_ENTRY_IA32E_MODE) | |
10273 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | 10226 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); |
10274 | 10227 | ||
10275 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) { | 10228 | if (from_vmentry && |
10229 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { | ||
10276 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); | 10230 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); |
10277 | vcpu->arch.pat = vmcs12->guest_ia32_pat; | 10231 | vcpu->arch.pat = vmcs12->guest_ia32_pat; |
10278 | } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | 10232 | } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
10279 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); | 10233 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); |
10280 | 10234 | } | |
10281 | 10235 | ||
10282 | set_cr4_guest_host_mask(vmx); | 10236 | set_cr4_guest_host_mask(vmx); |
10283 | 10237 | ||
10284 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) | 10238 | if (from_vmentry && |
10239 | vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) | ||
10285 | vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); | 10240 | vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); |
10286 | 10241 | ||
10287 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) | 10242 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) |
@@ -10320,8 +10275,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10320 | } | 10275 | } |
10321 | 10276 | ||
10322 | /* | 10277 | /* |
10323 | * This sets GUEST_CR0 to vmcs12->guest_cr0, with possibly a modified | 10278 | * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those |
10324 | * TS bit (for lazy fpu) and bits which we consider mandatory enabled. | 10279 | * bits which we consider mandatory enabled. |
10325 | * The CR0_READ_SHADOW is what L2 should have expected to read given | 10280 | * The CR0_READ_SHADOW is what L2 should have expected to read given |
10326 | * the specifications by L1; It's not enough to take | 10281 | * the specifications by L1; It's not enough to take |
10327 | * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we | 10282 | * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we |
@@ -10333,7 +10288,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10333 | vmx_set_cr4(vcpu, vmcs12->guest_cr4); | 10288 | vmx_set_cr4(vcpu, vmcs12->guest_cr4); |
10334 | vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); | 10289 | vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); |
10335 | 10290 | ||
10336 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | 10291 | if (from_vmentry && |
10292 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) | ||
10337 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | 10293 | vcpu->arch.efer = vmcs12->guest_ia32_efer; |
10338 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | 10294 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) |
10339 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | 10295 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); |
@@ -10367,73 +10323,22 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10367 | return 0; | 10323 | return 0; |
10368 | } | 10324 | } |
10369 | 10325 | ||
10370 | /* | 10326 | static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
10371 | * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 | ||
10372 | * for running an L2 nested guest. | ||
10373 | */ | ||
10374 | static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | ||
10375 | { | 10327 | { |
10376 | struct vmcs12 *vmcs12; | ||
10377 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 10328 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
10378 | int cpu; | ||
10379 | struct loaded_vmcs *vmcs02; | ||
10380 | bool ia32e; | ||
10381 | u32 msr_entry_idx; | ||
10382 | unsigned long exit_qualification; | ||
10383 | |||
10384 | if (!nested_vmx_check_permission(vcpu)) | ||
10385 | return 1; | ||
10386 | |||
10387 | if (!nested_vmx_check_vmcs12(vcpu)) | ||
10388 | goto out; | ||
10389 | |||
10390 | vmcs12 = get_vmcs12(vcpu); | ||
10391 | |||
10392 | if (enable_shadow_vmcs) | ||
10393 | copy_shadow_to_vmcs12(vmx); | ||
10394 | |||
10395 | /* | ||
10396 | * The nested entry process starts with enforcing various prerequisites | ||
10397 | * on vmcs12 as required by the Intel SDM, and act appropriately when | ||
10398 | * they fail: As the SDM explains, some conditions should cause the | ||
10399 | * instruction to fail, while others will cause the instruction to seem | ||
10400 | * to succeed, but return an EXIT_REASON_INVALID_STATE. | ||
10401 | * To speed up the normal (success) code path, we should avoid checking | ||
10402 | * for misconfigurations which will anyway be caught by the processor | ||
10403 | * when using the merged vmcs02. | ||
10404 | */ | ||
10405 | if (vmcs12->launch_state == launch) { | ||
10406 | nested_vmx_failValid(vcpu, | ||
10407 | launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS | ||
10408 | : VMXERR_VMRESUME_NONLAUNCHED_VMCS); | ||
10409 | goto out; | ||
10410 | } | ||
10411 | 10329 | ||
10412 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && | 10330 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && |
10413 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { | 10331 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) |
10414 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 10332 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10415 | goto out; | ||
10416 | } | ||
10417 | 10333 | ||
10418 | if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { | 10334 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) |
10419 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 10335 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10420 | goto out; | ||
10421 | } | ||
10422 | 10336 | ||
10423 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) { | 10337 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) |
10424 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 10338 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10425 | goto out; | ||
10426 | } | ||
10427 | 10339 | ||
10428 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) { | 10340 | if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) |
10429 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 10341 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10430 | goto out; | ||
10431 | } | ||
10432 | |||
10433 | if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) { | ||
10434 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
10435 | goto out; | ||
10436 | } | ||
10437 | 10342 | ||
10438 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, | 10343 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, |
10439 | vmx->nested.nested_vmx_procbased_ctls_low, | 10344 | vmx->nested.nested_vmx_procbased_ctls_low, |
@@ -10450,28 +10355,30 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
10450 | !vmx_control_verify(vmcs12->vm_entry_controls, | 10355 | !vmx_control_verify(vmcs12->vm_entry_controls, |
10451 | vmx->nested.nested_vmx_entry_ctls_low, | 10356 | vmx->nested.nested_vmx_entry_ctls_low, |
10452 | vmx->nested.nested_vmx_entry_ctls_high)) | 10357 | vmx->nested.nested_vmx_entry_ctls_high)) |
10453 | { | 10358 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10454 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
10455 | goto out; | ||
10456 | } | ||
10457 | 10359 | ||
10458 | if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || | 10360 | if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || |
10459 | !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || | 10361 | !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || |
10460 | !nested_cr3_valid(vcpu, vmcs12->host_cr3)) { | 10362 | !nested_cr3_valid(vcpu, vmcs12->host_cr3)) |
10461 | nested_vmx_failValid(vcpu, | 10363 | return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; |
10462 | VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); | 10364 | |
10463 | goto out; | 10365 | return 0; |
10464 | } | 10366 | } |
10367 | |||
10368 | static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | ||
10369 | u32 *exit_qual) | ||
10370 | { | ||
10371 | bool ia32e; | ||
10372 | |||
10373 | *exit_qual = ENTRY_FAIL_DEFAULT; | ||
10465 | 10374 | ||
10466 | if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || | 10375 | if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || |
10467 | !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) { | 10376 | !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) |
10468 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
10469 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
10470 | return 1; | 10377 | return 1; |
10471 | } | 10378 | |
10472 | if (vmcs12->vmcs_link_pointer != -1ull) { | 10379 | if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) && |
10473 | nested_vmx_entry_failure(vcpu, vmcs12, | 10380 | vmcs12->vmcs_link_pointer != -1ull) { |
10474 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR); | 10381 | *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; |
10475 | return 1; | 10382 | return 1; |
10476 | } | 10383 | } |
10477 | 10384 | ||
@@ -10484,16 +10391,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
10484 | * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to | 10391 | * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to |
10485 | * CR0.PG) is 1. | 10392 | * CR0.PG) is 1. |
10486 | */ | 10393 | */ |
10487 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { | 10394 | if (to_vmx(vcpu)->nested.nested_run_pending && |
10395 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { | ||
10488 | ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; | 10396 | ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; |
10489 | if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || | 10397 | if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || |
10490 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || | 10398 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || |
10491 | ((vmcs12->guest_cr0 & X86_CR0_PG) && | 10399 | ((vmcs12->guest_cr0 & X86_CR0_PG) && |
10492 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { | 10400 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) |
10493 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
10494 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
10495 | return 1; | 10401 | return 1; |
10496 | } | ||
10497 | } | 10402 | } |
10498 | 10403 | ||
10499 | /* | 10404 | /* |
@@ -10507,28 +10412,26 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
10507 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; | 10412 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; |
10508 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || | 10413 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || |
10509 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || | 10414 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || |
10510 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { | 10415 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) |
10511 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
10512 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
10513 | return 1; | 10416 | return 1; |
10514 | } | ||
10515 | } | 10417 | } |
10516 | 10418 | ||
10517 | /* | 10419 | return 0; |
10518 | * We're finally done with prerequisite checking, and can start with | 10420 | } |
10519 | * the nested entry. | 10421 | |
10520 | */ | 10422 | static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) |
10423 | { | ||
10424 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
10425 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
10426 | struct loaded_vmcs *vmcs02; | ||
10427 | int cpu; | ||
10428 | u32 msr_entry_idx; | ||
10429 | u32 exit_qual; | ||
10521 | 10430 | ||
10522 | vmcs02 = nested_get_current_vmcs02(vmx); | 10431 | vmcs02 = nested_get_current_vmcs02(vmx); |
10523 | if (!vmcs02) | 10432 | if (!vmcs02) |
10524 | return -ENOMEM; | 10433 | return -ENOMEM; |
10525 | 10434 | ||
10526 | /* | ||
10527 | * After this point, the trap flag no longer triggers a singlestep trap | ||
10528 | * on the vm entry instructions. Don't call | ||
10529 | * kvm_skip_emulated_instruction. | ||
10530 | */ | ||
10531 | skip_emulated_instruction(vcpu); | ||
10532 | enter_guest_mode(vcpu); | 10435 | enter_guest_mode(vcpu); |
10533 | 10436 | ||
10534 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) | 10437 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) |
@@ -10543,14 +10446,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
10543 | 10446 | ||
10544 | vmx_segment_cache_clear(vmx); | 10447 | vmx_segment_cache_clear(vmx); |
10545 | 10448 | ||
10546 | if (prepare_vmcs02(vcpu, vmcs12, &exit_qualification)) { | 10449 | if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { |
10547 | leave_guest_mode(vcpu); | 10450 | leave_guest_mode(vcpu); |
10548 | vmx_load_vmcs01(vcpu); | 10451 | vmx_load_vmcs01(vcpu); |
10549 | nested_vmx_entry_failure(vcpu, vmcs12, | 10452 | nested_vmx_entry_failure(vcpu, vmcs12, |
10550 | EXIT_REASON_INVALID_STATE, exit_qualification); | 10453 | EXIT_REASON_INVALID_STATE, exit_qual); |
10551 | return 1; | 10454 | return 1; |
10552 | } | 10455 | } |
10553 | 10456 | ||
10457 | nested_get_vmcs12_pages(vcpu, vmcs12); | ||
10458 | |||
10554 | msr_entry_idx = nested_vmx_load_msr(vcpu, | 10459 | msr_entry_idx = nested_vmx_load_msr(vcpu, |
10555 | vmcs12->vm_entry_msr_load_addr, | 10460 | vmcs12->vm_entry_msr_load_addr, |
10556 | vmcs12->vm_entry_msr_load_count); | 10461 | vmcs12->vm_entry_msr_load_count); |
@@ -10564,17 +10469,90 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
10564 | 10469 | ||
10565 | vmcs12->launch_state = 1; | 10470 | vmcs12->launch_state = 1; |
10566 | 10471 | ||
10567 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | ||
10568 | return kvm_vcpu_halt(vcpu); | ||
10569 | |||
10570 | vmx->nested.nested_run_pending = 1; | ||
10571 | |||
10572 | /* | 10472 | /* |
10573 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point | 10473 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point |
10574 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet | 10474 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet |
10575 | * returned as far as L1 is concerned. It will only return (and set | 10475 | * returned as far as L1 is concerned. It will only return (and set |
10576 | * the success flag) when L2 exits (see nested_vmx_vmexit()). | 10476 | * the success flag) when L2 exits (see nested_vmx_vmexit()). |
10577 | */ | 10477 | */ |
10478 | return 0; | ||
10479 | } | ||
10480 | |||
10481 | /* | ||
10482 | * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 | ||
10483 | * for running an L2 nested guest. | ||
10484 | */ | ||
10485 | static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | ||
10486 | { | ||
10487 | struct vmcs12 *vmcs12; | ||
10488 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
10489 | u32 exit_qual; | ||
10490 | int ret; | ||
10491 | |||
10492 | if (!nested_vmx_check_permission(vcpu)) | ||
10493 | return 1; | ||
10494 | |||
10495 | if (!nested_vmx_check_vmcs12(vcpu)) | ||
10496 | goto out; | ||
10497 | |||
10498 | vmcs12 = get_vmcs12(vcpu); | ||
10499 | |||
10500 | if (enable_shadow_vmcs) | ||
10501 | copy_shadow_to_vmcs12(vmx); | ||
10502 | |||
10503 | /* | ||
10504 | * The nested entry process starts with enforcing various prerequisites | ||
10505 | * on vmcs12 as required by the Intel SDM, and act appropriately when | ||
10506 | * they fail: As the SDM explains, some conditions should cause the | ||
10507 | * instruction to fail, while others will cause the instruction to seem | ||
10508 | * to succeed, but return an EXIT_REASON_INVALID_STATE. | ||
10509 | * To speed up the normal (success) code path, we should avoid checking | ||
10510 | * for misconfigurations which will anyway be caught by the processor | ||
10511 | * when using the merged vmcs02. | ||
10512 | */ | ||
10513 | if (vmcs12->launch_state == launch) { | ||
10514 | nested_vmx_failValid(vcpu, | ||
10515 | launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS | ||
10516 | : VMXERR_VMRESUME_NONLAUNCHED_VMCS); | ||
10517 | goto out; | ||
10518 | } | ||
10519 | |||
10520 | ret = check_vmentry_prereqs(vcpu, vmcs12); | ||
10521 | if (ret) { | ||
10522 | nested_vmx_failValid(vcpu, ret); | ||
10523 | goto out; | ||
10524 | } | ||
10525 | |||
10526 | /* | ||
10527 | * After this point, the trap flag no longer triggers a singlestep trap | ||
10528 | * on the vm entry instructions; don't call kvm_skip_emulated_instruction. | ||
10529 | * This is not 100% correct; for performance reasons, we delegate most | ||
10530 | * of the checks on host state to the processor. If those fail, | ||
10531 | * the singlestep trap is missed. | ||
10532 | */ | ||
10533 | skip_emulated_instruction(vcpu); | ||
10534 | |||
10535 | ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual); | ||
10536 | if (ret) { | ||
10537 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
10538 | EXIT_REASON_INVALID_STATE, exit_qual); | ||
10539 | return 1; | ||
10540 | } | ||
10541 | |||
10542 | /* | ||
10543 | * We're finally done with prerequisite checking, and can start with | ||
10544 | * the nested entry. | ||
10545 | */ | ||
10546 | |||
10547 | ret = enter_vmx_non_root_mode(vcpu, true); | ||
10548 | if (ret) | ||
10549 | return ret; | ||
10550 | |||
10551 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | ||
10552 | return kvm_vcpu_halt(vcpu); | ||
10553 | |||
10554 | vmx->nested.nested_run_pending = 1; | ||
10555 | |||
10578 | return 1; | 10556 | return 1; |
10579 | 10557 | ||
10580 | out: | 10558 | out: |
@@ -10696,7 +10674,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) | |||
10696 | return 0; | 10674 | return 0; |
10697 | } | 10675 | } |
10698 | 10676 | ||
10699 | return vmx_complete_nested_posted_interrupt(vcpu); | 10677 | vmx_complete_nested_posted_interrupt(vcpu); |
10678 | return 0; | ||
10700 | } | 10679 | } |
10701 | 10680 | ||
10702 | static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) | 10681 | static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) |
@@ -10714,21 +10693,13 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) | |||
10714 | } | 10693 | } |
10715 | 10694 | ||
10716 | /* | 10695 | /* |
10717 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | 10696 | * Update the guest state fields of vmcs12 to reflect changes that |
10718 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | 10697 | * occurred while L2 was running. (The "IA-32e mode guest" bit of the |
10719 | * and this function updates it to reflect the changes to the guest state while | 10698 | * VM-entry controls is also updated, since this is really a guest |
10720 | * L2 was running (and perhaps made some exits which were handled directly by L0 | 10699 | * state bit.) |
10721 | * without going back to L1), and to reflect the exit reason. | ||
10722 | * Note that we do not have to copy here all VMCS fields, just those that | ||
10723 | * could have changed by the L2 guest or the exit - i.e., the guest-state and | ||
10724 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | ||
10725 | * which already writes to vmcs12 directly. | ||
10726 | */ | 10700 | */ |
10727 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | 10701 | static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
10728 | u32 exit_reason, u32 exit_intr_info, | ||
10729 | unsigned long exit_qualification) | ||
10730 | { | 10702 | { |
10731 | /* update guest state fields: */ | ||
10732 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 10703 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
10733 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); | 10704 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); |
10734 | 10705 | ||
@@ -10834,6 +10805,25 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10834 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); | 10805 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); |
10835 | if (nested_cpu_has_xsaves(vmcs12)) | 10806 | if (nested_cpu_has_xsaves(vmcs12)) |
10836 | vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); | 10807 | vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); |
10808 | } | ||
10809 | |||
10810 | /* | ||
10811 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | ||
10812 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | ||
10813 | * and this function updates it to reflect the changes to the guest state while | ||
10814 | * L2 was running (and perhaps made some exits which were handled directly by L0 | ||
10815 | * without going back to L1), and to reflect the exit reason. | ||
10816 | * Note that we do not have to copy here all VMCS fields, just those that | ||
10817 | * could have changed by the L2 guest or the exit - i.e., the guest-state and | ||
10818 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | ||
10819 | * which already writes to vmcs12 directly. | ||
10820 | */ | ||
10821 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | ||
10822 | u32 exit_reason, u32 exit_intr_info, | ||
10823 | unsigned long exit_qualification) | ||
10824 | { | ||
10825 | /* update guest state fields: */ | ||
10826 | sync_vmcs12(vcpu, vmcs12); | ||
10837 | 10827 | ||
10838 | /* update exit information fields: */ | 10828 | /* update exit information fields: */ |
10839 | 10829 | ||
@@ -10884,7 +10874,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
10884 | struct vmcs12 *vmcs12) | 10874 | struct vmcs12 *vmcs12) |
10885 | { | 10875 | { |
10886 | struct kvm_segment seg; | 10876 | struct kvm_segment seg; |
10887 | unsigned long entry_failure_code; | 10877 | u32 entry_failure_code; |
10888 | 10878 | ||
10889 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 10879 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
10890 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 10880 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
@@ -10899,24 +10889,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
10899 | vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); | 10889 | vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); |
10900 | /* | 10890 | /* |
10901 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't | 10891 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't |
10902 | * actually changed, because it depends on the current state of | 10892 | * actually changed, because vmx_set_cr0 refers to efer set above. |
10903 | * fpu_active (which may have changed). | 10893 | * |
10904 | * Note that vmx_set_cr0 refers to efer set above. | 10894 | * CR0_GUEST_HOST_MASK is already set in the original vmcs01 |
10895 | * (KVM doesn't change it); | ||
10905 | */ | 10896 | */ |
10897 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | ||
10906 | vmx_set_cr0(vcpu, vmcs12->host_cr0); | 10898 | vmx_set_cr0(vcpu, vmcs12->host_cr0); |
10907 | /* | ||
10908 | * If we did fpu_activate()/fpu_deactivate() during L2's run, we need | ||
10909 | * to apply the same changes to L1's vmcs. We just set cr0 correctly, | ||
10910 | * but we also need to update cr0_guest_host_mask and exception_bitmap. | ||
10911 | */ | ||
10912 | update_exception_bitmap(vcpu); | ||
10913 | vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0); | ||
10914 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
10915 | 10899 | ||
10916 | /* | 10900 | /* Same as above - no reason to call set_cr4_guest_host_mask(). */ |
10917 | * Note that CR4_GUEST_HOST_MASK is already set in the original vmcs01 | ||
10918 | * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask(); | ||
10919 | */ | ||
10920 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); | 10901 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); |
10921 | kvm_set_cr4(vcpu, vmcs12->host_cr4); | 10902 | kvm_set_cr4(vcpu, vmcs12->host_cr4); |
10922 | 10903 | ||
@@ -11545,9 +11526,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
11545 | 11526 | ||
11546 | .get_pkru = vmx_get_pkru, | 11527 | .get_pkru = vmx_get_pkru, |
11547 | 11528 | ||
11548 | .fpu_activate = vmx_fpu_activate, | ||
11549 | .fpu_deactivate = vmx_fpu_deactivate, | ||
11550 | |||
11551 | .tlb_flush = vmx_flush_tlb, | 11529 | .tlb_flush = vmx_flush_tlb, |
11552 | 11530 | ||
11553 | .run = vmx_vcpu_run, | 11531 | .run = vmx_vcpu_run, |
@@ -11572,6 +11550,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
11572 | .get_enable_apicv = vmx_get_enable_apicv, | 11550 | .get_enable_apicv = vmx_get_enable_apicv, |
11573 | .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, | 11551 | .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, |
11574 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | 11552 | .load_eoi_exitmap = vmx_load_eoi_exitmap, |
11553 | .apicv_post_state_restore = vmx_apicv_post_state_restore, | ||
11575 | .hwapic_irr_update = vmx_hwapic_irr_update, | 11554 | .hwapic_irr_update = vmx_hwapic_irr_update, |
11576 | .hwapic_isr_update = vmx_hwapic_isr_update, | 11555 | .hwapic_isr_update = vmx_hwapic_isr_update, |
11577 | .sync_pir_to_irr = vmx_sync_pir_to_irr, | 11556 | .sync_pir_to_irr = vmx_sync_pir_to_irr, |
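The vmx.c hunks above are fragmented across several @@ blocks, so as orientation: the old monolithic nested_vmx_run() is split into check_vmentry_prereqs() (control-field checks that make the VMLAUNCH/VMRESUME instruction itself fail via nested_vmx_failValid), check_vmentry_postreqs() (guest-state checks that make the nested entry fail with EXIT_REASON_INVALID_STATE), and enter_vmx_non_root_mode() (the actual switch to vmcs02, now callable outside the emulated-instruction path). A condensed outline of the resulting flow, paraphrased from the hunks rather than quoted verbatim, with locals, launch_state, HLT and nested_run_pending handling trimmed:

        static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        {
                struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
                u32 exit_qual;
                int ret;

                /* 1) Checks that fail the instruction with a VM-instruction error. */
                ret = check_vmentry_prereqs(vcpu, vmcs12);
                if (ret) {
                        nested_vmx_failValid(vcpu, ret);
                        goto out;
                }

                /* RIP is advanced here; the trap flag no longer applies below. */
                skip_emulated_instruction(vcpu);

                /* 2) Checks that fail the entry, reflected to L1 as an exit. */
                ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual);
                if (ret) {
                        nested_vmx_entry_failure(vcpu, vmcs12,
                                                 EXIT_REASON_INVALID_STATE, exit_qual);
                        return 1;
                }

                /* 3) Load vmcs02 and enter L2 (also reusable outside this path). */
                ret = enter_vmx_non_root_mode(vcpu, true);
                if (ret)
                        return ret;

                return 1;
        out:
                /* Complete the emulated instruction as in the hunk above. */
                return kvm_skip_emulated_instruction(vcpu);
        }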
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e52c9088660f..b2a4b11274b0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -180,6 +180,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
180 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, | 180 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, |
181 | { "irq_injections", VCPU_STAT(irq_injections) }, | 181 | { "irq_injections", VCPU_STAT(irq_injections) }, |
182 | { "nmi_injections", VCPU_STAT(nmi_injections) }, | 182 | { "nmi_injections", VCPU_STAT(nmi_injections) }, |
183 | { "req_event", VCPU_STAT(req_event) }, | ||
183 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, | 184 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, |
184 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, | 185 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, |
185 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, | 186 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, |
@@ -190,6 +191,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
190 | { "mmu_unsync", VM_STAT(mmu_unsync) }, | 191 | { "mmu_unsync", VM_STAT(mmu_unsync) }, |
191 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 192 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
192 | { "largepages", VM_STAT(lpages) }, | 193 | { "largepages", VM_STAT(lpages) }, |
194 | { "max_mmu_page_hash_collisions", | ||
195 | VM_STAT(max_mmu_page_hash_collisions) }, | ||
193 | { NULL } | 196 | { NULL } |
194 | }; | 197 | }; |
195 | 198 | ||
@@ -1139,6 +1142,7 @@ struct pvclock_gtod_data { | |||
1139 | 1142 | ||
1140 | u64 boot_ns; | 1143 | u64 boot_ns; |
1141 | u64 nsec_base; | 1144 | u64 nsec_base; |
1145 | u64 wall_time_sec; | ||
1142 | }; | 1146 | }; |
1143 | 1147 | ||
1144 | static struct pvclock_gtod_data pvclock_gtod_data; | 1148 | static struct pvclock_gtod_data pvclock_gtod_data; |
@@ -1162,6 +1166,8 @@ static void update_pvclock_gtod(struct timekeeper *tk) | |||
1162 | vdata->boot_ns = boot_ns; | 1166 | vdata->boot_ns = boot_ns; |
1163 | vdata->nsec_base = tk->tkr_mono.xtime_nsec; | 1167 | vdata->nsec_base = tk->tkr_mono.xtime_nsec; |
1164 | 1168 | ||
1169 | vdata->wall_time_sec = tk->xtime_sec; | ||
1170 | |||
1165 | write_seqcount_end(&vdata->seq); | 1171 | write_seqcount_end(&vdata->seq); |
1166 | } | 1172 | } |
1167 | #endif | 1173 | #endif |
@@ -1623,6 +1629,28 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now) | |||
1623 | return mode; | 1629 | return mode; |
1624 | } | 1630 | } |
1625 | 1631 | ||
1632 | static int do_realtime(struct timespec *ts, u64 *cycle_now) | ||
1633 | { | ||
1634 | struct pvclock_gtod_data *gtod = &pvclock_gtod_data; | ||
1635 | unsigned long seq; | ||
1636 | int mode; | ||
1637 | u64 ns; | ||
1638 | |||
1639 | do { | ||
1640 | seq = read_seqcount_begin(&gtod->seq); | ||
1641 | mode = gtod->clock.vclock_mode; | ||
1642 | ts->tv_sec = gtod->wall_time_sec; | ||
1643 | ns = gtod->nsec_base; | ||
1644 | ns += vgettsc(cycle_now); | ||
1645 | ns >>= gtod->clock.shift; | ||
1646 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | ||
1647 | |||
1648 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | ||
1649 | ts->tv_nsec = ns; | ||
1650 | |||
1651 | return mode; | ||
1652 | } | ||
1653 | |||
1626 | /* returns true if host is using tsc clocksource */ | 1654 | /* returns true if host is using tsc clocksource */ |
1627 | static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) | 1655 | static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) |
1628 | { | 1656 | { |
@@ -1632,6 +1660,17 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) | |||
1632 | 1660 | ||
1633 | return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC; | 1661 | return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC; |
1634 | } | 1662 | } |
1663 | |||
1664 | /* returns true if host is using tsc clocksource */ | ||
1665 | static bool kvm_get_walltime_and_clockread(struct timespec *ts, | ||
1666 | u64 *cycle_now) | ||
1667 | { | ||
1668 | /* checked again under seqlock below */ | ||
1669 | if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) | ||
1670 | return false; | ||
1671 | |||
1672 | return do_realtime(ts, cycle_now) == VCLOCK_TSC; | ||
1673 | } | ||
1635 | #endif | 1674 | #endif |
1636 | 1675 | ||
1637 | /* | 1676 | /* |
@@ -1772,7 +1811,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) | |||
1772 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1811 | struct kvm_vcpu_arch *vcpu = &v->arch; |
1773 | struct pvclock_vcpu_time_info guest_hv_clock; | 1812 | struct pvclock_vcpu_time_info guest_hv_clock; |
1774 | 1813 | ||
1775 | if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, | 1814 | if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time, |
1776 | &guest_hv_clock, sizeof(guest_hv_clock)))) | 1815 | &guest_hv_clock, sizeof(guest_hv_clock)))) |
1777 | return; | 1816 | return; |
1778 | 1817 | ||
@@ -1793,9 +1832,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) | |||
1793 | BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); | 1832 | BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); |
1794 | 1833 | ||
1795 | vcpu->hv_clock.version = guest_hv_clock.version + 1; | 1834 | vcpu->hv_clock.version = guest_hv_clock.version + 1; |
1796 | kvm_write_guest_cached(v->kvm, &vcpu->pv_time, | 1835 | kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, |
1797 | &vcpu->hv_clock, | 1836 | &vcpu->hv_clock, |
1798 | sizeof(vcpu->hv_clock.version)); | 1837 | sizeof(vcpu->hv_clock.version)); |
1799 | 1838 | ||
1800 | smp_wmb(); | 1839 | smp_wmb(); |
1801 | 1840 | ||
@@ -1809,16 +1848,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) | |||
1809 | 1848 | ||
1810 | trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); | 1849 | trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); |
1811 | 1850 | ||
1812 | kvm_write_guest_cached(v->kvm, &vcpu->pv_time, | 1851 | kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, |
1813 | &vcpu->hv_clock, | 1852 | &vcpu->hv_clock, |
1814 | sizeof(vcpu->hv_clock)); | 1853 | sizeof(vcpu->hv_clock)); |
1815 | 1854 | ||
1816 | smp_wmb(); | 1855 | smp_wmb(); |
1817 | 1856 | ||
1818 | vcpu->hv_clock.version++; | 1857 | vcpu->hv_clock.version++; |
1819 | kvm_write_guest_cached(v->kvm, &vcpu->pv_time, | 1858 | kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, |
1820 | &vcpu->hv_clock, | 1859 | &vcpu->hv_clock, |
1821 | sizeof(vcpu->hv_clock.version)); | 1860 | sizeof(vcpu->hv_clock.version)); |
1822 | } | 1861 | } |
1823 | 1862 | ||
1824 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1863 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
@@ -2051,7 +2090,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | |||
2051 | return 0; | 2090 | return 0; |
2052 | } | 2091 | } |
2053 | 2092 | ||
2054 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, | 2093 | if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa, |
2055 | sizeof(u32))) | 2094 | sizeof(u32))) |
2056 | return 1; | 2095 | return 1; |
2057 | 2096 | ||
@@ -2070,7 +2109,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) | |||
2070 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) | 2109 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) |
2071 | return; | 2110 | return; |
2072 | 2111 | ||
2073 | if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2112 | if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime, |
2074 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) | 2113 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) |
2075 | return; | 2114 | return; |
2076 | 2115 | ||
@@ -2081,7 +2120,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) | |||
2081 | 2120 | ||
2082 | vcpu->arch.st.steal.version += 1; | 2121 | vcpu->arch.st.steal.version += 1; |
2083 | 2122 | ||
2084 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2123 | kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, |
2085 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); | 2124 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); |
2086 | 2125 | ||
2087 | smp_wmb(); | 2126 | smp_wmb(); |
@@ -2090,14 +2129,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu) | |||
2090 | vcpu->arch.st.last_steal; | 2129 | vcpu->arch.st.last_steal; |
2091 | vcpu->arch.st.last_steal = current->sched_info.run_delay; | 2130 | vcpu->arch.st.last_steal = current->sched_info.run_delay; |
2092 | 2131 | ||
2093 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2132 | kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, |
2094 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); | 2133 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); |
2095 | 2134 | ||
2096 | smp_wmb(); | 2135 | smp_wmb(); |
2097 | 2136 | ||
2098 | vcpu->arch.st.steal.version += 1; | 2137 | vcpu->arch.st.steal.version += 1; |
2099 | 2138 | ||
2100 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2139 | kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, |
2101 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); | 2140 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); |
2102 | } | 2141 | } |
2103 | 2142 | ||
@@ -2202,7 +2241,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2202 | if (!(data & 1)) | 2241 | if (!(data & 1)) |
2203 | break; | 2242 | break; |
2204 | 2243 | ||
2205 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, | 2244 | if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, |
2206 | &vcpu->arch.pv_time, data & ~1ULL, | 2245 | &vcpu->arch.pv_time, data & ~1ULL, |
2207 | sizeof(struct pvclock_vcpu_time_info))) | 2246 | sizeof(struct pvclock_vcpu_time_info))) |
2208 | vcpu->arch.pv_time_enabled = false; | 2247 | vcpu->arch.pv_time_enabled = false; |
@@ -2223,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2223 | if (data & KVM_STEAL_RESERVED_MASK) | 2262 | if (data & KVM_STEAL_RESERVED_MASK) |
2224 | return 1; | 2263 | return 1; |
2225 | 2264 | ||
2226 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, | 2265 | if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime, |
2227 | data & KVM_STEAL_VALID_BITS, | 2266 | data & KVM_STEAL_VALID_BITS, |
2228 | sizeof(struct kvm_steal_time))) | 2267 | sizeof(struct kvm_steal_time))) |
2229 | return 1; | 2268 | return 1; |
@@ -2633,6 +2672,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2633 | case KVM_CAP_DISABLE_QUIRKS: | 2672 | case KVM_CAP_DISABLE_QUIRKS: |
2634 | case KVM_CAP_SET_BOOT_CPU_ID: | 2673 | case KVM_CAP_SET_BOOT_CPU_ID: |
2635 | case KVM_CAP_SPLIT_IRQCHIP: | 2674 | case KVM_CAP_SPLIT_IRQCHIP: |
2675 | case KVM_CAP_IMMEDIATE_EXIT: | ||
2636 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2676 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2637 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2677 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2638 | case KVM_CAP_PCI_2_3: | 2678 | case KVM_CAP_PCI_2_3: |
@@ -2836,7 +2876,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) | |||
2836 | 2876 | ||
2837 | vcpu->arch.st.steal.preempted = 1; | 2877 | vcpu->arch.st.steal.preempted = 1; |
2838 | 2878 | ||
2839 | kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2879 | kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime, |
2840 | &vcpu->arch.st.steal.preempted, | 2880 | &vcpu->arch.st.steal.preempted, |
2841 | offsetof(struct kvm_steal_time, preempted), | 2881 | offsetof(struct kvm_steal_time, preempted), |
2842 | sizeof(vcpu->arch.st.steal.preempted)); | 2882 | sizeof(vcpu->arch.st.steal.preempted)); |
@@ -2870,7 +2910,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2870 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2910 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
2871 | struct kvm_lapic_state *s) | 2911 | struct kvm_lapic_state *s) |
2872 | { | 2912 | { |
2873 | if (vcpu->arch.apicv_active) | 2913 | if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) |
2874 | kvm_x86_ops->sync_pir_to_irr(vcpu); | 2914 | kvm_x86_ops->sync_pir_to_irr(vcpu); |
2875 | 2915 | ||
2876 | return kvm_apic_get_state(vcpu, s); | 2916 | return kvm_apic_get_state(vcpu, s); |
@@ -3897,7 +3937,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, | |||
3897 | goto split_irqchip_unlock; | 3937 | goto split_irqchip_unlock; |
3898 | /* Pairs with irqchip_in_kernel. */ | 3938 | /* Pairs with irqchip_in_kernel. */ |
3899 | smp_wmb(); | 3939 | smp_wmb(); |
3900 | kvm->arch.irqchip_split = true; | 3940 | kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; |
3901 | kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; | 3941 | kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; |
3902 | r = 0; | 3942 | r = 0; |
3903 | split_irqchip_unlock: | 3943 | split_irqchip_unlock: |
@@ -3960,40 +4000,41 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3960 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); | 4000 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); |
3961 | break; | 4001 | break; |
3962 | case KVM_CREATE_IRQCHIP: { | 4002 | case KVM_CREATE_IRQCHIP: { |
3963 | struct kvm_pic *vpic; | ||
3964 | |||
3965 | mutex_lock(&kvm->lock); | 4003 | mutex_lock(&kvm->lock); |
4004 | |||
3966 | r = -EEXIST; | 4005 | r = -EEXIST; |
3967 | if (kvm->arch.vpic) | 4006 | if (irqchip_in_kernel(kvm)) |
3968 | goto create_irqchip_unlock; | 4007 | goto create_irqchip_unlock; |
4008 | |||
3969 | r = -EINVAL; | 4009 | r = -EINVAL; |
3970 | if (kvm->created_vcpus) | 4010 | if (kvm->created_vcpus) |
3971 | goto create_irqchip_unlock; | 4011 | goto create_irqchip_unlock; |
3972 | r = -ENOMEM; | 4012 | |
3973 | vpic = kvm_create_pic(kvm); | 4013 | r = kvm_pic_init(kvm); |
3974 | if (vpic) { | 4014 | if (r) |
3975 | r = kvm_ioapic_init(kvm); | 4015 | goto create_irqchip_unlock; |
3976 | if (r) { | 4016 | |
3977 | mutex_lock(&kvm->slots_lock); | 4017 | r = kvm_ioapic_init(kvm); |
3978 | kvm_destroy_pic(vpic); | 4018 | if (r) { |
3979 | mutex_unlock(&kvm->slots_lock); | 4019 | mutex_lock(&kvm->slots_lock); |
3980 | goto create_irqchip_unlock; | 4020 | kvm_pic_destroy(kvm); |
3981 | } | 4021 | mutex_unlock(&kvm->slots_lock); |
3982 | } else | ||
3983 | goto create_irqchip_unlock; | 4022 | goto create_irqchip_unlock; |
4023 | } | ||
4024 | |||
3984 | r = kvm_setup_default_irq_routing(kvm); | 4025 | r = kvm_setup_default_irq_routing(kvm); |
3985 | if (r) { | 4026 | if (r) { |
3986 | mutex_lock(&kvm->slots_lock); | 4027 | mutex_lock(&kvm->slots_lock); |
3987 | mutex_lock(&kvm->irq_lock); | 4028 | mutex_lock(&kvm->irq_lock); |
3988 | kvm_ioapic_destroy(kvm); | 4029 | kvm_ioapic_destroy(kvm); |
3989 | kvm_destroy_pic(vpic); | 4030 | kvm_pic_destroy(kvm); |
3990 | mutex_unlock(&kvm->irq_lock); | 4031 | mutex_unlock(&kvm->irq_lock); |
3991 | mutex_unlock(&kvm->slots_lock); | 4032 | mutex_unlock(&kvm->slots_lock); |
3992 | goto create_irqchip_unlock; | 4033 | goto create_irqchip_unlock; |
3993 | } | 4034 | } |
3994 | /* Write kvm->irq_routing before kvm->arch.vpic. */ | 4035 | /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ |
3995 | smp_wmb(); | 4036 | smp_wmb(); |
3996 | kvm->arch.vpic = vpic; | 4037 | kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; |
3997 | create_irqchip_unlock: | 4038 | create_irqchip_unlock: |
3998 | mutex_unlock(&kvm->lock); | 4039 | mutex_unlock(&kvm->lock); |
3999 | break; | 4040 | break; |
@@ -4029,7 +4070,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
4029 | } | 4070 | } |
4030 | 4071 | ||
4031 | r = -ENXIO; | 4072 | r = -ENXIO; |
4032 | if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) | 4073 | if (!irqchip_kernel(kvm)) |
4033 | goto get_irqchip_out; | 4074 | goto get_irqchip_out; |
4034 | r = kvm_vm_ioctl_get_irqchip(kvm, chip); | 4075 | r = kvm_vm_ioctl_get_irqchip(kvm, chip); |
4035 | if (r) | 4076 | if (r) |
@@ -4053,7 +4094,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
4053 | } | 4094 | } |
4054 | 4095 | ||
4055 | r = -ENXIO; | 4096 | r = -ENXIO; |
4056 | if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) | 4097 | if (!irqchip_kernel(kvm)) |
4057 | goto set_irqchip_out; | 4098 | goto set_irqchip_out; |
4058 | r = kvm_vm_ioctl_set_irqchip(kvm, chip); | 4099 | r = kvm_vm_ioctl_set_irqchip(kvm, chip); |
4059 | if (r) | 4100 | if (r) |
@@ -4462,6 +4503,21 @@ out: | |||
4462 | } | 4503 | } |
4463 | EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); | 4504 | EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); |
4464 | 4505 | ||
4506 | static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | ||
4507 | gpa_t gpa, bool write) | ||
4508 | { | ||
4509 | /* For APIC access vmexit */ | ||
4510 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
4511 | return 1; | ||
4512 | |||
4513 | if (vcpu_match_mmio_gpa(vcpu, gpa)) { | ||
4514 | trace_vcpu_match_mmio(gva, gpa, write, true); | ||
4515 | return 1; | ||
4516 | } | ||
4517 | |||
4518 | return 0; | ||
4519 | } | ||
4520 | |||
4465 | static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | 4521 | static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, |
4466 | gpa_t *gpa, struct x86_exception *exception, | 4522 | gpa_t *gpa, struct x86_exception *exception, |
4467 | bool write) | 4523 | bool write) |
@@ -4488,16 +4544,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | |||
4488 | if (*gpa == UNMAPPED_GVA) | 4544 | if (*gpa == UNMAPPED_GVA) |
4489 | return -1; | 4545 | return -1; |
4490 | 4546 | ||
4491 | /* For APIC access vmexit */ | 4547 | return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write); |
4492 | if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
4493 | return 1; | ||
4494 | |||
4495 | if (vcpu_match_mmio_gpa(vcpu, *gpa)) { | ||
4496 | trace_vcpu_match_mmio(gva, *gpa, write, true); | ||
4497 | return 1; | ||
4498 | } | ||
4499 | |||
4500 | return 0; | ||
4501 | } | 4548 | } |
4502 | 4549 | ||
4503 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 4550 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
@@ -4594,6 +4641,22 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, | |||
4594 | int handled, ret; | 4641 | int handled, ret; |
4595 | bool write = ops->write; | 4642 | bool write = ops->write; |
4596 | struct kvm_mmio_fragment *frag; | 4643 | struct kvm_mmio_fragment *frag; |
4644 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | ||
4645 | |||
4646 | /* | ||
4647 | * If the exit was due to a NPF we may already have a GPA. | ||
4648 | * If the GPA is present, use it to avoid the GVA to GPA table walk. | ||
4649 | * Note, this cannot be used on string operations since string | ||
4650 | * operation using rep will only have the initial GPA from the NPF | ||
4651 | * occurred. | ||
4652 | */ | ||
4653 | if (vcpu->arch.gpa_available && | ||
4654 | emulator_can_use_gpa(ctxt) && | ||
4655 | vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) && | ||
4656 | (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) { | ||
4657 | gpa = exception->address; | ||
4658 | goto mmio; | ||
4659 | } | ||
4597 | 4660 | ||
4598 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); | 4661 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); |
4599 | 4662 | ||
@@ -5610,6 +5673,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
5610 | } | 5673 | } |
5611 | 5674 | ||
5612 | restart: | 5675 | restart: |
5676 | /* Save the faulting GPA (cr2) in the address field */ | ||
5677 | ctxt->exception.address = cr2; | ||
5678 | |||
5613 | r = x86_emulate_insn(ctxt); | 5679 | r = x86_emulate_insn(ctxt); |
5614 | 5680 | ||
5615 | if (r == EMULATION_INTERCEPTED) | 5681 | if (r == EMULATION_INTERCEPTED) |
@@ -5924,9 +5990,6 @@ static void kvm_set_mmio_spte_mask(void) | |||
5924 | /* Mask the reserved physical address bits. */ | 5990 | /* Mask the reserved physical address bits. */ |
5925 | mask = rsvd_bits(maxphyaddr, 51); | 5991 | mask = rsvd_bits(maxphyaddr, 51); |
5926 | 5992 | ||
5927 | /* Bit 62 is always reserved for 32bit host. */ | ||
5928 | mask |= 0x3ull << 62; | ||
5929 | |||
5930 | /* Set the present bit. */ | 5993 | /* Set the present bit. */ |
5931 | mask |= 1ull; | 5994 | mask |= 1ull; |
5932 | 5995 | ||
@@ -6025,7 +6088,7 @@ int kvm_arch_init(void *opaque) | |||
6025 | 6088 | ||
6026 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 6089 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
6027 | PT_DIRTY_MASK, PT64_NX_MASK, 0, | 6090 | PT_DIRTY_MASK, PT64_NX_MASK, 0, |
6028 | PT_PRESENT_MASK); | 6091 | PT_PRESENT_MASK, 0); |
6029 | kvm_timer_init(); | 6092 | kvm_timer_init(); |
6030 | 6093 | ||
6031 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | 6094 | perf_register_guest_info_callbacks(&kvm_guest_cbs); |
@@ -6087,6 +6150,35 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
6087 | } | 6150 | } |
6088 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); | 6151 | EXPORT_SYMBOL_GPL(kvm_emulate_halt); |
6089 | 6152 | ||
6153 | #ifdef CONFIG_X86_64 | ||
6154 | static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, | ||
6155 | unsigned long clock_type) | ||
6156 | { | ||
6157 | struct kvm_clock_pairing clock_pairing; | ||
6158 | struct timespec ts; | ||
6159 | u64 cycle; | ||
6160 | int ret; | ||
6161 | |||
6162 | if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) | ||
6163 | return -KVM_EOPNOTSUPP; | ||
6164 | |||
6165 | if (kvm_get_walltime_and_clockread(&ts, &cycle) == false) | ||
6166 | return -KVM_EOPNOTSUPP; | ||
6167 | |||
6168 | clock_pairing.sec = ts.tv_sec; | ||
6169 | clock_pairing.nsec = ts.tv_nsec; | ||
6170 | clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); | ||
6171 | clock_pairing.flags = 0; | ||
6172 | |||
6173 | ret = 0; | ||
6174 | if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, | ||
6175 | sizeof(struct kvm_clock_pairing))) | ||
6176 | ret = -KVM_EFAULT; | ||
6177 | |||
6178 | return ret; | ||
6179 | } | ||
6180 | #endif | ||
6181 | |||
6090 | /* | 6182 | /* |
6091 | * kvm_pv_kick_cpu_op: Kick a vcpu. | 6183 | * kvm_pv_kick_cpu_op: Kick a vcpu. |
6092 | * | 6184 | * |
@@ -6151,6 +6243,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
6151 | kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); | 6243 | kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); |
6152 | ret = 0; | 6244 | ret = 0; |
6153 | break; | 6245 | break; |
6246 | #ifdef CONFIG_X86_64 | ||
6247 | case KVM_HC_CLOCK_PAIRING: | ||
6248 | ret = kvm_pv_clock_pairing(vcpu, a0, a1); | ||
6249 | break; | ||
6250 | #endif | ||
6154 | default: | 6251 | default: |
6155 | ret = -KVM_ENOSYS; | 6252 | ret = -KVM_ENOSYS; |
6156 | break; | 6253 | break; |
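The KVM_HC_CLOCK_PAIRING handler above fills a kvm_clock_pairing structure and copies it to the guest-physical address passed in a0. Going only by the fields written in kvm_pv_clock_pairing() (sec, nsec, tsc, flags), the guest-visible layout is roughly the sketch below; the reserved tail is an assumption, and the authoritative definition is the one in the x86 kvm_para.h UAPI header, not this sketch:

        struct kvm_clock_pairing {
                __s64 sec;     /* CLOCK_REALTIME seconds sampled on the host */
                __s64 nsec;    /* CLOCK_REALTIME nanoseconds sampled on the host */
                __u64 tsc;     /* guest TSC value paired with sec/nsec (kvm_read_l1_tsc) */
                __u32 flags;   /* written as 0 above */
                __u32 pad[9];  /* assumption: reserved/padding space in the UAPI struct */
        };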
@@ -6564,7 +6661,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | |||
6564 | if (irqchip_split(vcpu->kvm)) | 6661 | if (irqchip_split(vcpu->kvm)) |
6565 | kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); | 6662 | kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); |
6566 | else { | 6663 | else { |
6567 | if (vcpu->arch.apicv_active) | 6664 | if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) |
6568 | kvm_x86_ops->sync_pir_to_irr(vcpu); | 6665 | kvm_x86_ops->sync_pir_to_irr(vcpu); |
6569 | kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); | 6666 | kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); |
6570 | } | 6667 | } |
@@ -6655,10 +6752,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6655 | r = 0; | 6752 | r = 0; |
6656 | goto out; | 6753 | goto out; |
6657 | } | 6754 | } |
6658 | if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) { | ||
6659 | vcpu->fpu_active = 0; | ||
6660 | kvm_x86_ops->fpu_deactivate(vcpu); | ||
6661 | } | ||
6662 | if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { | 6755 | if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { |
6663 | /* Page is swapped out. Do synthetic halt */ | 6756 | /* Page is swapped out. Do synthetic halt */ |
6664 | vcpu->arch.apf.halted = true; | 6757 | vcpu->arch.apf.halted = true; |
@@ -6718,21 +6811,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6718 | kvm_hv_process_stimers(vcpu); | 6811 | kvm_hv_process_stimers(vcpu); |
6719 | } | 6812 | } |
6720 | 6813 | ||
6721 | /* | ||
6722 | * KVM_REQ_EVENT is not set when posted interrupts are set by | ||
6723 | * VT-d hardware, so we have to update RVI unconditionally. | ||
6724 | */ | ||
6725 | if (kvm_lapic_enabled(vcpu)) { | ||
6726 | /* | ||
6727 | * Update architecture specific hints for APIC | ||
6728 | * virtual interrupt delivery. | ||
6729 | */ | ||
6730 | if (vcpu->arch.apicv_active) | ||
6731 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
6732 | kvm_lapic_find_highest_irr(vcpu)); | ||
6733 | } | ||
6734 | |||
6735 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 6814 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
6815 | ++vcpu->stat.req_event; | ||
6736 | kvm_apic_accept_events(vcpu); | 6816 | kvm_apic_accept_events(vcpu); |
6737 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | 6817 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { |
6738 | r = 1; | 6818 | r = 1; |
@@ -6773,22 +6853,40 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6773 | preempt_disable(); | 6853 | preempt_disable(); |
6774 | 6854 | ||
6775 | kvm_x86_ops->prepare_guest_switch(vcpu); | 6855 | kvm_x86_ops->prepare_guest_switch(vcpu); |
6776 | if (vcpu->fpu_active) | 6856 | kvm_load_guest_fpu(vcpu); |
6777 | kvm_load_guest_fpu(vcpu); | 6857 | |
6858 | /* | ||
6859 | * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt | ||
6860 | * IPI are then delayed after guest entry, which ensures that they | ||
6861 | * result in virtual interrupt delivery. | ||
6862 | */ | ||
6863 | local_irq_disable(); | ||
6778 | vcpu->mode = IN_GUEST_MODE; | 6864 | vcpu->mode = IN_GUEST_MODE; |
6779 | 6865 | ||
6780 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 6866 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
6781 | 6867 | ||
6782 | /* | 6868 | /* |
6783 | * We should set ->mode before check ->requests, | 6869 | * 1) We should set ->mode before checking ->requests. Please see |
6784 | * Please see the comment in kvm_make_all_cpus_request. | 6870 | * the comment in kvm_make_all_cpus_request. |
6785 | * This also orders the write to mode from any reads | 6871 | * |
6786 | * to the page tables done while the VCPU is running. | 6872 | * 2) For APICv, we should set ->mode before checking PIR.ON. This |
6787 | * Please see the comment in kvm_flush_remote_tlbs. | 6873 | * pairs with the memory barrier implicit in pi_test_and_set_on |
6874 | * (see vmx_deliver_posted_interrupt). | ||
6875 | * | ||
6876 | * 3) This also orders the write to mode from any reads to the page | ||
6877 | * tables done while the VCPU is running. Please see the comment | ||
6878 | * in kvm_flush_remote_tlbs. | ||
6788 | */ | 6879 | */ |
6789 | smp_mb__after_srcu_read_unlock(); | 6880 | smp_mb__after_srcu_read_unlock(); |
6790 | 6881 | ||
6791 | local_irq_disable(); | 6882 | /* |
6883 | * This handles the case where a posted interrupt was | ||
6884 | * notified with kvm_vcpu_kick. | ||
6885 | */ | ||
6886 | if (kvm_lapic_enabled(vcpu)) { | ||
6887 | if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) | ||
6888 | kvm_x86_ops->sync_pir_to_irr(vcpu); | ||
6889 | } | ||
6792 | 6890 | ||
6793 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests | 6891 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests |
6794 | || need_resched() || signal_pending(current)) { | 6892 | || need_resched() || signal_pending(current)) { |
@@ -6927,6 +7025,9 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) | |||
6927 | 7025 | ||
6928 | static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) | 7026 | static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) |
6929 | { | 7027 | { |
7028 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) | ||
7029 | kvm_x86_ops->check_nested_events(vcpu, false); | ||
7030 | |||
6930 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && | 7031 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
6931 | !vcpu->arch.apf.halted); | 7032 | !vcpu->arch.apf.halted); |
6932 | } | 7033 | } |
@@ -7098,7 +7199,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
7098 | } else | 7199 | } else |
7099 | WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); | 7200 | WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); |
7100 | 7201 | ||
7101 | r = vcpu_run(vcpu); | 7202 | if (kvm_run->immediate_exit) |
7203 | r = -EINTR; | ||
7204 | else | ||
7205 | r = vcpu_run(vcpu); | ||
7102 | 7206 | ||
7103 | out: | 7207 | out: |
7104 | post_kvm_run_save(vcpu); | 7208 | post_kvm_run_save(vcpu); |
@@ -8293,9 +8397,6 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) | |||
8293 | 8397 | ||
8294 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 8398 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
8295 | { | 8399 | { |
8296 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) | ||
8297 | kvm_x86_ops->check_nested_events(vcpu, false); | ||
8298 | |||
8299 | return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); | 8400 | return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); |
8300 | } | 8401 | } |
8301 | 8402 | ||
@@ -8432,9 +8533,8 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
8432 | 8533 | ||
8433 | static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) | 8534 | static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) |
8434 | { | 8535 | { |
8435 | 8536 | return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apf.data, &val, | |
8436 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, | 8537 | sizeof(val)); |
8437 | sizeof(val)); | ||
8438 | } | 8538 | } |
8439 | 8539 | ||
8440 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | 8540 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, |
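KVM_CAP_IMMEDIATE_EXIT and the kvm_run->immediate_exit check in kvm_arch_vcpu_ioctl_run() above implement the "race-free exit from KVM_RUN without POSIX signals" item from the merge description. A minimal userspace sketch follows, assuming a VMM that already has a vCPU fd and its mmap'ed struct kvm_run, plus headers new enough to declare immediate_exit; run_vcpu_once() is a hypothetical helper, not part of any real VMM:

        #include <errno.h>
        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        /* Returns 0 on a normal or requested exit, -1 on a real error. */
        static int run_vcpu_once(int vcpu_fd, struct kvm_run *run, int exit_requested)
        {
                /*
                 * If another thread requested an exit before we reach KVM_RUN,
                 * setting immediate_exit makes the ioctl return EINTR without
                 * entering the guest, so the request cannot be lost to a race.
                 */
                run->immediate_exit = exit_requested ? 1 : 0;

                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                        return errno == EINTR ? 0 : -1;

                return 0;       /* inspect run->exit_reason as usual */
        }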
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index bdce33291161..384f661a6496 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig | |||
@@ -90,4 +90,16 @@ config PTP_1588_CLOCK_PCH | |||
90 | To compile this driver as a module, choose M here: the module | 90 | To compile this driver as a module, choose M here: the module |
91 | will be called ptp_pch. | 91 | will be called ptp_pch. |
92 | 92 | ||
93 | config PTP_1588_CLOCK_KVM | ||
94 | tristate "KVM virtual PTP clock" | ||
95 | depends on PTP_1588_CLOCK | ||
96 | depends on KVM_GUEST && X86 | ||
97 | default y | ||
98 | help | ||
99 | This driver adds support for using kvm infrastructure as a PTP | ||
100 | clock. This clock is only useful if you are using KVM guests. | ||
101 | |||
102 | To compile this driver as a module, choose M here: the module | ||
103 | will be called ptp_kvm. | ||
104 | |||
93 | endmenu | 105 | endmenu |
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 8b58597298de..530736161a8b 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile | |||
@@ -6,3 +6,4 @@ ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o | |||
6 | obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o | 6 | obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o |
7 | obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o | 7 | obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o |
8 | obj-$(CONFIG_PTP_1588_CLOCK_PCH) += ptp_pch.o | 8 | obj-$(CONFIG_PTP_1588_CLOCK_PCH) += ptp_pch.o |
9 | obj-$(CONFIG_PTP_1588_CLOCK_KVM) += ptp_kvm.o | ||
diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c new file mode 100644 index 000000000000..09b4df74291e --- /dev/null +++ b/drivers/ptp/ptp_kvm.c | |||
@@ -0,0 +1,207 @@ | |||
1 | /* | ||
2 | * Virtual PTP 1588 clock for use with KVM guests | ||
3 | * | ||
4 | * Copyright (C) 2017 Red Hat Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | */ | ||
17 | #include <linux/device.h> | ||
18 | #include <linux/err.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <uapi/linux/kvm_para.h> | ||
23 | #include <asm/kvm_para.h> | ||
24 | #include <asm/pvclock.h> | ||
25 | #include <asm/kvmclock.h> | ||
26 | #include <uapi/asm/kvm_para.h> | ||
27 | |||
28 | #include <linux/ptp_clock_kernel.h> | ||
29 | |||
30 | struct kvm_ptp_clock { | ||
31 | struct ptp_clock *ptp_clock; | ||
32 | struct ptp_clock_info caps; | ||
33 | }; | ||
34 | |||
35 | DEFINE_SPINLOCK(kvm_ptp_lock); | ||
36 | |||
37 | static struct pvclock_vsyscall_time_info *hv_clock; | ||
38 | |||
39 | static struct kvm_clock_pairing clock_pair; | ||
40 | static phys_addr_t clock_pair_gpa; | ||
41 | |||
42 | static int ptp_kvm_get_time_fn(ktime_t *device_time, | ||
43 | struct system_counterval_t *system_counter, | ||
44 | void *ctx) | ||
45 | { | ||
46 | unsigned long ret; | ||
47 | struct timespec64 tspec; | ||
48 | unsigned version; | ||
49 | int cpu; | ||
50 | struct pvclock_vcpu_time_info *src; | ||
51 | |||
52 | spin_lock(&kvm_ptp_lock); | ||
53 | |||
54 | preempt_disable_notrace(); | ||
55 | cpu = smp_processor_id(); | ||
56 | src = &hv_clock[cpu].pvti; | ||
57 | |||
58 | do { | ||
59 | /* | ||
60 | * We are using a TSC value read in the hosts | ||
61 | * kvm_hc_clock_pairing handling. | ||
62 | * So any changes to tsc_to_system_mul | ||
63 | * and tsc_shift or any other pvclock | ||
64 | * data invalidate that measurement. | ||
65 | */ | ||
66 | version = pvclock_read_begin(src); | ||
67 | |||
68 | ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, | ||
69 | clock_pair_gpa, | ||
70 | KVM_CLOCK_PAIRING_WALLCLOCK); | ||
71 | if (ret != 0) { | ||
72 | pr_err_ratelimited("clock pairing hypercall ret %lu\n", ret); | ||
73 | spin_unlock(&kvm_ptp_lock); | ||
74 | preempt_enable_notrace(); | ||
75 | return -EOPNOTSUPP; | ||
76 | } | ||
77 | |||
78 | tspec.tv_sec = clock_pair.sec; | ||
79 | tspec.tv_nsec = clock_pair.nsec; | ||
80 | ret = __pvclock_read_cycles(src, clock_pair.tsc); | ||
81 | } while (pvclock_read_retry(src, version)); | ||
82 | |||
83 | preempt_enable_notrace(); | ||
84 | |||
85 | system_counter->cycles = ret; | ||
86 | system_counter->cs = &kvm_clock; | ||
87 | |||
88 | *device_time = timespec64_to_ktime(tspec); | ||
89 | |||
90 | spin_unlock(&kvm_ptp_lock); | ||
91 | |||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | static int ptp_kvm_getcrosststamp(struct ptp_clock_info *ptp, | ||
96 | struct system_device_crosststamp *xtstamp) | ||
97 | { | ||
98 | return get_device_system_crosststamp(ptp_kvm_get_time_fn, NULL, | ||
99 | NULL, xtstamp); | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * PTP clock operations | ||
104 | */ | ||
105 | |||
106 | static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb) | ||
107 | { | ||
108 | return -EOPNOTSUPP; | ||
109 | } | ||
110 | |||
111 | static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta) | ||
112 | { | ||
113 | return -EOPNOTSUPP; | ||
114 | } | ||
115 | |||
116 | static int ptp_kvm_settime(struct ptp_clock_info *ptp, | ||
117 | const struct timespec64 *ts) | ||
118 | { | ||
119 | return -EOPNOTSUPP; | ||
120 | } | ||
121 | |||
122 | static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) | ||
123 | { | ||
124 | unsigned long ret; | ||
125 | struct timespec64 tspec; | ||
126 | |||
127 | spin_lock(&kvm_ptp_lock); | ||
128 | |||
129 | ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, | ||
130 | clock_pair_gpa, | ||
131 | KVM_CLOCK_PAIRING_WALLCLOCK); | ||
132 | if (ret != 0) { | ||
133 | pr_err_ratelimited("clock offset hypercall ret %lu\n", ret); | ||
134 | spin_unlock(&kvm_ptp_lock); | ||
135 | return -EOPNOTSUPP; | ||
136 | } | ||
137 | |||
138 | tspec.tv_sec = clock_pair.sec; | ||
139 | tspec.tv_nsec = clock_pair.nsec; | ||
140 | spin_unlock(&kvm_ptp_lock); | ||
141 | |||
142 | memcpy(ts, &tspec, sizeof(struct timespec64)); | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | static int ptp_kvm_enable(struct ptp_clock_info *ptp, | ||
148 | struct ptp_clock_request *rq, int on) | ||
149 | { | ||
150 | return -EOPNOTSUPP; | ||
151 | } | ||
152 | |||
153 | static struct ptp_clock_info ptp_kvm_caps = { | ||
154 | .owner = THIS_MODULE, | ||
155 | .name = "KVM virtual PTP", | ||
156 | .max_adj = 0, | ||
157 | .n_ext_ts = 0, | ||
158 | .n_pins = 0, | ||
159 | .pps = 0, | ||
160 | .adjfreq = ptp_kvm_adjfreq, | ||
161 | .adjtime = ptp_kvm_adjtime, | ||
162 | .gettime64 = ptp_kvm_gettime, | ||
163 | .settime64 = ptp_kvm_settime, | ||
164 | .enable = ptp_kvm_enable, | ||
165 | .getcrosststamp = ptp_kvm_getcrosststamp, | ||
166 | }; | ||
167 | |||
168 | /* module operations */ | ||
169 | |||
170 | static struct kvm_ptp_clock kvm_ptp_clock; | ||
171 | |||
172 | static void __exit ptp_kvm_exit(void) | ||
173 | { | ||
174 | ptp_clock_unregister(kvm_ptp_clock.ptp_clock); | ||
175 | } | ||
176 | |||
177 | static int __init ptp_kvm_init(void) | ||
178 | { | ||
179 | long ret; | ||
180 | |||
181 | clock_pair_gpa = slow_virt_to_phys(&clock_pair); | ||
182 | hv_clock = pvclock_pvti_cpu0_va(); | ||
183 | |||
184 | if (!hv_clock) | ||
185 | return -ENODEV; | ||
186 | |||
187 | ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, | ||
188 | KVM_CLOCK_PAIRING_WALLCLOCK); | ||
189 | if (ret == -KVM_ENOSYS || ret == -KVM_EOPNOTSUPP) | ||
190 | return -ENODEV; | ||
191 | |||
192 | kvm_ptp_clock.caps = ptp_kvm_caps; | ||
193 | |||
194 | kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); | ||
195 | |||
196 | if (IS_ERR(kvm_ptp_clock.ptp_clock)) | ||
197 | return PTR_ERR(kvm_ptp_clock.ptp_clock); | ||
198 | |||
199 | return 0; | ||
200 | } | ||
201 | |||
202 | module_init(ptp_kvm_init); | ||
203 | module_exit(ptp_kvm_exit); | ||
204 | |||
205 | MODULE_AUTHOR("Marcelo Tosatti <mtosatti@redhat.com>"); | ||
206 | MODULE_DESCRIPTION("PTP clock using KVMCLOCK"); | ||
207 | MODULE_LICENSE("GPL"); | ||
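Once ptp_kvm registers, the guest sees a /dev/ptpN character device whose cross-timestamp path ends up in the KVM_HC_CLOCK_PAIRING hypercall shown earlier. A userspace consumer would typically read paired host/guest timestamps with the PTP_SYS_OFFSET_PRECISE ioctl; the sketch below assumes a kernel and headers that provide that ioctl, and hard-codes /dev/ptp0 for brevity (a real program should match the "KVM virtual PTP" clock name first):

        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <linux/ptp_clock.h>

        int main(void)
        {
                int fd = open("/dev/ptp0", O_RDONLY);   /* assumption: ptp_kvm is ptp0 */
                struct ptp_sys_offset_precise req = { 0 };

                if (fd < 0 || ioctl(fd, PTP_SYS_OFFSET_PRECISE, &req) < 0) {
                        perror("PTP_SYS_OFFSET_PRECISE");
                        return 1;
                }

                /* device = host wall clock sample, sys_realtime = guest CLOCK_REALTIME. */
                printf("host %lld.%09u guest %lld.%09u\n",
                       (long long)req.device.sec, req.device.nsec,
                       (long long)req.sys_realtime.sec, req.sys_realtime.nsec);
                return 0;
        }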
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 5c970ce67949..fe797d6ef89d 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h | |||
@@ -23,20 +23,24 @@ | |||
23 | #include <linux/hrtimer.h> | 23 | #include <linux/hrtimer.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | 25 | ||
26 | struct arch_timer_kvm { | 26 | struct arch_timer_context { |
27 | /* Registers: control register, timer value */ | ||
28 | u32 cnt_ctl; | ||
29 | u64 cnt_cval; | ||
30 | |||
31 | /* Timer IRQ */ | ||
32 | struct kvm_irq_level irq; | ||
33 | |||
34 | /* Active IRQ state caching */ | ||
35 | bool active_cleared_last; | ||
36 | |||
27 | /* Virtual offset */ | 37 | /* Virtual offset */ |
28 | u64 cntvoff; | 38 | u64 cntvoff; |
29 | }; | 39 | }; |
30 | 40 | ||
31 | struct arch_timer_cpu { | 41 | struct arch_timer_cpu { |
32 | /* Registers: control register, timer value */ | 42 | struct arch_timer_context vtimer; |
33 | u32 cntv_ctl; /* Saved/restored */ | 43 | struct arch_timer_context ptimer; |
34 | u64 cntv_cval; /* Saved/restored */ | ||
35 | |||
36 | /* | ||
37 | * Anything that is not used directly from assembly code goes | ||
38 | * here. | ||
39 | */ | ||
40 | 44 | ||
41 | /* Background timer used when the guest is not running */ | 45 | /* Background timer used when the guest is not running */ |
42 | struct hrtimer timer; | 46 | struct hrtimer timer; |
@@ -47,21 +51,15 @@ struct arch_timer_cpu { | |||
47 | /* Background timer active */ | 51 | /* Background timer active */ |
48 | bool armed; | 52 | bool armed; |
49 | 53 | ||
50 | /* Timer IRQ */ | ||
51 | struct kvm_irq_level irq; | ||
52 | |||
53 | /* Active IRQ state caching */ | ||
54 | bool active_cleared_last; | ||
55 | |||
56 | /* Is the timer enabled */ | 54 | /* Is the timer enabled */ |
57 | bool enabled; | 55 | bool enabled; |
58 | }; | 56 | }; |
59 | 57 | ||
60 | int kvm_timer_hyp_init(void); | 58 | int kvm_timer_hyp_init(void); |
61 | int kvm_timer_enable(struct kvm_vcpu *vcpu); | 59 | int kvm_timer_enable(struct kvm_vcpu *vcpu); |
62 | void kvm_timer_init(struct kvm *kvm); | ||
63 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | 60 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, |
64 | const struct kvm_irq_level *irq); | 61 | const struct kvm_irq_level *virt_irq, |
62 | const struct kvm_irq_level *phys_irq); | ||
65 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); | 63 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); |
66 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); | 64 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); |
67 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); | 65 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); |
@@ -70,11 +68,16 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); | |||
70 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); | 68 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); |
71 | int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); | 69 | int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); |
72 | 70 | ||
73 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); | 71 | bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); |
74 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); | 72 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); |
75 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); | 73 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); |
76 | 74 | ||
75 | u64 kvm_phys_timer_read(void); | ||
76 | |||
77 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); | 77 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); |
78 | 78 | ||
79 | void kvm_timer_init_vhe(void); | 79 | void kvm_timer_init_vhe(void); |
80 | |||
81 | #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) | ||
82 | #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) | ||
80 | #endif | 83 | #endif |
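The header change splits the per-VCPU timer state into a virtual and a physical arch_timer_context, reachable through the new vcpu_vtimer()/vcpu_ptimer() macros, and kvm_timer_vcpu_reset() now takes one IRQ descriptor per timer. A minimal kernel-side sketch of the new reset call follows; the PPI numbers 27 (virtual timer) and 30 (non-secure physical timer) are the conventional assignments and are used here purely as illustrative values, as are the example_* names:

	/* Hedged sketch: reset both timers of a vcpu with the two-IRQ API. */
	static const struct kvm_irq_level example_vtimer_irq = { .irq = 27, .level = 1 };
	static const struct kvm_irq_level example_ptimer_irq = { .irq = 30, .level = 1 };

	static int example_reset_timers(struct kvm_vcpu *vcpu)
	{
		return kvm_timer_vcpu_reset(vcpu, &example_vtimer_irq, &example_ptimer_irq);
	}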
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 002f0922cd92..b72dd2ad5f44 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
@@ -71,6 +71,8 @@ struct vgic_global { | |||
71 | 71 | ||
72 | /* GIC system register CPU interface */ | 72 | /* GIC system register CPU interface */ |
73 | struct static_key_false gicv3_cpuif; | 73 | struct static_key_false gicv3_cpuif; |
74 | |||
75 | u32 ich_vtr_el2; | ||
74 | }; | 76 | }; |
75 | 77 | ||
76 | extern struct vgic_global kvm_vgic_global_state; | 78 | extern struct vgic_global kvm_vgic_global_state; |
@@ -101,9 +103,10 @@ struct vgic_irq { | |||
101 | */ | 103 | */ |
102 | 104 | ||
103 | u32 intid; /* Guest visible INTID */ | 105 | u32 intid; /* Guest visible INTID */ |
104 | bool pending; | ||
105 | bool line_level; /* Level only */ | 106 | bool line_level; /* Level only */ |
106 | bool soft_pending; /* Level only */ | 107 | bool pending_latch; /* The pending latch state used to calculate |
108 | * the pending state for both level | ||
109 | * and edge triggered IRQs. */ | ||
107 | bool active; /* not used for LPIs */ | 110 | bool active; /* not used for LPIs */ |
108 | bool enabled; | 111 | bool enabled; |
109 | bool hw; /* Tied to HW IRQ */ | 112 | bool hw; /* Tied to HW IRQ */ |
@@ -165,6 +168,8 @@ struct vgic_its { | |||
165 | struct list_head collection_list; | 168 | struct list_head collection_list; |
166 | }; | 169 | }; |
167 | 170 | ||
171 | struct vgic_state_iter; | ||
172 | |||
168 | struct vgic_dist { | 173 | struct vgic_dist { |
169 | bool in_kernel; | 174 | bool in_kernel; |
170 | bool ready; | 175 | bool ready; |
@@ -212,6 +217,9 @@ struct vgic_dist { | |||
212 | spinlock_t lpi_list_lock; | 217 | spinlock_t lpi_list_lock; |
213 | struct list_head lpi_list_head; | 218 | struct list_head lpi_list_head; |
214 | int lpi_list_count; | 219 | int lpi_list_count; |
220 | |||
221 | /* used by vgic-debug */ | ||
222 | struct vgic_state_iter *iter; | ||
215 | }; | 223 | }; |
216 | 224 | ||
217 | struct vgic_v2_cpu_if { | 225 | struct vgic_v2_cpu_if { |
@@ -269,6 +277,12 @@ struct vgic_cpu { | |||
269 | u64 pendbaser; | 277 | u64 pendbaser; |
270 | 278 | ||
271 | bool lpis_enabled; | 279 | bool lpis_enabled; |
280 | |||
281 | /* Cache guest priority bits */ | ||
282 | u32 num_pri_bits; | ||
283 | |||
284 | /* Cache guest interrupt ID bits */ | ||
285 | u32 num_id_bits; | ||
272 | }; | 286 | }; |
273 | 287 | ||
274 | extern struct static_key_false vgic_v2_cpuif_trap; | 288 | extern struct static_key_false vgic_v2_cpuif_trap; |
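With the single pending flag replaced by pending_latch, the effective pending state is derived rather than stored directly: an edge-triggered IRQ is pending when the latch is set, a level-triggered IRQ when either the latch or the sampled line level is set. A hedged sketch of that derivation (the helper name is illustrative; the in-tree vgic code keeps an equivalent inline helper in its private headers):

	/* Sketch of how the pending state follows from the new fields. */
	static inline bool example_irq_is_pending(struct vgic_irq *irq)
	{
		if (irq->config == VGIC_CONFIG_EDGE)
			return irq->pending_latch;

		return irq->pending_latch || irq->line_level;
	}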
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 725e86b506f3..672cfef72fc8 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h | |||
@@ -349,8 +349,30 @@ | |||
349 | /* | 349 | /* |
350 | * CPU interface registers | 350 | * CPU interface registers |
351 | */ | 351 | */ |
352 | #define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1) | 352 | #define ICC_CTLR_EL1_EOImode_SHIFT (1) |
353 | #define ICC_CTLR_EL1_EOImode_drop (1U << 1) | 353 | #define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) |
354 | #define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) | ||
355 | #define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) | ||
356 | #define ICC_CTLR_EL1_CBPR_SHIFT 0 | ||
357 | #define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) | ||
358 | #define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 | ||
359 | #define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) | ||
360 | #define ICC_CTLR_EL1_ID_BITS_SHIFT 11 | ||
361 | #define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) | ||
362 | #define ICC_CTLR_EL1_SEIS_SHIFT 14 | ||
363 | #define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) | ||
364 | #define ICC_CTLR_EL1_A3V_SHIFT 15 | ||
365 | #define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) | ||
366 | #define ICC_PMR_EL1_SHIFT 0 | ||
367 | #define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) | ||
368 | #define ICC_BPR0_EL1_SHIFT 0 | ||
369 | #define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) | ||
370 | #define ICC_BPR1_EL1_SHIFT 0 | ||
371 | #define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) | ||
372 | #define ICC_IGRPEN0_EL1_SHIFT 0 | ||
373 | #define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) | ||
374 | #define ICC_IGRPEN1_EL1_SHIFT 0 | ||
375 | #define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) | ||
354 | #define ICC_SRE_EL1_SRE (1U << 0) | 376 | #define ICC_SRE_EL1_SRE (1U << 0) |
355 | 377 | ||
356 | /* | 378 | /* |
@@ -379,14 +401,29 @@ | |||
379 | #define ICH_HCR_EN (1 << 0) | 401 | #define ICH_HCR_EN (1 << 0) |
380 | #define ICH_HCR_UIE (1 << 1) | 402 | #define ICH_HCR_UIE (1 << 1) |
381 | 403 | ||
382 | #define ICH_VMCR_CTLR_SHIFT 0 | 404 | #define ICH_VMCR_CBPR_SHIFT 4 |
383 | #define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) | 405 | #define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) |
406 | #define ICH_VMCR_EOIM_SHIFT 9 | ||
407 | #define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) | ||
384 | #define ICH_VMCR_BPR1_SHIFT 18 | 408 | #define ICH_VMCR_BPR1_SHIFT 18 |
385 | #define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) | 409 | #define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) |
386 | #define ICH_VMCR_BPR0_SHIFT 21 | 410 | #define ICH_VMCR_BPR0_SHIFT 21 |
387 | #define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) | 411 | #define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) |
388 | #define ICH_VMCR_PMR_SHIFT 24 | 412 | #define ICH_VMCR_PMR_SHIFT 24 |
389 | #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) | 413 | #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) |
414 | #define ICH_VMCR_ENG0_SHIFT 0 | ||
415 | #define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) | ||
416 | #define ICH_VMCR_ENG1_SHIFT 1 | ||
417 | #define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) | ||
418 | |||
419 | #define ICH_VTR_PRI_BITS_SHIFT 29 | ||
420 | #define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) | ||
421 | #define ICH_VTR_ID_BITS_SHIFT 23 | ||
422 | #define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) | ||
423 | #define ICH_VTR_SEIS_SHIFT 22 | ||
424 | #define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) | ||
425 | #define ICH_VTR_A3V_SHIFT 21 | ||
426 | #define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) | ||
390 | 427 | ||
391 | #define ICC_IAR1_EL1_SPURIOUS 0x3ff | 428 | #define ICC_IAR1_EL1_SPURIOUS 0x3ff |
392 | 429 | ||
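The new SHIFT/MASK pairs make the ICC_CTLR_EL1 and ICH_VTR_EL2 fields directly addressable, which is what lets the vgic cache the guest's priority and interrupt-ID widths. A small self-contained sketch of the extraction pattern; the macros are restated locally so the snippet builds on its own, and the "+ 1" reflects the architectural PRIbits encoding of "number of priority bits minus one":

	#include <stdint.h>
	#include <stdio.h>

	/* Local restatement of the header's new definitions for a standalone build. */
	#define ICC_CTLR_EL1_EOImode_SHIFT	1
	#define ICC_CTLR_EL1_EOImode_MASK	(1 << ICC_CTLR_EL1_EOImode_SHIFT)
	#define ICC_CTLR_EL1_PRI_BITS_SHIFT	8
	#define ICC_CTLR_EL1_PRI_BITS_MASK	(0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)

	int main(void)
	{
		uint32_t icc_ctlr = 0x402;	/* example value: EOImode=1, PRIbits field=4 */

		/* PRIbits encodes "number of priority bits minus one". */
		unsigned int pri_bits =
			((icc_ctlr & ICC_CTLR_EL1_PRI_BITS_MASK) >> ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1;
		unsigned int eoimode =
			(icc_ctlr & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;

		printf("pri_bits=%u eoimode=%u\n", pri_bits, eoimode);
		return 0;
	}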
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c5190dab2c1..8d69d5150748 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -45,7 +45,6 @@ | |||
45 | * include/linux/kvm_h. | 45 | * include/linux/kvm_h. |
46 | */ | 46 | */ |
47 | #define KVM_MEMSLOT_INVALID (1UL << 16) | 47 | #define KVM_MEMSLOT_INVALID (1UL << 16) |
48 | #define KVM_MEMSLOT_INCOHERENT (1UL << 17) | ||
49 | 48 | ||
50 | /* Two fragments for cross MMIO pages. */ | 49 | /* Two fragments for cross MMIO pages. */ |
51 | #define KVM_MAX_MMIO_FRAGMENTS 2 | 50 | #define KVM_MAX_MMIO_FRAGMENTS 2 |
@@ -222,7 +221,6 @@ struct kvm_vcpu { | |||
222 | struct mutex mutex; | 221 | struct mutex mutex; |
223 | struct kvm_run *run; | 222 | struct kvm_run *run; |
224 | 223 | ||
225 | int fpu_active; | ||
226 | int guest_fpu_loaded, guest_xcr0_loaded; | 224 | int guest_fpu_loaded, guest_xcr0_loaded; |
227 | struct swait_queue_head wq; | 225 | struct swait_queue_head wq; |
228 | struct pid *pid; | 226 | struct pid *pid; |
@@ -642,18 +640,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
642 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | 640 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, |
643 | unsigned long len); | 641 | unsigned long len); |
644 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); | 642 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); |
645 | int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 643 | int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, |
646 | void *data, unsigned long len); | 644 | void *data, unsigned long len); |
647 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, | 645 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, |
648 | int offset, int len); | 646 | int offset, int len); |
649 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, | 647 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, |
650 | unsigned long len); | 648 | unsigned long len); |
651 | int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 649 | int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, |
652 | void *data, unsigned long len); | 650 | void *data, unsigned long len); |
653 | int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 651 | int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, |
654 | void *data, int offset, unsigned long len); | 652 | void *data, int offset, unsigned long len); |
655 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 653 | int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, |
656 | gpa_t gpa, unsigned long len); | 654 | gpa_t gpa, unsigned long len); |
657 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); | 655 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); |
658 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); | 656 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); |
659 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); | 657 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); |
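The cached guest-access helpers move from a struct kvm pointer to a struct kvm_vcpu pointer so the gfn->hva cache can be resolved per vCPU. A hedged kernel-side sketch of the migrated calling pattern, using only the signatures shown above; the example_* struct and function names are placeholders:

	/* Sketch: initialize a per-vcpu gfn->hva cache once, then reuse it. */
	struct example_state {
		struct gfn_to_hva_cache ghc;
		u64 val;
	};

	static int example_setup(struct kvm_vcpu *vcpu, struct example_state *s, gpa_t gpa)
	{
		/* was: kvm_gfn_to_hva_cache_init(vcpu->kvm, &s->ghc, gpa, sizeof(s->val)) */
		return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &s->ghc, gpa, sizeof(s->val));
	}

	static int example_publish(struct kvm_vcpu *vcpu, struct example_state *s)
	{
		/* was: kvm_write_guest_cached(vcpu->kvm, ...) */
		return kvm_vcpu_write_guest_cached(vcpu, &s->ghc, &s->val, sizeof(s->val));
	}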
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e0035808c814..f51d5082a377 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -218,7 +218,8 @@ struct kvm_hyperv_exit { | |||
218 | struct kvm_run { | 218 | struct kvm_run { |
219 | /* in */ | 219 | /* in */ |
220 | __u8 request_interrupt_window; | 220 | __u8 request_interrupt_window; |
221 | __u8 padding1[7]; | 221 | __u8 immediate_exit; |
222 | __u8 padding1[6]; | ||
222 | 223 | ||
223 | /* out */ | 224 | /* out */ |
224 | __u32 exit_reason; | 225 | __u32 exit_reason; |
@@ -685,6 +686,13 @@ struct kvm_ppc_smmu_info { | |||
685 | struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; | 686 | struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; |
686 | }; | 687 | }; |
687 | 688 | ||
689 | /* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ | ||
690 | struct kvm_ppc_resize_hpt { | ||
691 | __u64 flags; | ||
692 | __u32 shift; | ||
693 | __u32 pad; | ||
694 | }; | ||
695 | |||
688 | #define KVMIO 0xAE | 696 | #define KVMIO 0xAE |
689 | 697 | ||
690 | /* machine type bits, to be used as argument to KVM_CREATE_VM */ | 698 | /* machine type bits, to be used as argument to KVM_CREATE_VM */ |
@@ -871,8 +879,10 @@ struct kvm_ppc_smmu_info { | |||
871 | #define KVM_CAP_S390_USER_INSTR0 130 | 879 | #define KVM_CAP_S390_USER_INSTR0 130 |
872 | #define KVM_CAP_MSI_DEVID 131 | 880 | #define KVM_CAP_MSI_DEVID 131 |
873 | #define KVM_CAP_PPC_HTM 132 | 881 | #define KVM_CAP_PPC_HTM 132 |
882 | #define KVM_CAP_SPAPR_RESIZE_HPT 133 | ||
874 | #define KVM_CAP_PPC_MMU_RADIX 134 | 883 | #define KVM_CAP_PPC_MMU_RADIX 134 |
875 | #define KVM_CAP_PPC_MMU_HASH_V3 135 | 884 | #define KVM_CAP_PPC_MMU_HASH_V3 135 |
885 | #define KVM_CAP_IMMEDIATE_EXIT 136 | ||
876 | 886 | ||
877 | #ifdef KVM_CAP_IRQ_ROUTING | 887 | #ifdef KVM_CAP_IRQ_ROUTING |
878 | 888 | ||
@@ -1189,6 +1199,9 @@ struct kvm_s390_ucas_mapping { | |||
1189 | #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) | 1199 | #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) |
1190 | /* Available with KVM_CAP_PPC_RTAS */ | 1200 | /* Available with KVM_CAP_PPC_RTAS */ |
1191 | #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) | 1201 | #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) |
1202 | /* Available with KVM_CAP_SPAPR_RESIZE_HPT */ | ||
1203 | #define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) | ||
1204 | #define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) | ||
1192 | /* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ | 1205 | /* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ |
1193 | #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) | 1206 | #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) |
1194 | /* Available with KVM_CAP_PPC_RADIX_MMU */ | 1207 | /* Available with KVM_CAP_PPC_RADIX_MMU */ |
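The new immediate_exit field backs KVM_CAP_IMMEDIATE_EXIT: when it is non-zero, KVM_RUN returns to userspace immediately instead of entering the guest, which lets a VMM request an exit from a signal handler without relying on a pending POSIX signal. A hedged userspace sketch of that pattern, with the cap and field taken from the header above and error handling trimmed ("g_run" is an illustrative global pointing at the mmap'ed kvm_run area):

	#include <linux/kvm.h>
	#include <signal.h>
	#include <sys/ioctl.h>

	static struct kvm_run *g_run;

	static void kick_handler(int sig)
	{
		g_run->immediate_exit = 1;	/* ask the next/ongoing KVM_RUN to bail out */
	}

	static int run_once(int vcpu_fd, struct kvm_run *run)
	{
		int ret;

		g_run = run;
		signal(SIGUSR1, kick_handler);

		ret = ioctl(vcpu_fd, KVM_RUN, 0);	/* returns at once if immediate_exit was set */
		run->immediate_exit = 0;		/* clear before the next KVM_RUN */
		return ret;
	}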
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index bf6cd7d5cac2..fed506aeff62 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h | |||
@@ -14,6 +14,7 @@ | |||
14 | #define KVM_EFAULT EFAULT | 14 | #define KVM_EFAULT EFAULT |
15 | #define KVM_E2BIG E2BIG | 15 | #define KVM_E2BIG E2BIG |
16 | #define KVM_EPERM EPERM | 16 | #define KVM_EPERM EPERM |
17 | #define KVM_EOPNOTSUPP 95 | ||
17 | 18 | ||
18 | #define KVM_HC_VAPIC_POLL_IRQ 1 | 19 | #define KVM_HC_VAPIC_POLL_IRQ 1 |
19 | #define KVM_HC_MMU_OP 2 | 20 | #define KVM_HC_MMU_OP 2 |
@@ -23,6 +24,7 @@ | |||
23 | #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 | 24 | #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 |
24 | #define KVM_HC_MIPS_EXIT_VM 7 | 25 | #define KVM_HC_MIPS_EXIT_VM 7 |
25 | #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 | 26 | #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 |
27 | #define KVM_HC_CLOCK_PAIRING 9 | ||
26 | 28 | ||
27 | /* | 29 | /* |
28 | * hypercalls use architecture specific | 30 | * hypercalls use architecture specific |
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6a084cd57b88..35d7100e0815 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
@@ -37,10 +37,10 @@ static u32 host_vtimer_irq_flags; | |||
37 | 37 | ||
38 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | 38 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) |
39 | { | 39 | { |
40 | vcpu->arch.timer_cpu.active_cleared_last = false; | 40 | vcpu_vtimer(vcpu)->active_cleared_last = false; |
41 | } | 41 | } |
42 | 42 | ||
43 | static u64 kvm_phys_timer_read(void) | 43 | u64 kvm_phys_timer_read(void) |
44 | { | 44 | { |
45 | return timecounter->cc->read(timecounter->cc); | 45 | return timecounter->cc->read(timecounter->cc); |
46 | } | 46 | } |
@@ -98,12 +98,12 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) | |||
98 | kvm_vcpu_kick(vcpu); | 98 | kvm_vcpu_kick(vcpu); |
99 | } | 99 | } |
100 | 100 | ||
101 | static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) | 101 | static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) |
102 | { | 102 | { |
103 | u64 cval, now; | 103 | u64 cval, now; |
104 | 104 | ||
105 | cval = vcpu->arch.timer_cpu.cntv_cval; | 105 | cval = timer_ctx->cnt_cval; |
106 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | 106 | now = kvm_phys_timer_read() - timer_ctx->cntvoff; |
107 | 107 | ||
108 | if (now < cval) { | 108 | if (now < cval) { |
109 | u64 ns; | 109 | u64 ns; |
@@ -118,6 +118,35 @@ static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) | |||
118 | return 0; | 118 | return 0; |
119 | } | 119 | } |
120 | 120 | ||
121 | static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) | ||
122 | { | ||
123 | return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && | ||
124 | (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Returns the earliest expiration time in ns among guest timers. | ||
129 | * Note that it will return 0 if none of the timers can fire. | ||
130 | */ | ||
131 | static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) | ||
132 | { | ||
133 | u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX; | ||
134 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
135 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
136 | |||
137 | if (kvm_timer_irq_can_fire(vtimer)) | ||
138 | min_virt = kvm_timer_compute_delta(vtimer); | ||
139 | |||
140 | if (kvm_timer_irq_can_fire(ptimer)) | ||
141 | min_phys = kvm_timer_compute_delta(ptimer); | ||
142 | |||
143 | /* If none of the timers can fire, then return 0 */ | ||
144 | if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX)) | ||
145 | return 0; | ||
146 | |||
147 | return min(min_virt, min_phys); | ||
148 | } | ||
149 | |||
121 | static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) | 150 | static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) |
122 | { | 151 | { |
123 | struct arch_timer_cpu *timer; | 152 | struct arch_timer_cpu *timer; |
@@ -132,7 +161,7 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) | |||
132 | * PoV (NTP on the host may have forced it to expire | 161 | * PoV (NTP on the host may have forced it to expire |
133 | * early). If we should have slept longer, restart it. | 162 | * early). If we should have slept longer, restart it. |
134 | */ | 163 | */ |
135 | ns = kvm_timer_compute_delta(vcpu); | 164 | ns = kvm_timer_earliest_exp(vcpu); |
136 | if (unlikely(ns)) { | 165 | if (unlikely(ns)) { |
137 | hrtimer_forward_now(hrt, ns_to_ktime(ns)); | 166 | hrtimer_forward_now(hrt, ns_to_ktime(ns)); |
138 | return HRTIMER_RESTART; | 167 | return HRTIMER_RESTART; |
@@ -142,42 +171,33 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) | |||
142 | return HRTIMER_NORESTART; | 171 | return HRTIMER_NORESTART; |
143 | } | 172 | } |
144 | 173 | ||
145 | static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) | 174 | bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) |
146 | { | ||
147 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
148 | |||
149 | return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && | ||
150 | (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); | ||
151 | } | ||
152 | |||
153 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) | ||
154 | { | 175 | { |
155 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
156 | u64 cval, now; | 176 | u64 cval, now; |
157 | 177 | ||
158 | if (!kvm_timer_irq_can_fire(vcpu)) | 178 | if (!kvm_timer_irq_can_fire(timer_ctx)) |
159 | return false; | 179 | return false; |
160 | 180 | ||
161 | cval = timer->cntv_cval; | 181 | cval = timer_ctx->cnt_cval; |
162 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | 182 | now = kvm_phys_timer_read() - timer_ctx->cntvoff; |
163 | 183 | ||
164 | return cval <= now; | 184 | return cval <= now; |
165 | } | 185 | } |
166 | 186 | ||
167 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) | 187 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, |
188 | struct arch_timer_context *timer_ctx) | ||
168 | { | 189 | { |
169 | int ret; | 190 | int ret; |
170 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
171 | 191 | ||
172 | BUG_ON(!vgic_initialized(vcpu->kvm)); | 192 | BUG_ON(!vgic_initialized(vcpu->kvm)); |
173 | 193 | ||
174 | timer->active_cleared_last = false; | 194 | timer_ctx->active_cleared_last = false; |
175 | timer->irq.level = new_level; | 195 | timer_ctx->irq.level = new_level; |
176 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq, | 196 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, |
177 | timer->irq.level); | 197 | timer_ctx->irq.level); |
178 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | 198 | |
179 | timer->irq.irq, | 199 | ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, |
180 | timer->irq.level); | 200 | timer_ctx->irq.level); |
181 | WARN_ON(ret); | 201 | WARN_ON(ret); |
182 | } | 202 | } |
183 | 203 | ||
@@ -188,22 +208,43 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) | |||
188 | static int kvm_timer_update_state(struct kvm_vcpu *vcpu) | 208 | static int kvm_timer_update_state(struct kvm_vcpu *vcpu) |
189 | { | 209 | { |
190 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 210 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
211 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
212 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
191 | 213 | ||
192 | /* | 214 | /* |
193 | * If userspace modified the timer registers via SET_ONE_REG before | 215 | * If userspace modified the timer registers via SET_ONE_REG before |
194 | * the vgic was initialized, we mustn't set the timer->irq.level value | 216 | * the vgic was initialized, we mustn't set the vtimer->irq.level value |
195 | * because the guest would never see the interrupt. Instead wait | 217 | * because the guest would never see the interrupt. Instead wait |
196 | * until we call this function from kvm_timer_flush_hwstate. | 218 | * until we call this function from kvm_timer_flush_hwstate. |
197 | */ | 219 | */ |
198 | if (!vgic_initialized(vcpu->kvm) || !timer->enabled) | 220 | if (!vgic_initialized(vcpu->kvm) || !timer->enabled) |
199 | return -ENODEV; | 221 | return -ENODEV; |
200 | 222 | ||
201 | if (kvm_timer_should_fire(vcpu) != timer->irq.level) | 223 | if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) |
202 | kvm_timer_update_irq(vcpu, !timer->irq.level); | 224 | kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); |
225 | |||
226 | if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) | ||
227 | kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); | ||
203 | 228 | ||
204 | return 0; | 229 | return 0; |
205 | } | 230 | } |
206 | 231 | ||
232 | /* Schedule the background timer for the emulated timer. */ | ||
233 | static void kvm_timer_emulate(struct kvm_vcpu *vcpu, | ||
234 | struct arch_timer_context *timer_ctx) | ||
235 | { | ||
236 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
237 | |||
238 | if (kvm_timer_should_fire(timer_ctx)) | ||
239 | return; | ||
240 | |||
241 | if (!kvm_timer_irq_can_fire(timer_ctx)) | ||
242 | return; | ||
243 | |||
244 | /* The timer has not yet expired, schedule a background timer */ | ||
245 | timer_arm(timer, kvm_timer_compute_delta(timer_ctx)); | ||
246 | } | ||
247 | |||
207 | /* | 248 | /* |
208 | * Schedule the background timer before calling kvm_vcpu_block, so that this | 249 | * Schedule the background timer before calling kvm_vcpu_block, so that this |
209 | * thread is removed from its waitqueue and made runnable when there's a timer | 250 | * thread is removed from its waitqueue and made runnable when there's a timer |
@@ -212,26 +253,31 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) | |||
212 | void kvm_timer_schedule(struct kvm_vcpu *vcpu) | 253 | void kvm_timer_schedule(struct kvm_vcpu *vcpu) |
213 | { | 254 | { |
214 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 255 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
256 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
257 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
215 | 258 | ||
216 | BUG_ON(timer_is_armed(timer)); | 259 | BUG_ON(timer_is_armed(timer)); |
217 | 260 | ||
218 | /* | 261 | /* |
219 | * No need to schedule a background timer if the guest timer has | 262 | * No need to schedule a background timer if any guest timer has |
220 | * already expired, because kvm_vcpu_block will return before putting | 263 | * already expired, because kvm_vcpu_block will return before putting |
221 | * the thread to sleep. | 264 | * the thread to sleep. |
222 | */ | 265 | */ |
223 | if (kvm_timer_should_fire(vcpu)) | 266 | if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer)) |
224 | return; | 267 | return; |
225 | 268 | ||
226 | /* | 269 | /* |
227 | * If the timer is not capable of raising interrupts (disabled or | 270 | * If both timers are not capable of raising interrupts (disabled or |
228 | * masked), then there's no more work for us to do. | 271 | * masked), then there's no more work for us to do. |
229 | */ | 272 | */ |
230 | if (!kvm_timer_irq_can_fire(vcpu)) | 273 | if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer)) |
231 | return; | 274 | return; |
232 | 275 | ||
233 | /* The timer has not yet expired, schedule a background timer */ | 276 | /* |
234 | timer_arm(timer, kvm_timer_compute_delta(vcpu)); | 277 | * The guest timers have not yet expired, schedule a background timer. |
278 | * Set the earliest expiration time among the guest timers. | ||
279 | */ | ||
280 | timer_arm(timer, kvm_timer_earliest_exp(vcpu)); | ||
235 | } | 281 | } |
236 | 282 | ||
237 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu) | 283 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu) |
@@ -249,13 +295,16 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) | |||
249 | */ | 295 | */ |
250 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | 296 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) |
251 | { | 297 | { |
252 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 298 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
253 | bool phys_active; | 299 | bool phys_active; |
254 | int ret; | 300 | int ret; |
255 | 301 | ||
256 | if (kvm_timer_update_state(vcpu)) | 302 | if (kvm_timer_update_state(vcpu)) |
257 | return; | 303 | return; |
258 | 304 | ||
305 | /* Set the background timer for the physical timer emulation. */ | ||
306 | kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); | ||
307 | |||
259 | /* | 308 | /* |
260 | * If we enter the guest with the virtual input level to the VGIC | 309 | * If we enter the guest with the virtual input level to the VGIC |
261 | * asserted, then we have already told the VGIC what we need to, and | 310 | * asserted, then we have already told the VGIC what we need to, and |
@@ -273,8 +322,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
273 | * to ensure that hardware interrupts from the timer triggers a guest | 322 | * to ensure that hardware interrupts from the timer triggers a guest |
274 | * exit. | 323 | * exit. |
275 | */ | 324 | */ |
276 | phys_active = timer->irq.level || | 325 | phys_active = vtimer->irq.level || |
277 | kvm_vgic_map_is_active(vcpu, timer->irq.irq); | 326 | kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); |
278 | 327 | ||
279 | /* | 328 | /* |
280 | * We want to avoid hitting the (re)distributor as much as | 329 | * We want to avoid hitting the (re)distributor as much as |
@@ -296,7 +345,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
296 | * - cached value is "active clear" | 345 | * - cached value is "active clear" |
297 | * - value to be programmed is "active clear" | 346 | * - value to be programmed is "active clear" |
298 | */ | 347 | */ |
299 | if (timer->active_cleared_last && !phys_active) | 348 | if (vtimer->active_cleared_last && !phys_active) |
300 | return; | 349 | return; |
301 | 350 | ||
302 | ret = irq_set_irqchip_state(host_vtimer_irq, | 351 | ret = irq_set_irqchip_state(host_vtimer_irq, |
@@ -304,7 +353,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |||
304 | phys_active); | 353 | phys_active); |
305 | WARN_ON(ret); | 354 | WARN_ON(ret); |
306 | 355 | ||
307 | timer->active_cleared_last = !phys_active; | 356 | vtimer->active_cleared_last = !phys_active; |
308 | } | 357 | } |
309 | 358 | ||
310 | /** | 359 | /** |
@@ -318,7 +367,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | |||
318 | { | 367 | { |
319 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 368 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
320 | 369 | ||
321 | BUG_ON(timer_is_armed(timer)); | 370 | /* |
371 | * This is to cancel the background timer for the physical timer | ||
372 | * emulation if it is set. | ||
373 | */ | ||
374 | timer_disarm(timer); | ||
322 | 375 | ||
323 | /* | 376 | /* |
324 | * The guest could have modified the timer registers or the timer | 377 | * The guest could have modified the timer registers or the timer |
@@ -328,9 +381,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | |||
328 | } | 381 | } |
329 | 382 | ||
330 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | 383 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, |
331 | const struct kvm_irq_level *irq) | 384 | const struct kvm_irq_level *virt_irq, |
385 | const struct kvm_irq_level *phys_irq) | ||
332 | { | 386 | { |
333 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 387 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
388 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
334 | 389 | ||
335 | /* | 390 | /* |
336 | * The vcpu timer irq number cannot be determined in | 391 | * The vcpu timer irq number cannot be determined in |
@@ -338,7 +393,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
338 | * kvm_vcpu_set_target(). To handle this, we determine | 393 | * kvm_vcpu_set_target(). To handle this, we determine |
339 | * vcpu timer irq number when the vcpu is reset. | 394 | * vcpu timer irq number when the vcpu is reset. |
340 | */ | 395 | */ |
341 | timer->irq.irq = irq->irq; | 396 | vtimer->irq.irq = virt_irq->irq; |
397 | ptimer->irq.irq = phys_irq->irq; | ||
342 | 398 | ||
343 | /* | 399 | /* |
344 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 | 400 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 |
@@ -346,16 +402,40 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |||
346 | * resets the timer to be disabled and unmasked and is compliant with | 402 | * resets the timer to be disabled and unmasked and is compliant with |
347 | * the ARMv7 architecture. | 403 | * the ARMv7 architecture. |
348 | */ | 404 | */ |
349 | timer->cntv_ctl = 0; | 405 | vtimer->cnt_ctl = 0; |
406 | ptimer->cnt_ctl = 0; | ||
350 | kvm_timer_update_state(vcpu); | 407 | kvm_timer_update_state(vcpu); |
351 | 408 | ||
352 | return 0; | 409 | return 0; |
353 | } | 410 | } |
354 | 411 | ||
412 | /* Make the updates of cntvoff for all vtimer contexts atomic */ | ||
413 | static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) | ||
414 | { | ||
415 | int i; | ||
416 | struct kvm *kvm = vcpu->kvm; | ||
417 | struct kvm_vcpu *tmp; | ||
418 | |||
419 | mutex_lock(&kvm->lock); | ||
420 | kvm_for_each_vcpu(i, tmp, kvm) | ||
421 | vcpu_vtimer(tmp)->cntvoff = cntvoff; | ||
422 | |||
423 | /* | ||
424 | * When called from the vcpu create path, the CPU being created is not | ||
425 | * included in the loop above, so we just set it here as well. | ||
426 | */ | ||
427 | vcpu_vtimer(vcpu)->cntvoff = cntvoff; | ||
428 | mutex_unlock(&kvm->lock); | ||
429 | } | ||
430 | |||
355 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) | 431 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) |
356 | { | 432 | { |
357 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 433 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
358 | 434 | ||
435 | /* Synchronize cntvoff across all vtimers of a VM. */ | ||
436 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); | ||
437 | vcpu_ptimer(vcpu)->cntvoff = 0; | ||
438 | |||
359 | INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); | 439 | INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); |
360 | hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 440 | hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
361 | timer->timer.function = kvm_timer_expire; | 441 | timer->timer.function = kvm_timer_expire; |
@@ -368,17 +448,17 @@ static void kvm_timer_init_interrupt(void *info) | |||
368 | 448 | ||
369 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | 449 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) |
370 | { | 450 | { |
371 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 451 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
372 | 452 | ||
373 | switch (regid) { | 453 | switch (regid) { |
374 | case KVM_REG_ARM_TIMER_CTL: | 454 | case KVM_REG_ARM_TIMER_CTL: |
375 | timer->cntv_ctl = value; | 455 | vtimer->cnt_ctl = value; |
376 | break; | 456 | break; |
377 | case KVM_REG_ARM_TIMER_CNT: | 457 | case KVM_REG_ARM_TIMER_CNT: |
378 | vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; | 458 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); |
379 | break; | 459 | break; |
380 | case KVM_REG_ARM_TIMER_CVAL: | 460 | case KVM_REG_ARM_TIMER_CVAL: |
381 | timer->cntv_cval = value; | 461 | vtimer->cnt_cval = value; |
382 | break; | 462 | break; |
383 | default: | 463 | default: |
384 | return -1; | 464 | return -1; |
@@ -390,15 +470,15 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | |||
390 | 470 | ||
391 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) | 471 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) |
392 | { | 472 | { |
393 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 473 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
394 | 474 | ||
395 | switch (regid) { | 475 | switch (regid) { |
396 | case KVM_REG_ARM_TIMER_CTL: | 476 | case KVM_REG_ARM_TIMER_CTL: |
397 | return timer->cntv_ctl; | 477 | return vtimer->cnt_ctl; |
398 | case KVM_REG_ARM_TIMER_CNT: | 478 | case KVM_REG_ARM_TIMER_CNT: |
399 | return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | 479 | return kvm_phys_timer_read() - vtimer->cntvoff; |
400 | case KVM_REG_ARM_TIMER_CVAL: | 480 | case KVM_REG_ARM_TIMER_CVAL: |
401 | return timer->cntv_cval; | 481 | return vtimer->cnt_cval; |
402 | } | 482 | } |
403 | return (u64)-1; | 483 | return (u64)-1; |
404 | } | 484 | } |
@@ -462,14 +542,16 @@ int kvm_timer_hyp_init(void) | |||
462 | void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) | 542 | void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) |
463 | { | 543 | { |
464 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 544 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
545 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
465 | 546 | ||
466 | timer_disarm(timer); | 547 | timer_disarm(timer); |
467 | kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq); | 548 | kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); |
468 | } | 549 | } |
469 | 550 | ||
470 | int kvm_timer_enable(struct kvm_vcpu *vcpu) | 551 | int kvm_timer_enable(struct kvm_vcpu *vcpu) |
471 | { | 552 | { |
472 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 553 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
554 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
473 | struct irq_desc *desc; | 555 | struct irq_desc *desc; |
474 | struct irq_data *data; | 556 | struct irq_data *data; |
475 | int phys_irq; | 557 | int phys_irq; |
@@ -497,7 +579,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) | |||
497 | * Tell the VGIC that the virtual interrupt is tied to a | 579 | * Tell the VGIC that the virtual interrupt is tied to a |
498 | * physical interrupt. We do that once per VCPU. | 580 | * physical interrupt. We do that once per VCPU. |
499 | */ | 581 | */ |
500 | ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq); | 582 | ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq); |
501 | if (ret) | 583 | if (ret) |
502 | return ret; | 584 | return ret; |
503 | 585 | ||
@@ -506,11 +588,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) | |||
506 | return 0; | 588 | return 0; |
507 | } | 589 | } |
508 | 590 | ||
509 | void kvm_timer_init(struct kvm *kvm) | ||
510 | { | ||
511 | kvm->arch.timer.cntvoff = kvm_phys_timer_read(); | ||
512 | } | ||
513 | |||
514 | /* | 591 | /* |
515 | * On VHE system, we only need to configure trap on physical timer and counter | 592 | * On VHE system, we only need to configure trap on physical timer and counter |
516 | * accesses in EL0 and EL1 once, not for every world switch. | 593 | * accesses in EL0 and EL1 once, not for every world switch. |
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 63e28dd18bb0..4734915ab71f 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c | |||
@@ -25,11 +25,12 @@ | |||
25 | void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) | 25 | void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) |
26 | { | 26 | { |
27 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 27 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
28 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
28 | u64 val; | 29 | u64 val; |
29 | 30 | ||
30 | if (timer->enabled) { | 31 | if (timer->enabled) { |
31 | timer->cntv_ctl = read_sysreg_el0(cntv_ctl); | 32 | vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); |
32 | timer->cntv_cval = read_sysreg_el0(cntv_cval); | 33 | vtimer->cnt_cval = read_sysreg_el0(cntv_cval); |
33 | } | 34 | } |
34 | 35 | ||
35 | /* Disable the virtual timer */ | 36 | /* Disable the virtual timer */ |
@@ -52,8 +53,8 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) | |||
52 | 53 | ||
53 | void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) | 54 | void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) |
54 | { | 55 | { |
55 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
56 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 56 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
57 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
57 | u64 val; | 58 | u64 val; |
58 | 59 | ||
59 | /* Those bits are already configured at boot on VHE-system */ | 60 | /* Those bits are already configured at boot on VHE-system */ |
@@ -69,9 +70,9 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) | |||
69 | } | 70 | } |
70 | 71 | ||
71 | if (timer->enabled) { | 72 | if (timer->enabled) { |
72 | write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); | 73 | write_sysreg(vtimer->cntvoff, cntvoff_el2); |
73 | write_sysreg_el0(timer->cntv_cval, cntv_cval); | 74 | write_sysreg_el0(vtimer->cnt_cval, cntv_cval); |
74 | isb(); | 75 | isb(); |
75 | write_sysreg_el0(timer->cntv_ctl, cntv_ctl); | 76 | write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); |
76 | } | 77 | } |
77 | } | 78 | } |
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c new file mode 100644 index 000000000000..7072ab743332 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-debug.c | |||
@@ -0,0 +1,283 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Linaro | ||
3 | * Author: Christoffer Dall <christoffer.dall@linaro.org> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | */ | ||
17 | |||
18 | #include <linux/cpu.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/interrupt.h> | ||
21 | #include <linux/kvm_host.h> | ||
22 | #include <linux/seq_file.h> | ||
23 | #include <kvm/arm_vgic.h> | ||
24 | #include <asm/kvm_mmu.h> | ||
25 | #include "vgic.h" | ||
26 | |||
27 | /* | ||
28 | * Structure to control looping through the entire vgic state. We start at | ||
29 | * zero for each field and move upwards. So, if dist_id is 0 we print the | ||
30 | * distributor info. When dist_id is 1, we have already printed it and move | ||
31 | * on. | ||
32 | * | ||
33 | * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and | ||
34 | * so on. | ||
35 | */ | ||
36 | struct vgic_state_iter { | ||
37 | int nr_cpus; | ||
38 | int nr_spis; | ||
39 | int dist_id; | ||
40 | int vcpu_id; | ||
41 | int intid; | ||
42 | }; | ||
43 | |||
44 | static void iter_next(struct vgic_state_iter *iter) | ||
45 | { | ||
46 | if (iter->dist_id == 0) { | ||
47 | iter->dist_id++; | ||
48 | return; | ||
49 | } | ||
50 | |||
51 | iter->intid++; | ||
52 | if (iter->intid == VGIC_NR_PRIVATE_IRQS && | ||
53 | ++iter->vcpu_id < iter->nr_cpus) | ||
54 | iter->intid = 0; | ||
55 | } | ||
56 | |||
57 | static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, | ||
58 | loff_t pos) | ||
59 | { | ||
60 | int nr_cpus = atomic_read(&kvm->online_vcpus); | ||
61 | |||
62 | memset(iter, 0, sizeof(*iter)); | ||
63 | |||
64 | iter->nr_cpus = nr_cpus; | ||
65 | iter->nr_spis = kvm->arch.vgic.nr_spis; | ||
66 | |||
67 | /* Fast forward to the right position if needed */ | ||
68 | while (pos--) | ||
69 | iter_next(iter); | ||
70 | } | ||
71 | |||
72 | static bool end_of_vgic(struct vgic_state_iter *iter) | ||
73 | { | ||
74 | return iter->dist_id > 0 && | ||
75 | iter->vcpu_id == iter->nr_cpus && | ||
76 | (iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis; | ||
77 | } | ||
78 | |||
79 | static void *vgic_debug_start(struct seq_file *s, loff_t *pos) | ||
80 | { | ||
81 | struct kvm *kvm = (struct kvm *)s->private; | ||
82 | struct vgic_state_iter *iter; | ||
83 | |||
84 | mutex_lock(&kvm->lock); | ||
85 | iter = kvm->arch.vgic.iter; | ||
86 | if (iter) { | ||
87 | iter = ERR_PTR(-EBUSY); | ||
88 | goto out; | ||
89 | } | ||
90 | |||
91 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); | ||
92 | if (!iter) { | ||
93 | iter = ERR_PTR(-ENOMEM); | ||
94 | goto out; | ||
95 | } | ||
96 | |||
97 | iter_init(kvm, iter, *pos); | ||
98 | kvm->arch.vgic.iter = iter; | ||
99 | |||
100 | if (end_of_vgic(iter)) | ||
101 | iter = NULL; | ||
102 | out: | ||
103 | mutex_unlock(&kvm->lock); | ||
104 | return iter; | ||
105 | } | ||
106 | |||
107 | static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) | ||
108 | { | ||
109 | struct kvm *kvm = (struct kvm *)s->private; | ||
110 | struct vgic_state_iter *iter = kvm->arch.vgic.iter; | ||
111 | |||
112 | ++*pos; | ||
113 | iter_next(iter); | ||
114 | if (end_of_vgic(iter)) | ||
115 | iter = NULL; | ||
116 | return iter; | ||
117 | } | ||
118 | |||
119 | static void vgic_debug_stop(struct seq_file *s, void *v) | ||
120 | { | ||
121 | struct kvm *kvm = (struct kvm *)s->private; | ||
122 | struct vgic_state_iter *iter; | ||
123 | |||
124 | /* | ||
125 | * If the seq file wasn't properly opened, there's nothing to clean | ||
126 | * up. | ||
127 | */ | ||
128 | if (IS_ERR(v)) | ||
129 | return; | ||
130 | |||
131 | mutex_lock(&kvm->lock); | ||
132 | iter = kvm->arch.vgic.iter; | ||
133 | kfree(iter); | ||
134 | kvm->arch.vgic.iter = NULL; | ||
135 | mutex_unlock(&kvm->lock); | ||
136 | } | ||
137 | |||
138 | static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) | ||
139 | { | ||
140 | seq_printf(s, "Distributor\n"); | ||
141 | seq_printf(s, "===========\n"); | ||
142 | seq_printf(s, "vgic_model:\t%s\n", | ||
143 | (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ? | ||
144 | "GICv3" : "GICv2"); | ||
145 | seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); | ||
146 | seq_printf(s, "enabled:\t%d\n", dist->enabled); | ||
147 | seq_printf(s, "\n"); | ||
148 | |||
149 | seq_printf(s, "P=pending_latch, L=line_level, A=active\n"); | ||
150 | seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n"); | ||
151 | } | ||
152 | |||
153 | static void print_header(struct seq_file *s, struct vgic_irq *irq, | ||
154 | struct kvm_vcpu *vcpu) | ||
155 | { | ||
156 | int id = 0; | ||
157 | char *hdr = "SPI "; | ||
158 | |||
159 | if (vcpu) { | ||
160 | hdr = "VCPU"; | ||
161 | id = vcpu->vcpu_id; | ||
162 | } | ||
163 | |||
164 | seq_printf(s, "\n"); | ||
165 | seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id); | ||
166 | seq_printf(s, "---------------------------------------------------------------\n"); | ||
167 | } | ||
168 | |||
169 | static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, | ||
170 | struct kvm_vcpu *vcpu) | ||
171 | { | ||
172 | char *type; | ||
173 | if (irq->intid < VGIC_NR_SGIS) | ||
174 | type = "SGI"; | ||
175 | else if (irq->intid < VGIC_NR_PRIVATE_IRQS) | ||
176 | type = "PPI"; | ||
177 | else | ||
178 | type = "SPI"; | ||
179 | |||
180 | if (irq->intid == 0 || irq->intid == VGIC_NR_PRIVATE_IRQS) | ||
181 | print_header(s, irq, vcpu); | ||
182 | |||
183 | seq_printf(s, " %s %4d " | ||
184 | " %2d " | ||
185 | "%d%d%d%d%d%d " | ||
186 | "%8d " | ||
187 | "%8x " | ||
188 | " %2x " | ||
189 | "%3d " | ||
190 | " %2d " | ||
191 | "\n", | ||
192 | type, irq->intid, | ||
193 | (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, | ||
194 | irq->pending_latch, | ||
195 | irq->line_level, | ||
196 | irq->active, | ||
197 | irq->enabled, | ||
198 | irq->hw, | ||
199 | irq->config == VGIC_CONFIG_LEVEL, | ||
200 | irq->hwintid, | ||
201 | irq->mpidr, | ||
202 | irq->source, | ||
203 | irq->priority, | ||
204 | (irq->vcpu) ? irq->vcpu->vcpu_id : -1); | ||
205 | |||
206 | } | ||
207 | |||
208 | static int vgic_debug_show(struct seq_file *s, void *v) | ||
209 | { | ||
210 | struct kvm *kvm = (struct kvm *)s->private; | ||
211 | struct vgic_state_iter *iter = (struct vgic_state_iter *)v; | ||
212 | struct vgic_irq *irq; | ||
213 | struct kvm_vcpu *vcpu = NULL; | ||
214 | |||
215 | if (iter->dist_id == 0) { | ||
216 | print_dist_state(s, &kvm->arch.vgic); | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | if (!kvm->arch.vgic.initialized) | ||
221 | return 0; | ||
222 | |||
223 | if (iter->vcpu_id < iter->nr_cpus) { | ||
224 | vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); | ||
225 | irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid]; | ||
226 | } else { | ||
227 | irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS]; | ||
228 | } | ||
229 | |||
230 | spin_lock(&irq->irq_lock); | ||
231 | print_irq_state(s, irq, vcpu); | ||
232 | spin_unlock(&irq->irq_lock); | ||
233 | |||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | static struct seq_operations vgic_debug_seq_ops = { | ||
238 | .start = vgic_debug_start, | ||
239 | .next = vgic_debug_next, | ||
240 | .stop = vgic_debug_stop, | ||
241 | .show = vgic_debug_show | ||
242 | }; | ||
243 | |||
244 | static int debug_open(struct inode *inode, struct file *file) | ||
245 | { | ||
246 | int ret; | ||
247 | ret = seq_open(file, &vgic_debug_seq_ops); | ||
248 | if (!ret) { | ||
249 | struct seq_file *seq; | ||
250 | /* seq_open will have modified file->private_data */ | ||
251 | seq = file->private_data; | ||
252 | seq->private = inode->i_private; | ||
253 | } | ||
254 | |||
255 | return ret; | ||
256 | }; | ||
257 | |||
258 | static struct file_operations vgic_debug_fops = { | ||
259 | .owner = THIS_MODULE, | ||
260 | .open = debug_open, | ||
261 | .read = seq_read, | ||
262 | .llseek = seq_lseek, | ||
263 | .release = seq_release | ||
264 | }; | ||
265 | |||
266 | int vgic_debug_init(struct kvm *kvm) | ||
267 | { | ||
268 | if (!kvm->debugfs_dentry) | ||
269 | return -ENOENT; | ||
270 | |||
271 | if (!debugfs_create_file("vgic-state", 0444, | ||
272 | kvm->debugfs_dentry, | ||
273 | kvm, | ||
274 | &vgic_debug_fops)) | ||
275 | return -ENOMEM; | ||
276 | |||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | int vgic_debug_destroy(struct kvm *kvm) | ||
281 | { | ||
282 | return 0; | ||
283 | } | ||
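The new file exposes a per-VM "vgic-state" entry; because vgic_debug_init() attaches it to kvm->debugfs_dentry, it should appear under the VM's existing debugfs directory (conventionally /sys/kernel/debug/kvm/<pid>-<vm_fd>/, which assumes the usual debugfs mount point). A trivial hedged reader, with an example path that depends on the actual pid/fd pair:

	#include <stdio.h>

	/* Dump the vgic state of one VM; the path below is only an example. */
	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/kernel/debug/kvm/1234-11/vgic-state", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}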
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index c737ea0a310a..276139a24e6f 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c | |||
@@ -259,6 +259,8 @@ int vgic_init(struct kvm *kvm) | |||
259 | if (ret) | 259 | if (ret) |
260 | goto out; | 260 | goto out; |
261 | 261 | ||
262 | vgic_debug_init(kvm); | ||
263 | |||
262 | dist->initialized = true; | 264 | dist->initialized = true; |
263 | out: | 265 | out: |
264 | return ret; | 266 | return ret; |
@@ -288,6 +290,8 @@ static void __kvm_vgic_destroy(struct kvm *kvm) | |||
288 | struct kvm_vcpu *vcpu; | 290 | struct kvm_vcpu *vcpu; |
289 | int i; | 291 | int i; |
290 | 292 | ||
293 | vgic_debug_destroy(kvm); | ||
294 | |||
291 | kvm_vgic_dist_destroy(kvm); | 295 | kvm_vgic_dist_destroy(kvm); |
292 | 296 | ||
293 | kvm_for_each_vcpu(i, vcpu, kvm) | 297 | kvm_for_each_vcpu(i, vcpu, kvm) |
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index d918dcf26a5a..f138ed2e9c63 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c | |||
@@ -99,6 +99,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
99 | if (!vgic_has_its(kvm)) | 99 | if (!vgic_has_its(kvm)) |
100 | return -ENODEV; | 100 | return -ENODEV; |
101 | 101 | ||
102 | if (!level) | ||
103 | return -1; | ||
104 | |||
102 | return vgic_its_inject_msi(kvm, &msi); | 105 | return vgic_its_inject_msi(kvm, &msi); |
103 | } | 106 | } |
104 | 107 | ||
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 8c2b3cdcb2c5..571b64a01c50 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c | |||
@@ -350,7 +350,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) | |||
350 | 350 | ||
351 | irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); | 351 | irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); |
352 | spin_lock(&irq->irq_lock); | 352 | spin_lock(&irq->irq_lock); |
353 | irq->pending = pendmask & (1U << bit_nr); | 353 | irq->pending_latch = pendmask & (1U << bit_nr); |
354 | vgic_queue_irq_unlock(vcpu->kvm, irq); | 354 | vgic_queue_irq_unlock(vcpu->kvm, irq); |
355 | vgic_put_irq(vcpu->kvm, irq); | 355 | vgic_put_irq(vcpu->kvm, irq); |
356 | } | 356 | } |
@@ -465,7 +465,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, | |||
465 | return -EBUSY; | 465 | return -EBUSY; |
466 | 466 | ||
467 | spin_lock(&itte->irq->irq_lock); | 467 | spin_lock(&itte->irq->irq_lock); |
468 | itte->irq->pending = true; | 468 | itte->irq->pending_latch = true; |
469 | vgic_queue_irq_unlock(kvm, itte->irq); | 469 | vgic_queue_irq_unlock(kvm, itte->irq); |
470 | 470 | ||
471 | return 0; | 471 | return 0; |
@@ -913,7 +913,7 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, | |||
913 | if (!itte) | 913 | if (!itte) |
914 | return E_ITS_CLEAR_UNMAPPED_INTERRUPT; | 914 | return E_ITS_CLEAR_UNMAPPED_INTERRUPT; |
915 | 915 | ||
916 | itte->irq->pending = false; | 916 | itte->irq->pending_latch = false; |
917 | 917 | ||
918 | return 0; | 918 | return 0; |
919 | } | 919 | } |
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index fbe87a63d250..d181d2baee9c 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <kvm/arm_vgic.h> | 17 | #include <kvm/arm_vgic.h> |
18 | #include <linux/uaccess.h> | 18 | #include <linux/uaccess.h> |
19 | #include <asm/kvm_mmu.h> | 19 | #include <asm/kvm_mmu.h> |
20 | #include <asm/cputype.h> | ||
20 | #include "vgic.h" | 21 | #include "vgic.h" |
21 | 22 | ||
22 | /* common helpers */ | 23 | /* common helpers */ |
@@ -230,14 +231,8 @@ int kvm_register_vgic_device(unsigned long type) | |||
230 | return ret; | 231 | return ret; |
231 | } | 232 | } |
232 | 233 | ||
233 | struct vgic_reg_attr { | 234 | int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, |
234 | struct kvm_vcpu *vcpu; | 235 | struct vgic_reg_attr *reg_attr) |
235 | gpa_t addr; | ||
236 | }; | ||
237 | |||
238 | static int parse_vgic_v2_attr(struct kvm_device *dev, | ||
239 | struct kvm_device_attr *attr, | ||
240 | struct vgic_reg_attr *reg_attr) | ||
241 | { | 236 | { |
242 | int cpuid; | 237 | int cpuid; |
243 | 238 | ||
@@ -292,14 +287,14 @@ static bool lock_all_vcpus(struct kvm *kvm) | |||
292 | } | 287 | } |
293 | 288 | ||
294 | /** | 289 | /** |
295 | * vgic_attr_regs_access_v2 - allows user space to access VGIC v2 state | 290 | * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state |
296 | * | 291 | * |
297 | * @dev: kvm device handle | 292 | * @dev: kvm device handle |
298 | * @attr: kvm device attribute | 293 | * @attr: kvm device attribute |
299 | * @reg: address the value is read or written | 294 | * @reg: address the value is read or written |
300 | * @is_write: true if userspace is writing a register | 295 | * @is_write: true if userspace is writing a register |
301 | */ | 296 | */ |
302 | static int vgic_attr_regs_access_v2(struct kvm_device *dev, | 297 | static int vgic_v2_attr_regs_access(struct kvm_device *dev, |
303 | struct kvm_device_attr *attr, | 298 | struct kvm_device_attr *attr, |
304 | u32 *reg, bool is_write) | 299 | u32 *reg, bool is_write) |
305 | { | 300 | { |
@@ -308,7 +303,7 @@ static int vgic_attr_regs_access_v2(struct kvm_device *dev, | |||
308 | struct kvm_vcpu *vcpu; | 303 | struct kvm_vcpu *vcpu; |
309 | int ret; | 304 | int ret; |
310 | 305 | ||
311 | ret = parse_vgic_v2_attr(dev, attr, ®_attr); | 306 | ret = vgic_v2_parse_attr(dev, attr, ®_attr); |
312 | if (ret) | 307 | if (ret) |
313 | return ret; | 308 | return ret; |
314 | 309 | ||
@@ -362,7 +357,7 @@ static int vgic_v2_set_attr(struct kvm_device *dev, | |||
362 | if (get_user(reg, uaddr)) | 357 | if (get_user(reg, uaddr)) |
363 | return -EFAULT; | 358 | return -EFAULT; |
364 | 359 | ||
365 | return vgic_attr_regs_access_v2(dev, attr, ®, true); | 360 | return vgic_v2_attr_regs_access(dev, attr, ®, true); |
366 | } | 361 | } |
367 | } | 362 | } |
368 | 363 | ||
@@ -384,7 +379,7 @@ static int vgic_v2_get_attr(struct kvm_device *dev, | |||
384 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | 379 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; |
385 | u32 reg = 0; | 380 | u32 reg = 0; |
386 | 381 | ||
387 | ret = vgic_attr_regs_access_v2(dev, attr, ®, false); | 382 | ret = vgic_v2_attr_regs_access(dev, attr, ®, false); |
388 | if (ret) | 383 | if (ret) |
389 | return ret; | 384 | return ret; |
390 | return put_user(reg, uaddr); | 385 | return put_user(reg, uaddr); |
@@ -428,16 +423,211 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = { | |||
428 | .has_attr = vgic_v2_has_attr, | 423 | .has_attr = vgic_v2_has_attr, |
429 | }; | 424 | }; |
430 | 425 | ||
426 | int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, | ||
427 | struct vgic_reg_attr *reg_attr) | ||
428 | { | ||
429 | unsigned long vgic_mpidr, mpidr_reg; | ||
430 | |||
431 | /* | ||
432 | * For the KVM_DEV_ARM_VGIC_GRP_DIST_REGS group, | ||
433 | * attr might not hold an MPIDR. Hence assume vcpu0. | ||
434 | */ | ||
435 | if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS) { | ||
436 | vgic_mpidr = (attr->attr & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) >> | ||
437 | KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT; | ||
438 | |||
439 | mpidr_reg = VGIC_TO_MPIDR(vgic_mpidr); | ||
440 | reg_attr->vcpu = kvm_mpidr_to_vcpu(dev->kvm, mpidr_reg); | ||
441 | } else { | ||
442 | reg_attr->vcpu = kvm_get_vcpu(dev->kvm, 0); | ||
443 | } | ||
444 | |||
445 | if (!reg_attr->vcpu) | ||
446 | return -EINVAL; | ||
447 | |||
448 | reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | ||
449 | |||
450 | return 0; | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * vgic_v3_attr_regs_access - allows user space to access VGIC v3 state | ||
455 | * | ||
456 | * @dev: kvm device handle | ||
457 | * @attr: kvm device attribute | ||
458 | * @reg: address the value is read or written | ||
459 | * @is_write: true if userspace is writing a register | ||
460 | */ | ||
461 | static int vgic_v3_attr_regs_access(struct kvm_device *dev, | ||
462 | struct kvm_device_attr *attr, | ||
463 | u64 *reg, bool is_write) | ||
464 | { | ||
465 | struct vgic_reg_attr reg_attr; | ||
466 | gpa_t addr; | ||
467 | struct kvm_vcpu *vcpu; | ||
468 | int ret; | ||
469 | u32 tmp32; | ||
470 | |||
471 | ret = vgic_v3_parse_attr(dev, attr, ®_attr); | ||
472 | if (ret) | ||
473 | return ret; | ||
474 | |||
475 | vcpu = reg_attr.vcpu; | ||
476 | addr = reg_attr.addr; | ||
477 | |||
478 | mutex_lock(&dev->kvm->lock); | ||
479 | |||
480 | if (unlikely(!vgic_initialized(dev->kvm))) { | ||
481 | ret = -EBUSY; | ||
482 | goto out; | ||
483 | } | ||
484 | |||
485 | if (!lock_all_vcpus(dev->kvm)) { | ||
486 | ret = -EBUSY; | ||
487 | goto out; | ||
488 | } | ||
489 | |||
490 | switch (attr->group) { | ||
491 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
492 | if (is_write) | ||
493 | tmp32 = *reg; | ||
494 | |||
495 | ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32); | ||
496 | if (!is_write) | ||
497 | *reg = tmp32; | ||
498 | break; | ||
499 | case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: | ||
500 | if (is_write) | ||
501 | tmp32 = *reg; | ||
502 | |||
503 | ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32); | ||
504 | if (!is_write) | ||
505 | *reg = tmp32; | ||
506 | break; | ||
507 | case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { | ||
508 | u64 regid; | ||
509 | |||
510 | regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); | ||
511 | ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write, | ||
512 | regid, reg); | ||
513 | break; | ||
514 | } | ||
515 | case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { | ||
516 | unsigned int info, intid; | ||
517 | |||
518 | info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> | ||
519 | KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT; | ||
520 | if (info == VGIC_LEVEL_INFO_LINE_LEVEL) { | ||
521 | intid = attr->attr & | ||
522 | KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK; | ||
523 | ret = vgic_v3_line_level_info_uaccess(vcpu, is_write, | ||
524 | intid, reg); | ||
525 | } else { | ||
526 | ret = -EINVAL; | ||
527 | } | ||
528 | break; | ||
529 | } | ||
530 | default: | ||
531 | ret = -EINVAL; | ||
532 | break; | ||
533 | } | ||
534 | |||
535 | unlock_all_vcpus(dev->kvm); | ||
536 | out: | ||
537 | mutex_unlock(&dev->kvm->lock); | ||
538 | return ret; | ||
539 | } | ||
540 | |||
431 | static int vgic_v3_set_attr(struct kvm_device *dev, | 541 | static int vgic_v3_set_attr(struct kvm_device *dev, |
432 | struct kvm_device_attr *attr) | 542 | struct kvm_device_attr *attr) |
433 | { | 543 | { |
434 | return vgic_set_common_attr(dev, attr); | 544 | int ret; |
545 | |||
546 | ret = vgic_set_common_attr(dev, attr); | ||
547 | if (ret != -ENXIO) | ||
548 | return ret; | ||
549 | |||
550 | switch (attr->group) { | ||
551 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
552 | case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { | ||
553 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
554 | u32 tmp32; | ||
555 | u64 reg; | ||
556 | |||
557 | if (get_user(tmp32, uaddr)) | ||
558 | return -EFAULT; | ||
559 | |||
560 | reg = tmp32; | ||
561 | return vgic_v3_attr_regs_access(dev, attr, ®, true); | ||
562 | } | ||
563 | case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { | ||
564 | u64 __user *uaddr = (u64 __user *)(long)attr->addr; | ||
565 | u64 reg; | ||
566 | |||
567 | if (get_user(reg, uaddr)) | ||
568 | return -EFAULT; | ||
569 | |||
570 | return vgic_v3_attr_regs_access(dev, attr, ®, true); | ||
571 | } | ||
572 | case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { | ||
573 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
574 | u64 reg; | ||
575 | u32 tmp32; | ||
576 | |||
577 | if (get_user(tmp32, uaddr)) | ||
578 | return -EFAULT; | ||
579 | |||
580 | reg = tmp32; | ||
581 | return vgic_v3_attr_regs_access(dev, attr, ®, true); | ||
582 | } | ||
583 | } | ||
584 | return -ENXIO; | ||
435 | } | 585 | } |
436 | 586 | ||
437 | static int vgic_v3_get_attr(struct kvm_device *dev, | 587 | static int vgic_v3_get_attr(struct kvm_device *dev, |
438 | struct kvm_device_attr *attr) | 588 | struct kvm_device_attr *attr) |
439 | { | 589 | { |
440 | return vgic_get_common_attr(dev, attr); | 590 | int ret; |
591 | |||
592 | ret = vgic_get_common_attr(dev, attr); | ||
593 | if (ret != -ENXIO) | ||
594 | return ret; | ||
595 | |||
596 | switch (attr->group) { | ||
597 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
598 | case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { | ||
599 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
600 | u64 reg; | ||
601 | u32 tmp32; | ||
602 | |||
603 | ret = vgic_v3_attr_regs_access(dev, attr, ®, false); | ||
604 | if (ret) | ||
605 | return ret; | ||
606 | tmp32 = reg; | ||
607 | return put_user(tmp32, uaddr); | ||
608 | } | ||
609 | case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { | ||
610 | u64 __user *uaddr = (u64 __user *)(long)attr->addr; | ||
611 | u64 reg; | ||
612 | |||
613 | ret = vgic_v3_attr_regs_access(dev, attr, ®, false); | ||
614 | if (ret) | ||
615 | return ret; | ||
616 | return put_user(reg, uaddr); | ||
617 | } | ||
618 | case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { | ||
619 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
620 | u64 reg; | ||
621 | u32 tmp32; | ||
622 | |||
623 | ret = vgic_v3_attr_regs_access(dev, attr, ®, false); | ||
624 | if (ret) | ||
625 | return ret; | ||
626 | tmp32 = reg; | ||
627 | return put_user(tmp32, uaddr); | ||
628 | } | ||
629 | } | ||
630 | return -ENXIO; | ||
441 | } | 631 | } |
442 | 632 | ||
443 | static int vgic_v3_has_attr(struct kvm_device *dev, | 633 | static int vgic_v3_has_attr(struct kvm_device *dev, |
@@ -451,8 +641,19 @@ static int vgic_v3_has_attr(struct kvm_device *dev, | |||
451 | return 0; | 641 | return 0; |
452 | } | 642 | } |
453 | break; | 643 | break; |
644 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
645 | case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: | ||
646 | case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: | ||
647 | return vgic_v3_has_attr_regs(dev, attr); | ||
454 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: | 648 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: |
455 | return 0; | 649 | return 0; |
650 | case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { | ||
651 | if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> | ||
652 | KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) == | ||
653 | VGIC_LEVEL_INFO_LINE_LEVEL) | ||
654 | return 0; | ||
655 | break; | ||
656 | } | ||
456 | case KVM_DEV_ARM_VGIC_GRP_CTRL: | 657 | case KVM_DEV_ARM_VGIC_GRP_CTRL: |
457 | switch (attr->attr) { | 658 | switch (attr->attr) { |
458 | case KVM_DEV_ARM_VGIC_CTRL_INIT: | 659 | case KVM_DEV_ARM_VGIC_CTRL_INIT: |
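
For reference, the new KVM_DEV_ARM_VGIC_GRP_{DIST_REGS,REDIST_REGS,CPU_SYSREGS,LEVEL_INFO} groups handled above are driven from userspace with KVM_GET_DEVICE_ATTR / KVM_SET_DEVICE_ATTR on the vgic device fd, packing the target vCPU and register offset into kvm_device_attr.attr exactly as vgic_v3_parse_attr() decodes them. Below is a minimal, hypothetical userspace sketch of a redistributor register read; the vgic_fd, the pre-packed affinity value and the helper name are assumptions for illustration, not part of this series.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Hypothetical helper: read one 32-bit redistributor register of the vCPU
 * identified by "affinity", which is assumed to already be packed in the
 * Aff3..Aff0 byte format that vgic_v3_parse_attr() expects in the MPIDR
 * portion of kvm_device_attr.attr.
 */
static int vgic_v3_redist_read(int vgic_fd, uint64_t affinity,
                               uint32_t offset, uint32_t *val)
{
        struct kvm_device_attr attr = {
                .group = KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
                .attr  = (affinity << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) |
                         (offset & KVM_DEV_ARM_VGIC_OFFSET_MASK),
                .addr  = (uint64_t)(unsigned long)val,
        };

        return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
}

A write takes the same path through KVM_SET_DEVICE_ATTR; vgic_v3_set_attr() above fetches the 32-bit value with get_user() before handing it to vgic_v3_attr_regs_access().
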
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 78e34bc4d89b..a3ad7ff95c9b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c | |||
@@ -98,7 +98,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, | |||
98 | irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); | 98 | irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); |
99 | 99 | ||
100 | spin_lock(&irq->irq_lock); | 100 | spin_lock(&irq->irq_lock); |
101 | irq->pending = true; | 101 | irq->pending_latch = true; |
102 | irq->source |= 1U << source_vcpu->vcpu_id; | 102 | irq->source |= 1U << source_vcpu->vcpu_id; |
103 | 103 | ||
104 | vgic_queue_irq_unlock(source_vcpu->kvm, irq); | 104 | vgic_queue_irq_unlock(source_vcpu->kvm, irq); |
@@ -182,7 +182,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, | |||
182 | 182 | ||
183 | irq->source &= ~((val >> (i * 8)) & 0xff); | 183 | irq->source &= ~((val >> (i * 8)) & 0xff); |
184 | if (!irq->source) | 184 | if (!irq->source) |
185 | irq->pending = false; | 185 | irq->pending_latch = false; |
186 | 186 | ||
187 | spin_unlock(&irq->irq_lock); | 187 | spin_unlock(&irq->irq_lock); |
188 | vgic_put_irq(vcpu->kvm, irq); | 188 | vgic_put_irq(vcpu->kvm, irq); |
@@ -204,7 +204,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, | |||
204 | irq->source |= (val >> (i * 8)) & 0xff; | 204 | irq->source |= (val >> (i * 8)) & 0xff; |
205 | 205 | ||
206 | if (irq->source) { | 206 | if (irq->source) { |
207 | irq->pending = true; | 207 | irq->pending_latch = true; |
208 | vgic_queue_irq_unlock(vcpu->kvm, irq); | 208 | vgic_queue_irq_unlock(vcpu->kvm, irq); |
209 | } else { | 209 | } else { |
210 | spin_unlock(&irq->irq_lock); | 210 | spin_unlock(&irq->irq_lock); |
@@ -213,22 +213,6 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, | |||
213 | } | 213 | } |
214 | } | 214 | } |
215 | 215 | ||
216 | static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | ||
217 | { | ||
218 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
219 | vgic_v2_set_vmcr(vcpu, vmcr); | ||
220 | else | ||
221 | vgic_v3_set_vmcr(vcpu, vmcr); | ||
222 | } | ||
223 | |||
224 | static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | ||
225 | { | ||
226 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
227 | vgic_v2_get_vmcr(vcpu, vmcr); | ||
228 | else | ||
229 | vgic_v3_get_vmcr(vcpu, vmcr); | ||
230 | } | ||
231 | |||
232 | #define GICC_ARCH_VERSION_V2 0x2 | 216 | #define GICC_ARCH_VERSION_V2 0x2 |
233 | 217 | ||
234 | /* These are for userland accesses only, there is no guest-facing emulation. */ | 218 | /* These are for userland accesses only, there is no guest-facing emulation. */ |
@@ -369,21 +353,30 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) | |||
369 | 353 | ||
370 | int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) | 354 | int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) |
371 | { | 355 | { |
372 | int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; | 356 | const struct vgic_register_region *region; |
373 | const struct vgic_register_region *regions; | 357 | struct vgic_io_device iodev; |
358 | struct vgic_reg_attr reg_attr; | ||
359 | struct kvm_vcpu *vcpu; | ||
374 | gpa_t addr; | 360 | gpa_t addr; |
375 | int nr_regions, i, len; | 361 | int ret; |
362 | |||
363 | ret = vgic_v2_parse_attr(dev, attr, ®_attr); | ||
364 | if (ret) | ||
365 | return ret; | ||
376 | 366 | ||
377 | addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | 367 | vcpu = reg_attr.vcpu; |
368 | addr = reg_attr.addr; | ||
378 | 369 | ||
379 | switch (attr->group) { | 370 | switch (attr->group) { |
380 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | 371 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: |
381 | regions = vgic_v2_dist_registers; | 372 | iodev.regions = vgic_v2_dist_registers; |
382 | nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); | 373 | iodev.nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); |
374 | iodev.base_addr = 0; | ||
383 | break; | 375 | break; |
384 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | 376 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: |
385 | regions = vgic_v2_cpu_registers; | 377 | iodev.regions = vgic_v2_cpu_registers; |
386 | nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); | 378 | iodev.nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); |
379 | iodev.base_addr = 0; | ||
387 | break; | 380 | break; |
388 | default: | 381 | default: |
389 | return -ENXIO; | 382 | return -ENXIO; |
@@ -393,43 +386,11 @@ int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
393 | if (addr & 3) | 386 | if (addr & 3) |
394 | return -ENXIO; | 387 | return -ENXIO; |
395 | 388 | ||
396 | for (i = 0; i < nr_regions; i++) { | 389 | region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); |
397 | if (regions[i].bits_per_irq) | 390 | if (!region) |
398 | len = (regions[i].bits_per_irq * nr_irqs) / 8; | 391 | return -ENXIO; |
399 | else | ||
400 | len = regions[i].len; | ||
401 | |||
402 | if (regions[i].reg_offset <= addr && | ||
403 | regions[i].reg_offset + len > addr) | ||
404 | return 0; | ||
405 | } | ||
406 | |||
407 | return -ENXIO; | ||
408 | } | ||
409 | |||
410 | /* | ||
411 | * When userland tries to access the VGIC register handlers, we need to | ||
412 | * create a usable struct vgic_io_device to be passed to the handlers and we | ||
413 | * have to set up a buffer similar to what would have happened if a guest MMIO | ||
414 | * access occurred, including doing endian conversions on BE systems. | ||
415 | */ | ||
416 | static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, | ||
417 | bool is_write, int offset, u32 *val) | ||
418 | { | ||
419 | unsigned int len = 4; | ||
420 | u8 buf[4]; | ||
421 | int ret; | ||
422 | |||
423 | if (is_write) { | ||
424 | vgic_data_host_to_mmio_bus(buf, len, *val); | ||
425 | ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf); | ||
426 | } else { | ||
427 | ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf); | ||
428 | if (!ret) | ||
429 | *val = vgic_data_mmio_bus_to_host(buf, len); | ||
430 | } | ||
431 | 392 | ||
432 | return ret; | 393 | return 0; |
433 | } | 394 | } |
434 | 395 | ||
435 | int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, | 396 | int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, |
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 50f42f0f8c4f..6afb3b484886 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <kvm/arm_vgic.h> | 18 | #include <kvm/arm_vgic.h> |
19 | 19 | ||
20 | #include <asm/kvm_emulate.h> | 20 | #include <asm/kvm_emulate.h> |
21 | #include <asm/kvm_arm.h> | ||
22 | #include <asm/kvm_mmu.h> | ||
21 | 23 | ||
22 | #include "vgic.h" | 24 | #include "vgic.h" |
23 | #include "vgic-mmio.h" | 25 | #include "vgic-mmio.h" |
@@ -207,6 +209,60 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, | |||
207 | return 0; | 209 | return 0; |
208 | } | 210 | } |
209 | 211 | ||
212 | static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, | ||
213 | gpa_t addr, unsigned int len) | ||
214 | { | ||
215 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
216 | u32 value = 0; | ||
217 | int i; | ||
218 | |||
219 | /* | ||
220 | * The pending state of an interrupt is latched in the pending_latch field. | ||
221 | * Userspace will save and restore pending state and line_level | ||
222 | * separately. | ||
223 | * Refer to Documentation/virtual/kvm/devices/arm-vgic-v3.txt | ||
224 | * for handling of ISPENDR and ICPENDR. | ||
225 | */ | ||
226 | for (i = 0; i < len * 8; i++) { | ||
227 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
228 | |||
229 | if (irq->pending_latch) | ||
230 | value |= (1U << i); | ||
231 | |||
232 | vgic_put_irq(vcpu->kvm, irq); | ||
233 | } | ||
234 | |||
235 | return value; | ||
236 | } | ||
237 | |||
238 | static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, | ||
239 | gpa_t addr, unsigned int len, | ||
240 | unsigned long val) | ||
241 | { | ||
242 | u32 intid = VGIC_ADDR_TO_INTID(addr, 1); | ||
243 | int i; | ||
244 | |||
245 | for (i = 0; i < len * 8; i++) { | ||
246 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
247 | |||
248 | spin_lock(&irq->irq_lock); | ||
249 | if (test_bit(i, &val)) { | ||
250 | /* | ||
251 | * pending_latch is set irrespective of the irq type | ||
252 | * (level or edge), so restoring the pending state does not | ||
253 | * depend on the irq config having been restored first. | ||
254 | */ | ||
255 | irq->pending_latch = true; | ||
256 | vgic_queue_irq_unlock(vcpu->kvm, irq); | ||
257 | } else { | ||
258 | irq->pending_latch = false; | ||
259 | spin_unlock(&irq->irq_lock); | ||
260 | } | ||
261 | |||
262 | vgic_put_irq(vcpu->kvm, irq); | ||
263 | } | ||
264 | } | ||
265 | |||
210 | /* We want to avoid outer shareable. */ | 266 | /* We want to avoid outer shareable. */ |
211 | u64 vgic_sanitise_shareability(u64 field) | 267 | u64 vgic_sanitise_shareability(u64 field) |
212 | { | 268 | { |
@@ -356,7 +412,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, | |||
356 | * We take some special care here to fix the calculation of the register | 412 | * We take some special care here to fix the calculation of the register |
357 | * offset. | 413 | * offset. |
358 | */ | 414 | */ |
359 | #define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \ | 415 | #define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, ur, uw, bpi, acc) \ |
360 | { \ | 416 | { \ |
361 | .reg_offset = off, \ | 417 | .reg_offset = off, \ |
362 | .bits_per_irq = bpi, \ | 418 | .bits_per_irq = bpi, \ |
@@ -371,47 +427,54 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, | |||
371 | .access_flags = acc, \ | 427 | .access_flags = acc, \ |
372 | .read = rd, \ | 428 | .read = rd, \ |
373 | .write = wr, \ | 429 | .write = wr, \ |
430 | .uaccess_read = ur, \ | ||
431 | .uaccess_write = uw, \ | ||
374 | } | 432 | } |
375 | 433 | ||
376 | static const struct vgic_register_region vgic_v3_dist_registers[] = { | 434 | static const struct vgic_register_region vgic_v3_dist_registers[] = { |
377 | REGISTER_DESC_WITH_LENGTH(GICD_CTLR, | 435 | REGISTER_DESC_WITH_LENGTH(GICD_CTLR, |
378 | vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16, | 436 | vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16, |
379 | VGIC_ACCESS_32bit), | 437 | VGIC_ACCESS_32bit), |
438 | REGISTER_DESC_WITH_LENGTH(GICD_STATUSR, | ||
439 | vgic_mmio_read_rao, vgic_mmio_write_wi, 4, | ||
440 | VGIC_ACCESS_32bit), | ||
380 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, | 441 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, |
381 | vgic_mmio_read_rao, vgic_mmio_write_wi, 1, | 442 | vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1, |
382 | VGIC_ACCESS_32bit), | 443 | VGIC_ACCESS_32bit), |
383 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, | 444 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, |
384 | vgic_mmio_read_enable, vgic_mmio_write_senable, 1, | 445 | vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, |
385 | VGIC_ACCESS_32bit), | 446 | VGIC_ACCESS_32bit), |
386 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, | 447 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, |
387 | vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, | 448 | vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1, |
388 | VGIC_ACCESS_32bit), | 449 | VGIC_ACCESS_32bit), |
389 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, | 450 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, |
390 | vgic_mmio_read_pending, vgic_mmio_write_spending, 1, | 451 | vgic_mmio_read_pending, vgic_mmio_write_spending, |
452 | vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, | ||
391 | VGIC_ACCESS_32bit), | 453 | VGIC_ACCESS_32bit), |
392 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, | 454 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, |
393 | vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, | 455 | vgic_mmio_read_pending, vgic_mmio_write_cpending, |
456 | vgic_mmio_read_raz, vgic_mmio_write_wi, 1, | ||
394 | VGIC_ACCESS_32bit), | 457 | VGIC_ACCESS_32bit), |
395 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, | 458 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, |
396 | vgic_mmio_read_active, vgic_mmio_write_sactive, 1, | 459 | vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1, |
397 | VGIC_ACCESS_32bit), | 460 | VGIC_ACCESS_32bit), |
398 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, | 461 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, |
399 | vgic_mmio_read_active, vgic_mmio_write_cactive, 1, | 462 | vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1, |
400 | VGIC_ACCESS_32bit), | 463 | VGIC_ACCESS_32bit), |
401 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, | 464 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, |
402 | vgic_mmio_read_priority, vgic_mmio_write_priority, 8, | 465 | vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, |
403 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | 466 | 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), |
404 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, | 467 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, |
405 | vgic_mmio_read_raz, vgic_mmio_write_wi, 8, | 468 | vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 8, |
406 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), | 469 | VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), |
407 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, | 470 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, |
408 | vgic_mmio_read_config, vgic_mmio_write_config, 2, | 471 | vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, |
409 | VGIC_ACCESS_32bit), | 472 | VGIC_ACCESS_32bit), |
410 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, | 473 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, |
411 | vgic_mmio_read_raz, vgic_mmio_write_wi, 1, | 474 | vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1, |
412 | VGIC_ACCESS_32bit), | 475 | VGIC_ACCESS_32bit), |
413 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, | 476 | REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, |
414 | vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64, | 477 | vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64, |
415 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), | 478 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), |
416 | REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, | 479 | REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, |
417 | vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, | 480 | vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, |
@@ -422,12 +485,18 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { | |||
422 | REGISTER_DESC_WITH_LENGTH(GICR_CTLR, | 485 | REGISTER_DESC_WITH_LENGTH(GICR_CTLR, |
423 | vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, | 486 | vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, |
424 | VGIC_ACCESS_32bit), | 487 | VGIC_ACCESS_32bit), |
488 | REGISTER_DESC_WITH_LENGTH(GICR_STATUSR, | ||
489 | vgic_mmio_read_raz, vgic_mmio_write_wi, 4, | ||
490 | VGIC_ACCESS_32bit), | ||
425 | REGISTER_DESC_WITH_LENGTH(GICR_IIDR, | 491 | REGISTER_DESC_WITH_LENGTH(GICR_IIDR, |
426 | vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, | 492 | vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, |
427 | VGIC_ACCESS_32bit), | 493 | VGIC_ACCESS_32bit), |
428 | REGISTER_DESC_WITH_LENGTH(GICR_TYPER, | 494 | REGISTER_DESC_WITH_LENGTH(GICR_TYPER, |
429 | vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, | 495 | vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, |
430 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), | 496 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), |
497 | REGISTER_DESC_WITH_LENGTH(GICR_WAKER, | ||
498 | vgic_mmio_read_raz, vgic_mmio_write_wi, 4, | ||
499 | VGIC_ACCESS_32bit), | ||
431 | REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, | 500 | REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, |
432 | vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, | 501 | vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, |
433 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), | 502 | VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), |
@@ -449,11 +518,13 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = { | |||
449 | REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, | 518 | REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, |
450 | vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, | 519 | vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, |
451 | VGIC_ACCESS_32bit), | 520 | VGIC_ACCESS_32bit), |
452 | REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0, | 521 | REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0, |
453 | vgic_mmio_read_pending, vgic_mmio_write_spending, 4, | 522 | vgic_mmio_read_pending, vgic_mmio_write_spending, |
523 | vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, | ||
454 | VGIC_ACCESS_32bit), | 524 | VGIC_ACCESS_32bit), |
455 | REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0, | 525 | REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0, |
456 | vgic_mmio_read_pending, vgic_mmio_write_cpending, 4, | 526 | vgic_mmio_read_pending, vgic_mmio_write_cpending, |
527 | vgic_mmio_read_raz, vgic_mmio_write_wi, 4, | ||
457 | VGIC_ACCESS_32bit), | 528 | VGIC_ACCESS_32bit), |
458 | REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, | 529 | REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, |
459 | vgic_mmio_read_active, vgic_mmio_write_sactive, 4, | 530 | vgic_mmio_read_active, vgic_mmio_write_sactive, 4, |
@@ -546,6 +617,54 @@ int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) | |||
546 | return ret; | 617 | return ret; |
547 | } | 618 | } |
548 | 619 | ||
620 | int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
621 | { | ||
622 | const struct vgic_register_region *region; | ||
623 | struct vgic_io_device iodev; | ||
624 | struct vgic_reg_attr reg_attr; | ||
625 | struct kvm_vcpu *vcpu; | ||
626 | gpa_t addr; | ||
627 | int ret; | ||
628 | |||
629 | ret = vgic_v3_parse_attr(dev, attr, ®_attr); | ||
630 | if (ret) | ||
631 | return ret; | ||
632 | |||
633 | vcpu = reg_attr.vcpu; | ||
634 | addr = reg_attr.addr; | ||
635 | |||
636 | switch (attr->group) { | ||
637 | case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: | ||
638 | iodev.regions = vgic_v3_dist_registers; | ||
639 | iodev.nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); | ||
640 | iodev.base_addr = 0; | ||
641 | break; | ||
642 | case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{ | ||
643 | iodev.regions = vgic_v3_rdbase_registers; | ||
644 | iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); | ||
645 | iodev.base_addr = 0; | ||
646 | break; | ||
647 | } | ||
648 | case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { | ||
649 | u64 reg, id; | ||
650 | |||
651 | id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); | ||
652 | return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, ®); | ||
653 | } | ||
654 | default: | ||
655 | return -ENXIO; | ||
656 | } | ||
657 | |||
658 | /* We only support aligned 32-bit accesses. */ | ||
659 | if (addr & 3) | ||
660 | return -ENXIO; | ||
661 | |||
662 | region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); | ||
663 | if (!region) | ||
664 | return -ENXIO; | ||
665 | |||
666 | return 0; | ||
667 | } | ||
549 | /* | 668 | /* |
550 | * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI | 669 | * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI |
551 | * generation register ICC_SGI1R_EL1) with a given VCPU. | 670 | * generation register ICC_SGI1R_EL1) with a given VCPU. |
@@ -646,9 +765,55 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) | |||
646 | irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); | 765 | irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); |
647 | 766 | ||
648 | spin_lock(&irq->irq_lock); | 767 | spin_lock(&irq->irq_lock); |
649 | irq->pending = true; | 768 | irq->pending_latch = true; |
650 | 769 | ||
651 | vgic_queue_irq_unlock(vcpu->kvm, irq); | 770 | vgic_queue_irq_unlock(vcpu->kvm, irq); |
652 | vgic_put_irq(vcpu->kvm, irq); | 771 | vgic_put_irq(vcpu->kvm, irq); |
653 | } | 772 | } |
654 | } | 773 | } |
774 | |||
775 | int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
776 | int offset, u32 *val) | ||
777 | { | ||
778 | struct vgic_io_device dev = { | ||
779 | .regions = vgic_v3_dist_registers, | ||
780 | .nr_regions = ARRAY_SIZE(vgic_v3_dist_registers), | ||
781 | }; | ||
782 | |||
783 | return vgic_uaccess(vcpu, &dev, is_write, offset, val); | ||
784 | } | ||
785 | |||
786 | int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
787 | int offset, u32 *val) | ||
788 | { | ||
789 | struct vgic_io_device rd_dev = { | ||
790 | .regions = vgic_v3_rdbase_registers, | ||
791 | .nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers), | ||
792 | }; | ||
793 | |||
794 | struct vgic_io_device sgi_dev = { | ||
795 | .regions = vgic_v3_sgibase_registers, | ||
796 | .nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers), | ||
797 | }; | ||
798 | |||
799 | /* SGI_base is the next 64K frame after RD_base */ | ||
800 | if (offset >= SZ_64K) | ||
801 | return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K, | ||
802 | val); | ||
803 | else | ||
804 | return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); | ||
805 | } | ||
806 | |||
807 | int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
808 | u32 intid, u64 *val) | ||
809 | { | ||
810 | if (intid % 32) | ||
811 | return -EINVAL; | ||
812 | |||
813 | if (is_write) | ||
814 | vgic_write_irq_line_level_info(vcpu, intid, *val); | ||
815 | else | ||
816 | *val = vgic_read_irq_line_level_info(vcpu, intid); | ||
817 | |||
818 | return 0; | ||
819 | } | ||
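
The split implemented by vgic_v3_uaccess_read_pending()/vgic_v3_uaccess_write_pending() and the raz/wi ICPENDR uaccess handlers above can be summarised with a small model (toy code, names invented, not kernel code): userspace saves and restores the latch through ISPENDR and the line level through KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, while the guest-visible pending state is their combination for level-triggered interrupts, exactly as irq_is_pending() in vgic.h computes it.

#include <stdbool.h>

/* Toy model of one interrupt's userspace-visible pending state. */
struct model_irq {
        bool edge;           /* VGIC_CONFIG_EDGE vs. VGIC_CONFIG_LEVEL      */
        bool pending_latch;  /* saved/restored via the ISPENDR uaccess path */
        bool line_level;     /* saved/restored via GRP_LEVEL_INFO           */
};

/* Mirrors irq_is_pending(): what the guest would observe as pending. */
bool model_is_pending(const struct model_irq *irq)
{
        if (irq->edge)
                return irq->pending_latch;
        return irq->pending_latch || irq->line_level;
}

Because each restore path sets its piece of state irrespective of the configured trigger type, userspace may restore ISPENDR, the line-level info and ICFGR in any order, which is exactly the dependency the in-code comments are avoiding.
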
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index ebe1b9fa3c4d..3654b4c835ef 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c | |||
@@ -111,7 +111,7 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, | |||
111 | for (i = 0; i < len * 8; i++) { | 111 | for (i = 0; i < len * 8; i++) { |
112 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | 112 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); |
113 | 113 | ||
114 | if (irq->pending) | 114 | if (irq_is_pending(irq)) |
115 | value |= (1U << i); | 115 | value |= (1U << i); |
116 | 116 | ||
117 | vgic_put_irq(vcpu->kvm, irq); | 117 | vgic_put_irq(vcpu->kvm, irq); |
@@ -131,9 +131,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, | |||
131 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | 131 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); |
132 | 132 | ||
133 | spin_lock(&irq->irq_lock); | 133 | spin_lock(&irq->irq_lock); |
134 | irq->pending = true; | 134 | irq->pending_latch = true; |
135 | if (irq->config == VGIC_CONFIG_LEVEL) | ||
136 | irq->soft_pending = true; | ||
137 | 135 | ||
138 | vgic_queue_irq_unlock(vcpu->kvm, irq); | 136 | vgic_queue_irq_unlock(vcpu->kvm, irq); |
139 | vgic_put_irq(vcpu->kvm, irq); | 137 | vgic_put_irq(vcpu->kvm, irq); |
@@ -152,12 +150,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, | |||
152 | 150 | ||
153 | spin_lock(&irq->irq_lock); | 151 | spin_lock(&irq->irq_lock); |
154 | 152 | ||
155 | if (irq->config == VGIC_CONFIG_LEVEL) { | 153 | irq->pending_latch = false; |
156 | irq->soft_pending = false; | ||
157 | irq->pending = irq->line_level; | ||
158 | } else { | ||
159 | irq->pending = false; | ||
160 | } | ||
161 | 154 | ||
162 | spin_unlock(&irq->irq_lock); | 155 | spin_unlock(&irq->irq_lock); |
163 | vgic_put_irq(vcpu->kvm, irq); | 156 | vgic_put_irq(vcpu->kvm, irq); |
@@ -359,18 +352,70 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, | |||
359 | irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | 352 | irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); |
360 | spin_lock(&irq->irq_lock); | 353 | spin_lock(&irq->irq_lock); |
361 | 354 | ||
362 | if (test_bit(i * 2 + 1, &val)) { | 355 | if (test_bit(i * 2 + 1, &val)) |
363 | irq->config = VGIC_CONFIG_EDGE; | 356 | irq->config = VGIC_CONFIG_EDGE; |
364 | } else { | 357 | else |
365 | irq->config = VGIC_CONFIG_LEVEL; | 358 | irq->config = VGIC_CONFIG_LEVEL; |
366 | irq->pending = irq->line_level | irq->soft_pending; | ||
367 | } | ||
368 | 359 | ||
369 | spin_unlock(&irq->irq_lock); | 360 | spin_unlock(&irq->irq_lock); |
370 | vgic_put_irq(vcpu->kvm, irq); | 361 | vgic_put_irq(vcpu->kvm, irq); |
371 | } | 362 | } |
372 | } | 363 | } |
373 | 364 | ||
365 | u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) | ||
366 | { | ||
367 | int i; | ||
368 | u64 val = 0; | ||
369 | int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; | ||
370 | |||
371 | for (i = 0; i < 32; i++) { | ||
372 | struct vgic_irq *irq; | ||
373 | |||
374 | if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) | ||
375 | continue; | ||
376 | |||
377 | irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
378 | if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level) | ||
379 | val |= (1U << i); | ||
380 | |||
381 | vgic_put_irq(vcpu->kvm, irq); | ||
382 | } | ||
383 | |||
384 | return val; | ||
385 | } | ||
386 | |||
387 | void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, | ||
388 | const u64 val) | ||
389 | { | ||
390 | int i; | ||
391 | int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; | ||
392 | |||
393 | for (i = 0; i < 32; i++) { | ||
394 | struct vgic_irq *irq; | ||
395 | bool new_level; | ||
396 | |||
397 | if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) | ||
398 | continue; | ||
399 | |||
400 | irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); | ||
401 | |||
402 | /* | ||
403 | * The line level is set irrespective of the irq type | ||
404 | * (level or edge), so restoring it does not depend on the | ||
405 | * irq config having been restored first. | ||
406 | */ | ||
407 | new_level = !!(val & (1U << i)); | ||
408 | spin_lock(&irq->irq_lock); | ||
409 | irq->line_level = new_level; | ||
410 | if (new_level) | ||
411 | vgic_queue_irq_unlock(vcpu->kvm, irq); | ||
412 | else | ||
413 | spin_unlock(&irq->irq_lock); | ||
414 | |||
415 | vgic_put_irq(vcpu->kvm, irq); | ||
416 | } | ||
417 | } | ||
418 | |||
374 | static int match_region(const void *key, const void *elt) | 419 | static int match_region(const void *key, const void *elt) |
375 | { | 420 | { |
376 | const unsigned int offset = (unsigned long)key; | 421 | const unsigned int offset = (unsigned long)key; |
@@ -394,6 +439,22 @@ vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, | |||
394 | sizeof(region[0]), match_region); | 439 | sizeof(region[0]), match_region); |
395 | } | 440 | } |
396 | 441 | ||
442 | void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | ||
443 | { | ||
444 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
445 | vgic_v2_set_vmcr(vcpu, vmcr); | ||
446 | else | ||
447 | vgic_v3_set_vmcr(vcpu, vmcr); | ||
448 | } | ||
449 | |||
450 | void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) | ||
451 | { | ||
452 | if (kvm_vgic_global_state.type == VGIC_V2) | ||
453 | vgic_v2_get_vmcr(vcpu, vmcr); | ||
454 | else | ||
455 | vgic_v3_get_vmcr(vcpu, vmcr); | ||
456 | } | ||
457 | |||
397 | /* | 458 | /* |
398 | * kvm_mmio_read_buf() returns a value in a format where it can be converted | 459 | * kvm_mmio_read_buf() returns a value in a format where it can be converted |
399 | * to a byte array and be directly observed as the guest wanted it to appear | 460 | * to a byte array and be directly observed as the guest wanted it to appear |
@@ -484,6 +545,74 @@ static bool check_region(const struct kvm *kvm, | |||
484 | return false; | 545 | return false; |
485 | } | 546 | } |
486 | 547 | ||
548 | const struct vgic_register_region * | ||
549 | vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, | ||
550 | gpa_t addr, int len) | ||
551 | { | ||
552 | const struct vgic_register_region *region; | ||
553 | |||
554 | region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, | ||
555 | addr - iodev->base_addr); | ||
556 | if (!region || !check_region(vcpu->kvm, region, addr, len)) | ||
557 | return NULL; | ||
558 | |||
559 | return region; | ||
560 | } | ||
561 | |||
562 | static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, | ||
563 | gpa_t addr, u32 *val) | ||
564 | { | ||
565 | struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); | ||
566 | const struct vgic_register_region *region; | ||
567 | struct kvm_vcpu *r_vcpu; | ||
568 | |||
569 | region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); | ||
570 | if (!region) { | ||
571 | *val = 0; | ||
572 | return 0; | ||
573 | } | ||
574 | |||
575 | r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; | ||
576 | if (region->uaccess_read) | ||
577 | *val = region->uaccess_read(r_vcpu, addr, sizeof(u32)); | ||
578 | else | ||
579 | *val = region->read(r_vcpu, addr, sizeof(u32)); | ||
580 | |||
581 | return 0; | ||
582 | } | ||
583 | |||
584 | static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, | ||
585 | gpa_t addr, const u32 *val) | ||
586 | { | ||
587 | struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); | ||
588 | const struct vgic_register_region *region; | ||
589 | struct kvm_vcpu *r_vcpu; | ||
590 | |||
591 | region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); | ||
592 | if (!region) | ||
593 | return 0; | ||
594 | |||
595 | r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; | ||
596 | if (region->uaccess_write) | ||
597 | region->uaccess_write(r_vcpu, addr, sizeof(u32), *val); | ||
598 | else | ||
599 | region->write(r_vcpu, addr, sizeof(u32), *val); | ||
600 | |||
601 | return 0; | ||
602 | } | ||
603 | |||
604 | /* | ||
605 | * Userland access to VGIC registers. | ||
606 | */ | ||
607 | int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, | ||
608 | bool is_write, int offset, u32 *val) | ||
609 | { | ||
610 | if (is_write) | ||
611 | return vgic_uaccess_write(vcpu, &dev->dev, offset, val); | ||
612 | else | ||
613 | return vgic_uaccess_read(vcpu, &dev->dev, offset, val); | ||
614 | } | ||
615 | |||
487 | static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, | 616 | static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, |
488 | gpa_t addr, int len, void *val) | 617 | gpa_t addr, int len, void *val) |
489 | { | 618 | { |
@@ -491,9 +620,8 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, | |||
491 | const struct vgic_register_region *region; | 620 | const struct vgic_register_region *region; |
492 | unsigned long data = 0; | 621 | unsigned long data = 0; |
493 | 622 | ||
494 | region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, | 623 | region = vgic_get_mmio_region(vcpu, iodev, addr, len); |
495 | addr - iodev->base_addr); | 624 | if (!region) { |
496 | if (!region || !check_region(vcpu->kvm, region, addr, len)) { | ||
497 | memset(val, 0, len); | 625 | memset(val, 0, len); |
498 | return 0; | 626 | return 0; |
499 | } | 627 | } |
@@ -524,9 +652,8 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, | |||
524 | const struct vgic_register_region *region; | 652 | const struct vgic_register_region *region; |
525 | unsigned long data = vgic_data_mmio_bus_to_host(val, len); | 653 | unsigned long data = vgic_data_mmio_bus_to_host(val, len); |
526 | 654 | ||
527 | region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, | 655 | region = vgic_get_mmio_region(vcpu, iodev, addr, len); |
528 | addr - iodev->base_addr); | 656 | if (!region) |
529 | if (!region || !check_region(vcpu->kvm, region, addr, len)) | ||
530 | return 0; | 657 | return 0; |
531 | 658 | ||
532 | switch (iodev->iodev_type) { | 659 | switch (iodev->iodev_type) { |
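
vgic_read_irq_line_level_info()/vgic_write_irq_line_level_info() above expose line levels in blocks of 32 interrupts, with bit i standing for intid + i and SGIs or out-of-range INTIDs skipped. From userspace the block is addressed through the KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO group; the sketch below is hypothetical and assumes the uapi constants introduced elsewhere in this series plus a vgic_fd from KVM_CREATE_DEVICE.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Hypothetical helper: fetch the line levels of the 32 interrupts starting
 * at "intid", which must be a multiple of 32 as the kernel side enforces in
 * vgic_v3_line_level_info_uaccess().
 */
static int vgic_v3_get_line_levels(int vgic_fd, uint32_t intid,
                                   uint32_t *bitmap)
{
        struct kvm_device_attr attr = {
                .group = KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
                .attr  = ((uint64_t)VGIC_LEVEL_INFO_LINE_LEVEL <<
                          KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) |
                         (intid & KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK),
                .addr  = (uint64_t)(unsigned long)bitmap,
        };

        return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
}

The value transferred is a single 32-bit bitmap (vgic_v3_get_attr() truncates its internal u64 to u32 before put_user()), so one call covers exactly 32 interrupts.
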
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 84961b4e4422..98bb566b660a 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h | |||
@@ -34,6 +34,10 @@ struct vgic_register_region { | |||
34 | gpa_t addr, unsigned int len, | 34 | gpa_t addr, unsigned int len, |
35 | unsigned long val); | 35 | unsigned long val); |
36 | }; | 36 | }; |
37 | unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr, | ||
38 | unsigned int len); | ||
39 | void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, | ||
40 | unsigned int len, unsigned long val); | ||
37 | }; | 41 | }; |
38 | 42 | ||
39 | extern struct kvm_io_device_ops kvm_io_gic_ops; | 43 | extern struct kvm_io_device_ops kvm_io_gic_ops; |
@@ -86,6 +90,18 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; | |||
86 | .write = wr, \ | 90 | .write = wr, \ |
87 | } | 91 | } |
88 | 92 | ||
93 | #define REGISTER_DESC_WITH_LENGTH_UACCESS(off, rd, wr, urd, uwr, length, acc) \ | ||
94 | { \ | ||
95 | .reg_offset = off, \ | ||
96 | .bits_per_irq = 0, \ | ||
97 | .len = length, \ | ||
98 | .access_flags = acc, \ | ||
99 | .read = rd, \ | ||
100 | .write = wr, \ | ||
101 | .uaccess_read = urd, \ | ||
102 | .uaccess_write = uwr, \ | ||
103 | } | ||
104 | |||
89 | int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, | 105 | int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, |
90 | struct vgic_register_region *reg_desc, | 106 | struct vgic_register_region *reg_desc, |
91 | struct vgic_io_device *region, | 107 | struct vgic_io_device *region, |
@@ -158,6 +174,14 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, | |||
158 | gpa_t addr, unsigned int len, | 174 | gpa_t addr, unsigned int len, |
159 | unsigned long val); | 175 | unsigned long val); |
160 | 176 | ||
177 | int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, | ||
178 | bool is_write, int offset, u32 *val); | ||
179 | |||
180 | u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); | ||
181 | |||
182 | void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, | ||
183 | const u64 val); | ||
184 | |||
161 | unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); | 185 | unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); |
162 | 186 | ||
163 | unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); | 187 | unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); |
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 834137e7b83f..b834ecdf3225 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c | |||
@@ -104,7 +104,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) | |||
104 | /* Edge is the only case where we preserve the pending bit */ | 104 | /* Edge is the only case where we preserve the pending bit */ |
105 | if (irq->config == VGIC_CONFIG_EDGE && | 105 | if (irq->config == VGIC_CONFIG_EDGE && |
106 | (val & GICH_LR_PENDING_BIT)) { | 106 | (val & GICH_LR_PENDING_BIT)) { |
107 | irq->pending = true; | 107 | irq->pending_latch = true; |
108 | 108 | ||
109 | if (vgic_irq_is_sgi(intid)) { | 109 | if (vgic_irq_is_sgi(intid)) { |
110 | u32 cpuid = val & GICH_LR_PHYSID_CPUID; | 110 | u32 cpuid = val & GICH_LR_PHYSID_CPUID; |
@@ -120,9 +120,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) | |||
120 | */ | 120 | */ |
121 | if (irq->config == VGIC_CONFIG_LEVEL) { | 121 | if (irq->config == VGIC_CONFIG_LEVEL) { |
122 | if (!(val & GICH_LR_PENDING_BIT)) | 122 | if (!(val & GICH_LR_PENDING_BIT)) |
123 | irq->soft_pending = false; | 123 | irq->pending_latch = false; |
124 | |||
125 | irq->pending = irq->line_level || irq->soft_pending; | ||
126 | } | 124 | } |
127 | 125 | ||
128 | spin_unlock(&irq->irq_lock); | 126 | spin_unlock(&irq->irq_lock); |
@@ -145,11 +143,11 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | |||
145 | { | 143 | { |
146 | u32 val = irq->intid; | 144 | u32 val = irq->intid; |
147 | 145 | ||
148 | if (irq->pending) { | 146 | if (irq_is_pending(irq)) { |
149 | val |= GICH_LR_PENDING_BIT; | 147 | val |= GICH_LR_PENDING_BIT; |
150 | 148 | ||
151 | if (irq->config == VGIC_CONFIG_EDGE) | 149 | if (irq->config == VGIC_CONFIG_EDGE) |
152 | irq->pending = false; | 150 | irq->pending_latch = false; |
153 | 151 | ||
154 | if (vgic_irq_is_sgi(irq->intid)) { | 152 | if (vgic_irq_is_sgi(irq->intid)) { |
155 | u32 src = ffs(irq->source); | 153 | u32 src = ffs(irq->source); |
@@ -158,7 +156,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | |||
158 | val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; | 156 | val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; |
159 | irq->source &= ~(1 << (src - 1)); | 157 | irq->source &= ~(1 << (src - 1)); |
160 | if (irq->source) | 158 | if (irq->source) |
161 | irq->pending = true; | 159 | irq->pending_latch = true; |
162 | } | 160 | } |
163 | } | 161 | } |
164 | 162 | ||
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index e6b03fd8c374..edc6ee2dc852 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c | |||
@@ -94,7 +94,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) | |||
94 | /* Edge is the only case where we preserve the pending bit */ | 94 | /* Edge is the only case where we preserve the pending bit */ |
95 | if (irq->config == VGIC_CONFIG_EDGE && | 95 | if (irq->config == VGIC_CONFIG_EDGE && |
96 | (val & ICH_LR_PENDING_BIT)) { | 96 | (val & ICH_LR_PENDING_BIT)) { |
97 | irq->pending = true; | 97 | irq->pending_latch = true; |
98 | 98 | ||
99 | if (vgic_irq_is_sgi(intid) && | 99 | if (vgic_irq_is_sgi(intid) && |
100 | model == KVM_DEV_TYPE_ARM_VGIC_V2) { | 100 | model == KVM_DEV_TYPE_ARM_VGIC_V2) { |
@@ -111,9 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) | |||
111 | */ | 111 | */ |
112 | if (irq->config == VGIC_CONFIG_LEVEL) { | 112 | if (irq->config == VGIC_CONFIG_LEVEL) { |
113 | if (!(val & ICH_LR_PENDING_BIT)) | 113 | if (!(val & ICH_LR_PENDING_BIT)) |
114 | irq->soft_pending = false; | 114 | irq->pending_latch = false; |
115 | |||
116 | irq->pending = irq->line_level || irq->soft_pending; | ||
117 | } | 115 | } |
118 | 116 | ||
119 | spin_unlock(&irq->irq_lock); | 117 | spin_unlock(&irq->irq_lock); |
@@ -127,11 +125,11 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | |||
127 | u32 model = vcpu->kvm->arch.vgic.vgic_model; | 125 | u32 model = vcpu->kvm->arch.vgic.vgic_model; |
128 | u64 val = irq->intid; | 126 | u64 val = irq->intid; |
129 | 127 | ||
130 | if (irq->pending) { | 128 | if (irq_is_pending(irq)) { |
131 | val |= ICH_LR_PENDING_BIT; | 129 | val |= ICH_LR_PENDING_BIT; |
132 | 130 | ||
133 | if (irq->config == VGIC_CONFIG_EDGE) | 131 | if (irq->config == VGIC_CONFIG_EDGE) |
134 | irq->pending = false; | 132 | irq->pending_latch = false; |
135 | 133 | ||
136 | if (vgic_irq_is_sgi(irq->intid) && | 134 | if (vgic_irq_is_sgi(irq->intid) && |
137 | model == KVM_DEV_TYPE_ARM_VGIC_V2) { | 135 | model == KVM_DEV_TYPE_ARM_VGIC_V2) { |
@@ -141,7 +139,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | |||
141 | val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; | 139 | val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; |
142 | irq->source &= ~(1 << (src - 1)); | 140 | irq->source &= ~(1 << (src - 1)); |
143 | if (irq->source) | 141 | if (irq->source) |
144 | irq->pending = true; | 142 | irq->pending_latch = true; |
145 | } | 143 | } |
146 | } | 144 | } |
147 | 145 | ||
@@ -177,10 +175,18 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) | |||
177 | { | 175 | { |
178 | u32 vmcr; | 176 | u32 vmcr; |
179 | 177 | ||
180 | vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; | 178 | /* |
179 | * Ignore the FIQen bit, because GIC emulation always implies | ||
180 | * SRE=1 which means the vFIQEn bit is also RES1. | ||
181 | */ | ||
182 | vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) << | ||
183 | ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; | ||
184 | vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; | ||
181 | vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; | 185 | vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; |
182 | vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; | 186 | vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; |
183 | vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; | 187 | vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; |
188 | vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; | ||
189 | vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; | ||
184 | 190 | ||
185 | vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; | 191 | vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; |
186 | } | 192 | } |
@@ -189,10 +195,18 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) | |||
189 | { | 195 | { |
190 | u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; | 196 | u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; |
191 | 197 | ||
192 | vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; | 198 | /* |
199 | * Ignore the FIQen bit, because GIC emulation always implies | ||
200 | * SRE=1 which means the vFIQEn bit is also RES1. | ||
201 | */ | ||
202 | vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) << | ||
203 | ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; | ||
204 | vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; | ||
193 | vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; | 205 | vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; |
194 | vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; | 206 | vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; |
195 | vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; | 207 | vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; |
208 | vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; | ||
209 | vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; | ||
196 | } | 210 | } |
197 | 211 | ||
198 | #define INITIAL_PENDBASER_VALUE \ | 212 | #define INITIAL_PENDBASER_VALUE \ |
@@ -224,6 +238,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
224 | vgic_v3->vgic_sre = 0; | 238 | vgic_v3->vgic_sre = 0; |
225 | } | 239 | } |
226 | 240 | ||
241 | vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 & | ||
242 | ICH_VTR_ID_BITS_MASK) >> | ||
243 | ICH_VTR_ID_BITS_SHIFT; | ||
244 | vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 & | ||
245 | ICH_VTR_PRI_BITS_MASK) >> | ||
246 | ICH_VTR_PRI_BITS_SHIFT) + 1; | ||
247 | |||
227 | /* Get the show on the road... */ | 248 | /* Get the show on the road... */ |
228 | vgic_v3->vgic_hcr = ICH_HCR_EN; | 249 | vgic_v3->vgic_hcr = ICH_HCR_EN; |
229 | } | 250 | } |
@@ -322,6 +343,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) | |||
322 | */ | 343 | */ |
323 | kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; | 344 | kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; |
324 | kvm_vgic_global_state.can_emulate_gicv2 = false; | 345 | kvm_vgic_global_state.can_emulate_gicv2 = false; |
346 | kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; | ||
325 | 347 | ||
326 | if (!info->vcpu.start) { | 348 | if (!info->vcpu.start) { |
327 | kvm_info("GICv3: no GICV resource entry\n"); | 349 | kvm_info("GICv3: no GICV resource entry\n"); |
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 6440b56ec90e..654dfd40e449 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c | |||
@@ -160,7 +160,7 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) | |||
160 | * If the distributor is disabled, pending interrupts shouldn't be | 160 | * If the distributor is disabled, pending interrupts shouldn't be |
161 | * forwarded. | 161 | * forwarded. |
162 | */ | 162 | */ |
163 | if (irq->enabled && irq->pending) { | 163 | if (irq->enabled && irq_is_pending(irq)) { |
164 | if (unlikely(irq->target_vcpu && | 164 | if (unlikely(irq->target_vcpu && |
165 | !irq->target_vcpu->kvm->arch.vgic.enabled)) | 165 | !irq->target_vcpu->kvm->arch.vgic.enabled)) |
166 | return NULL; | 166 | return NULL; |
@@ -204,8 +204,8 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
204 | goto out; | 204 | goto out; |
205 | } | 205 | } |
206 | 206 | ||
207 | penda = irqa->enabled && irqa->pending; | 207 | penda = irqa->enabled && irq_is_pending(irqa); |
208 | pendb = irqb->enabled && irqb->pending; | 208 | pendb = irqb->enabled && irq_is_pending(irqb); |
209 | 209 | ||
210 | if (!penda || !pendb) { | 210 | if (!penda || !pendb) { |
211 | ret = (int)pendb - (int)penda; | 211 | ret = (int)pendb - (int)penda; |
@@ -335,9 +335,22 @@ retry: | |||
335 | return true; | 335 | return true; |
336 | } | 336 | } |
337 | 337 | ||
338 | static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | 338 | /** |
339 | unsigned int intid, bool level, | 339 | * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic |
340 | bool mapped_irq) | 340 | * @kvm: The VM structure pointer |
341 | * @cpuid: The CPU for PPIs | ||
342 | * @intid: The INTID to inject a new state to. | ||
343 | * @level: Edge-triggered: true: to trigger the interrupt | ||
344 | * false: to ignore the call | ||
345 | * Level-sensitive true: raise the input signal | ||
346 | * false: lower the input signal | ||
347 | * | ||
348 | * The VGIC is not concerned with devices being active-LOW or active-HIGH for | ||
349 | * level-sensitive interrupts. You can think of the level parameter as 1 | ||
350 | * being HIGH and 0 being LOW and all devices being active-HIGH. | ||
351 | */ | ||
352 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, | ||
353 | bool level) | ||
341 | { | 354 | { |
342 | struct kvm_vcpu *vcpu; | 355 | struct kvm_vcpu *vcpu; |
343 | struct vgic_irq *irq; | 356 | struct vgic_irq *irq; |
@@ -357,11 +370,6 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
357 | if (!irq) | 370 | if (!irq) |
358 | return -EINVAL; | 371 | return -EINVAL; |
359 | 372 | ||
360 | if (irq->hw != mapped_irq) { | ||
361 | vgic_put_irq(kvm, irq); | ||
362 | return -EINVAL; | ||
363 | } | ||
364 | |||
365 | spin_lock(&irq->irq_lock); | 373 | spin_lock(&irq->irq_lock); |
366 | 374 | ||
367 | if (!vgic_validate_injection(irq, level)) { | 375 | if (!vgic_validate_injection(irq, level)) { |
@@ -371,12 +379,10 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
371 | return 0; | 379 | return 0; |
372 | } | 380 | } |
373 | 381 | ||
374 | if (irq->config == VGIC_CONFIG_LEVEL) { | 382 | if (irq->config == VGIC_CONFIG_LEVEL) |
375 | irq->line_level = level; | 383 | irq->line_level = level; |
376 | irq->pending = level || irq->soft_pending; | 384 | else |
377 | } else { | 385 | irq->pending_latch = true; |
378 | irq->pending = true; | ||
379 | } | ||
380 | 386 | ||
381 | vgic_queue_irq_unlock(kvm, irq); | 387 | vgic_queue_irq_unlock(kvm, irq); |
382 | vgic_put_irq(kvm, irq); | 388 | vgic_put_irq(kvm, irq); |
@@ -384,32 +390,6 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, | |||
384 | return 0; | 390 | return 0; |
385 | } | 391 | } |
386 | 392 | ||
387 | /** | ||
388 | * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic | ||
389 | * @kvm: The VM structure pointer | ||
390 | * @cpuid: The CPU for PPIs | ||
391 | * @intid: The INTID to inject a new state to. | ||
392 | * @level: Edge-triggered: true: to trigger the interrupt | ||
393 | * false: to ignore the call | ||
394 | * Level-sensitive true: raise the input signal | ||
395 | * false: lower the input signal | ||
396 | * | ||
397 | * The VGIC is not concerned with devices being active-LOW or active-HIGH for | ||
398 | * level-sensitive interrupts. You can think of the level parameter as 1 | ||
399 | * being HIGH and 0 being LOW and all devices being active-HIGH. | ||
400 | */ | ||
401 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, | ||
402 | bool level) | ||
403 | { | ||
404 | return vgic_update_irq_pending(kvm, cpuid, intid, level, false); | ||
405 | } | ||
406 | |||
407 | int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, | ||
408 | bool level) | ||
409 | { | ||
410 | return vgic_update_irq_pending(kvm, cpuid, intid, level, true); | ||
411 | } | ||
412 | |||
413 | int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) | 393 | int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) |
414 | { | 394 | { |
415 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); | 395 | struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); |
@@ -689,7 +669,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) | |||
689 | 669 | ||
690 | list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { | 670 | list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { |
691 | spin_lock(&irq->irq_lock); | 671 | spin_lock(&irq->irq_lock); |
692 | pending = irq->pending && irq->enabled; | 672 | pending = irq_is_pending(irq) && irq->enabled; |
693 | spin_unlock(&irq->irq_lock); | 673 | spin_unlock(&irq->irq_lock); |
694 | 674 | ||
695 | if (pending) | 675 | if (pending) |
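The vgic.c hunks above replace the single pending flag with a latched pending bit plus a tracked line level, and fold the mapped-IRQ entry point away. As a standalone illustration (not kernel code: locking, injection validation and the soft-pending path are omitted, and every name is merely modeled on the patch), the sketch below shows how a latch and a line level combine into the irq_is_pending() predicate that the vgic.h hunk further down introduces.

/*
 * Standalone sketch of the pending model introduced above (not kernel code).
 * Level-sensitive interrupts track the current input line separately from a
 * latched pending bit; "pending" is derived from both.
 */
#include <stdbool.h>
#include <stdio.h>

enum irq_config { CONFIG_EDGE, CONFIG_LEVEL };

struct demo_irq {
	enum irq_config config;
	bool line_level;	/* current input line, level-sensitive only */
	bool pending_latch;	/* set by edges (or software writes) */
};

static void demo_inject(struct demo_irq *irq, bool level)
{
	if (irq->config == CONFIG_LEVEL)
		irq->line_level = level;	/* follow the input signal */
	else if (level)
		irq->pending_latch = true;	/* a rising edge latches pending */
	/* an edge injection with level == false is simply ignored */
}

static bool demo_is_pending(const struct demo_irq *irq)
{
	if (irq->config == CONFIG_EDGE)
		return irq->pending_latch;
	return irq->pending_latch || irq->line_level;
}

int main(void)
{
	struct demo_irq lvl = { .config = CONFIG_LEVEL };
	struct demo_irq edg = { .config = CONFIG_EDGE };

	demo_inject(&lvl, true);	/* raise the line */
	demo_inject(&edg, true);	/* trigger the edge */
	printf("level pending=%d edge pending=%d\n",
	       demo_is_pending(&lvl), demo_is_pending(&edg));

	demo_inject(&lvl, false);	/* lower the line: level IRQ no longer pending */
	printf("level pending=%d edge pending=%d\n",
	       demo_is_pending(&lvl), demo_is_pending(&edg));
	return 0;
}

The design point is that lowering a level-sensitive line clears the derived pending state, while an edge stays pending until its latch is explicitly cleared.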
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 859f65c6e056..db28f7cadab2 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h | |||
@@ -30,13 +30,79 @@ | |||
30 | 30 | ||
31 | #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) | 31 | #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) |
32 | 32 | ||
33 | #define VGIC_AFFINITY_0_SHIFT 0 | ||
34 | #define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT) | ||
35 | #define VGIC_AFFINITY_1_SHIFT 8 | ||
36 | #define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT) | ||
37 | #define VGIC_AFFINITY_2_SHIFT 16 | ||
38 | #define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT) | ||
39 | #define VGIC_AFFINITY_3_SHIFT 24 | ||
40 | #define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT) | ||
41 | |||
42 | #define VGIC_AFFINITY_LEVEL(reg, level) \ | ||
43 | ((((reg) & VGIC_AFFINITY_## level ##_MASK) \ | ||
44 | >> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) | ||
45 | |||
46 | /* | ||
47 |  * Userspace encodes the affinity differently from the MPIDR; the | ||
48 |  * macro below converts the vgic userspace format to the MPIDR register format. | ||
49 | */ | ||
50 | #define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \ | ||
51 | VGIC_AFFINITY_LEVEL(val, 1) | \ | ||
52 | VGIC_AFFINITY_LEVEL(val, 2) | \ | ||
53 | VGIC_AFFINITY_LEVEL(val, 3)) | ||
54 | |||
55 | /* | ||
56 |  * The macros below define the CPUREG encoding described in | ||
57 |  * Documentation/virtual/kvm/devices/arm-vgic-v3.txt. | ||
58 | */ | ||
59 | #define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000 | ||
60 | #define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT 14 | ||
61 | #define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK 0x0000000000003800 | ||
62 | #define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT 11 | ||
63 | #define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK 0x0000000000000780 | ||
64 | #define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT 7 | ||
65 | #define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK 0x0000000000000078 | ||
66 | #define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT 3 | ||
67 | #define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK 0x0000000000000007 | ||
68 | #define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT 0 | ||
69 | |||
70 | #define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \ | ||
71 | KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \ | ||
72 | KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \ | ||
73 | KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ | ||
74 | KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) | ||
75 | |||
76 | static inline bool irq_is_pending(struct vgic_irq *irq) | ||
77 | { | ||
78 | if (irq->config == VGIC_CONFIG_EDGE) | ||
79 | return irq->pending_latch; | ||
80 | else | ||
81 | return irq->pending_latch || irq->line_level; | ||
82 | } | ||
83 | |||
33 | struct vgic_vmcr { | 84 | struct vgic_vmcr { |
34 | u32 ctlr; | 85 | u32 ctlr; |
35 | u32 abpr; | 86 | u32 abpr; |
36 | u32 bpr; | 87 | u32 bpr; |
37 | u32 pmr; | 88 | u32 pmr; |
89 | /* The members below are valid only for GICv3 */ | ||
90 | u32 grpen0; | ||
91 | u32 grpen1; | ||
92 | }; | ||
93 | |||
94 | struct vgic_reg_attr { | ||
95 | struct kvm_vcpu *vcpu; | ||
96 | gpa_t addr; | ||
38 | }; | 97 | }; |
39 | 98 | ||
99 | int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, | ||
100 | struct vgic_reg_attr *reg_attr); | ||
101 | int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, | ||
102 | struct vgic_reg_attr *reg_attr); | ||
103 | const struct vgic_register_region * | ||
104 | vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, | ||
105 | gpa_t addr, int len); | ||
40 | struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, | 106 | struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, |
41 | u32 intid); | 107 | u32 intid); |
42 | void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); | 108 | void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); |
@@ -89,9 +155,24 @@ bool vgic_has_its(struct kvm *kvm); | |||
89 | int kvm_vgic_register_its_device(void); | 155 | int kvm_vgic_register_its_device(void); |
90 | void vgic_enable_lpis(struct kvm_vcpu *vcpu); | 156 | void vgic_enable_lpis(struct kvm_vcpu *vcpu); |
91 | int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); | 157 | int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); |
92 | 158 | int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); | |
159 | int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
160 | int offset, u32 *val); | ||
161 | int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
162 | int offset, u32 *val); | ||
163 | int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
164 | u64 id, u64 *val); | ||
165 | int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, | ||
166 | u64 *reg); | ||
167 | int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, | ||
168 | u32 intid, u64 *val); | ||
93 | int kvm_register_vgic_device(unsigned long type); | 169 | int kvm_register_vgic_device(unsigned long type); |
170 | void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
171 | void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); | ||
94 | int vgic_lazy_init(struct kvm *kvm); | 172 | int vgic_lazy_init(struct kvm *kvm); |
95 | int vgic_init(struct kvm *kvm); | 173 | int vgic_init(struct kvm *kvm); |
96 | 174 | ||
175 | int vgic_debug_init(struct kvm *kvm); | ||
176 | int vgic_debug_destroy(struct kvm *kvm); | ||
177 | |||
97 | #endif | 178 | #endif |
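For readers decoding the new VGIC_AFFINITY_* and VGIC_TO_MPIDR() macros above, here is a standalone sketch of the repacking they perform, written with plain shifts instead of the kernel macros. The helper names are illustrative, and the 0/8/16/32 per-level shifts assume the arm64 MPIDR layout (Aff0..Aff2 at bits 0/8/16, Aff3 at bits 39:32).

/*
 * Standalone sketch of the VGIC_TO_MPIDR() repacking (not kernel code).
 * Userspace packs Aff0..Aff3 into four consecutive bytes of a 32-bit value;
 * the MPIDR register keeps Aff0..Aff2 at bits 0/8/16 but places Aff3 at
 * bits 39:32, which the per-level shift below reproduces.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int mpidr_level_shift(unsigned int level)
{
	/* 0, 8, 16, 32 for levels 0..3 (assumed arm64 layout) */
	return ((1u << level) >> 1) << 3;
}

static uint64_t vgic_uaccess_to_mpidr(uint32_t val)
{
	uint64_t mpidr = 0;

	for (unsigned int level = 0; level < 4; level++) {
		uint64_t aff = (val >> (level * 8)) & 0xff;

		mpidr |= aff << mpidr_level_shift(level);
	}
	return mpidr;
}

int main(void)
{
	/* Aff3=0x01, Aff2=0x02, Aff1=0x03, Aff0=0x04 in the userspace encoding */
	uint32_t uval = 0x01020304;

	printf("MPIDR = 0x%llx\n",
	       (unsigned long long)vgic_uaccess_to_mpidr(uval));
	/* prints 0x100020304: Aff3 has been moved up to bits 39:32 */
	return 0;
}

Packing the four affinity bytes contiguously keeps the userspace ABI to a single 32-bit field, at the cost of this one-byte relocation when converting to the real register format.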
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 482612b4e496..cc4d6e0dd2a2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -506,11 +506,6 @@ static struct kvm_memslots *kvm_alloc_memslots(void) | |||
506 | if (!slots) | 506 | if (!slots) |
507 | return NULL; | 507 | return NULL; |
508 | 508 | ||
509 | /* | ||
510 | * Init kvm generation close to the maximum to easily test the | ||
511 | * code of handling generation number wrap-around. | ||
512 | */ | ||
513 | slots->generation = -150; | ||
514 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) | 509 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) |
515 | slots->id_to_index[i] = slots->memslots[i].id = i; | 510 | slots->id_to_index[i] = slots->memslots[i].id = i; |
516 | 511 | ||
@@ -641,9 +636,16 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
641 | 636 | ||
642 | r = -ENOMEM; | 637 | r = -ENOMEM; |
643 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { | 638 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { |
644 | kvm->memslots[i] = kvm_alloc_memslots(); | 639 | struct kvm_memslots *slots = kvm_alloc_memslots(); |
645 | if (!kvm->memslots[i]) | 640 | if (!slots) |
646 | goto out_err_no_srcu; | 641 | goto out_err_no_srcu; |
642 | /* | ||
643 | * Generations must be different for each address space. | ||
644 |          * Init kvm generation close to the maximum to easily exercise the | ||
645 |          * code that handles generation number wrap-around. | ||
646 | */ | ||
647 | slots->generation = i * 2 - 150; | ||
648 | rcu_assign_pointer(kvm->memslots[i], slots); | ||
647 | } | 649 | } |
648 | 650 | ||
649 | if (init_srcu_struct(&kvm->srcu)) | 651 | if (init_srcu_struct(&kvm->srcu)) |
@@ -870,8 +872,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
870 | * Increment the new memslot generation a second time. This prevents | 872 | * Increment the new memslot generation a second time. This prevents |
871 | * vm exits that race with memslot updates from caching a memslot | 873 | * vm exits that race with memslot updates from caching a memslot |
872 | * generation that will (potentially) be valid forever. | 874 | * generation that will (potentially) be valid forever. |
875 | * | ||
876 |      * Generations must be unique even across address spaces. We do not need | ||
877 |      * a global counter for that; instead, the generation space is evenly split | ||
878 |      * across address spaces. For example, with two address spaces, address | ||
879 |      * space 0 will use generations 0, 4, 8, ... while address space 1 will | ||
880 | * use generations 2, 6, 10, 14, ... | ||
873 | */ | 881 | */ |
874 | slots->generation++; | 882 | slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; |
875 | 883 | ||
876 | kvm_arch_memslots_updated(kvm, slots); | 884 | kvm_arch_memslots_updated(kvm, slots); |
877 | 885 | ||
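The two hunks above, the per-address-space starting generation in kvm_create_vm() and the larger post-update step in install_new_memslots(), are what keep generations unique across address spaces. The following is a standalone arithmetic sketch, with illustrative names, the -150 wrap-around test bias left out for clarity, and the in-progress +1 assumed to happen earlier in install_new_memslots() (outside this hunk).

/*
 * Standalone sketch of the generation arithmetic (not kernel code).  Each
 * address space starts at a distinct even offset (i * 2), an update first
 * bumps the generation by 1 while it is in progress, then by
 * ADDRESS_SPACE_NUM * 2 - 1 once it is published, so stable generations
 * never collide across address spaces.
 */
#include <stdio.h>

#define DEMO_ADDRESS_SPACE_NUM 2

int main(void)
{
	unsigned long gen[DEMO_ADDRESS_SPACE_NUM];

	for (int as = 0; as < DEMO_ADDRESS_SPACE_NUM; as++)
		gen[as] = (unsigned long)as * 2;	/* per-space starting point */

	for (int update = 0; update < 4; update++) {
		for (int as = 0; as < DEMO_ADDRESS_SPACE_NUM; as++) {
			gen[as] += 1;				/* update in progress */
			gen[as] += DEMO_ADDRESS_SPACE_NUM * 2 - 1;	/* published */
			printf("as %d -> generation %lu\n", as, gen[as]);
		}
	}
	/* prints 4, 8, 12, ... for address space 0 and 6, 10, 14, ... for space 1 */
	return 0;
}

With two address spaces each update advances a slot's generation by four in total, so the published values of the two spaces interleave (0, 4, 8, ... and 2, 6, 10, ...) and any cached generation can be attributed to exactly one space.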
@@ -1094,37 +1102,31 @@ int kvm_get_dirty_log(struct kvm *kvm, | |||
1094 | { | 1102 | { |
1095 | struct kvm_memslots *slots; | 1103 | struct kvm_memslots *slots; |
1096 | struct kvm_memory_slot *memslot; | 1104 | struct kvm_memory_slot *memslot; |
1097 | int r, i, as_id, id; | 1105 | int i, as_id, id; |
1098 | unsigned long n; | 1106 | unsigned long n; |
1099 | unsigned long any = 0; | 1107 | unsigned long any = 0; |
1100 | 1108 | ||
1101 | r = -EINVAL; | ||
1102 | as_id = log->slot >> 16; | 1109 | as_id = log->slot >> 16; |
1103 | id = (u16)log->slot; | 1110 | id = (u16)log->slot; |
1104 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) | 1111 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) |
1105 | goto out; | 1112 | return -EINVAL; |
1106 | 1113 | ||
1107 | slots = __kvm_memslots(kvm, as_id); | 1114 | slots = __kvm_memslots(kvm, as_id); |
1108 | memslot = id_to_memslot(slots, id); | 1115 | memslot = id_to_memslot(slots, id); |
1109 | r = -ENOENT; | ||
1110 | if (!memslot->dirty_bitmap) | 1116 | if (!memslot->dirty_bitmap) |
1111 | goto out; | 1117 | return -ENOENT; |
1112 | 1118 | ||
1113 | n = kvm_dirty_bitmap_bytes(memslot); | 1119 | n = kvm_dirty_bitmap_bytes(memslot); |
1114 | 1120 | ||
1115 | for (i = 0; !any && i < n/sizeof(long); ++i) | 1121 | for (i = 0; !any && i < n/sizeof(long); ++i) |
1116 | any = memslot->dirty_bitmap[i]; | 1122 | any = memslot->dirty_bitmap[i]; |
1117 | 1123 | ||
1118 | r = -EFAULT; | ||
1119 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) | 1124 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) |
1120 | goto out; | 1125 | return -EFAULT; |
1121 | 1126 | ||
1122 | if (any) | 1127 | if (any) |
1123 | *is_dirty = 1; | 1128 | *is_dirty = 1; |
1124 | 1129 | return 0; | |
1125 | r = 0; | ||
1126 | out: | ||
1127 | return r; | ||
1128 | } | 1130 | } |
1129 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log); | 1131 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log); |
1130 | 1132 | ||
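The kvm_get_dirty_log() change above, and the two similar hunks that follow, are the "return an error code only as a constant" cleanup mentioned in the merge description: when the failure paths have no cleanup to run, the r/goto-out plumbing can be dropped. A minimal side-by-side sketch of the two styles, with hypothetical names:

/*
 * Illustration of the refactor above (not kernel code): instead of assigning
 * an error to a local and jumping to a shared "out:" label, the function
 * returns the constant at the point of failure.
 */
#include <errno.h>
#include <stddef.h>

struct demo_log { void *bitmap; size_t len; };

/* before: error plumbed through a local and a goto label */
static int demo_check_goto(const struct demo_log *log)
{
	int r = -EINVAL;

	if (!log)
		goto out;
	r = -ENOENT;
	if (!log->bitmap)
		goto out;
	r = 0;
out:
	return r;
}

/* after: return the constant directly, no label or local needed */
static int demo_check_direct(const struct demo_log *log)
{
	if (!log)
		return -EINVAL;
	if (!log->bitmap)
		return -ENOENT;
	return 0;
}

int main(void)
{
	struct demo_log log = { .bitmap = NULL, .len = 0 };

	/* both styles agree on the result; only the control flow differs */
	return demo_check_goto(&log) == demo_check_direct(&log) ? 0 : 1;
}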
@@ -1156,24 +1158,22 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, | |||
1156 | { | 1158 | { |
1157 | struct kvm_memslots *slots; | 1159 | struct kvm_memslots *slots; |
1158 | struct kvm_memory_slot *memslot; | 1160 | struct kvm_memory_slot *memslot; |
1159 | int r, i, as_id, id; | 1161 | int i, as_id, id; |
1160 | unsigned long n; | 1162 | unsigned long n; |
1161 | unsigned long *dirty_bitmap; | 1163 | unsigned long *dirty_bitmap; |
1162 | unsigned long *dirty_bitmap_buffer; | 1164 | unsigned long *dirty_bitmap_buffer; |
1163 | 1165 | ||
1164 | r = -EINVAL; | ||
1165 | as_id = log->slot >> 16; | 1166 | as_id = log->slot >> 16; |
1166 | id = (u16)log->slot; | 1167 | id = (u16)log->slot; |
1167 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) | 1168 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) |
1168 | goto out; | 1169 | return -EINVAL; |
1169 | 1170 | ||
1170 | slots = __kvm_memslots(kvm, as_id); | 1171 | slots = __kvm_memslots(kvm, as_id); |
1171 | memslot = id_to_memslot(slots, id); | 1172 | memslot = id_to_memslot(slots, id); |
1172 | 1173 | ||
1173 | dirty_bitmap = memslot->dirty_bitmap; | 1174 | dirty_bitmap = memslot->dirty_bitmap; |
1174 | r = -ENOENT; | ||
1175 | if (!dirty_bitmap) | 1175 | if (!dirty_bitmap) |
1176 | goto out; | 1176 | return -ENOENT; |
1177 | 1177 | ||
1178 | n = kvm_dirty_bitmap_bytes(memslot); | 1178 | n = kvm_dirty_bitmap_bytes(memslot); |
1179 | 1179 | ||
@@ -1202,14 +1202,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, | |||
1202 | } | 1202 | } |
1203 | 1203 | ||
1204 | spin_unlock(&kvm->mmu_lock); | 1204 | spin_unlock(&kvm->mmu_lock); |
1205 | |||
1206 | r = -EFAULT; | ||
1207 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) | 1205 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) |
1208 | goto out; | 1206 | return -EFAULT; |
1209 | 1207 | return 0; | |
1210 | r = 0; | ||
1211 | out: | ||
1212 | return r; | ||
1213 | } | 1208 | } |
1214 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); | 1209 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); |
1215 | #endif | 1210 | #endif |
@@ -1937,10 +1932,10 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, | |||
1937 | } | 1932 | } |
1938 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); | 1933 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); |
1939 | 1934 | ||
1940 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 1935 | static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, |
1941 | gpa_t gpa, unsigned long len) | 1936 | struct gfn_to_hva_cache *ghc, |
1937 | gpa_t gpa, unsigned long len) | ||
1942 | { | 1938 | { |
1943 | struct kvm_memslots *slots = kvm_memslots(kvm); | ||
1944 | int offset = offset_in_page(gpa); | 1939 | int offset = offset_in_page(gpa); |
1945 | gfn_t start_gfn = gpa >> PAGE_SHIFT; | 1940 | gfn_t start_gfn = gpa >> PAGE_SHIFT; |
1946 | gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; | 1941 | gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; |
@@ -1950,7 +1945,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
1950 | ghc->gpa = gpa; | 1945 | ghc->gpa = gpa; |
1951 | ghc->generation = slots->generation; | 1946 | ghc->generation = slots->generation; |
1952 | ghc->len = len; | 1947 | ghc->len = len; |
1953 | ghc->memslot = gfn_to_memslot(kvm, start_gfn); | 1948 | ghc->memslot = __gfn_to_memslot(slots, start_gfn); |
1954 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); | 1949 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); |
1955 | if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { | 1950 | if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { |
1956 | ghc->hva += offset; | 1951 | ghc->hva += offset; |
@@ -1960,7 +1955,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
1960 | * verify that the entire region is valid here. | 1955 | * verify that the entire region is valid here. |
1961 | */ | 1956 | */ |
1962 | while (start_gfn <= end_gfn) { | 1957 | while (start_gfn <= end_gfn) { |
1963 | ghc->memslot = gfn_to_memslot(kvm, start_gfn); | 1958 | ghc->memslot = __gfn_to_memslot(slots, start_gfn); |
1964 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, | 1959 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, |
1965 | &nr_pages_avail); | 1960 | &nr_pages_avail); |
1966 | if (kvm_is_error_hva(ghc->hva)) | 1961 | if (kvm_is_error_hva(ghc->hva)) |
@@ -1972,22 +1967,29 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
1972 | } | 1967 | } |
1973 | return 0; | 1968 | return 0; |
1974 | } | 1969 | } |
1975 | EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); | ||
1976 | 1970 | ||
1977 | int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 1971 | int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, |
1978 | void *data, int offset, unsigned long len) | 1972 | gpa_t gpa, unsigned long len) |
1979 | { | 1973 | { |
1980 | struct kvm_memslots *slots = kvm_memslots(kvm); | 1974 | struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); |
1975 | return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); | ||
1976 | } | ||
1977 | EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init); | ||
1978 | |||
1979 | int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, | ||
1980 | void *data, int offset, unsigned long len) | ||
1981 | { | ||
1982 | struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); | ||
1981 | int r; | 1983 | int r; |
1982 | gpa_t gpa = ghc->gpa + offset; | 1984 | gpa_t gpa = ghc->gpa + offset; |
1983 | 1985 | ||
1984 | BUG_ON(len + offset > ghc->len); | 1986 | BUG_ON(len + offset > ghc->len); |
1985 | 1987 | ||
1986 | if (slots->generation != ghc->generation) | 1988 | if (slots->generation != ghc->generation) |
1987 | kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); | 1989 | __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); |
1988 | 1990 | ||
1989 | if (unlikely(!ghc->memslot)) | 1991 | if (unlikely(!ghc->memslot)) |
1990 | return kvm_write_guest(kvm, gpa, data, len); | 1992 | return kvm_vcpu_write_guest(vcpu, gpa, data, len); |
1991 | 1993 | ||
1992 | if (kvm_is_error_hva(ghc->hva)) | 1994 | if (kvm_is_error_hva(ghc->hva)) |
1993 | return -EFAULT; | 1995 | return -EFAULT; |
@@ -1999,28 +2001,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
1999 | 2001 | ||
2000 | return 0; | 2002 | return 0; |
2001 | } | 2003 | } |
2002 | EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); | 2004 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached); |
2003 | 2005 | ||
2004 | int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 2006 | int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, |
2005 | void *data, unsigned long len) | 2007 | void *data, unsigned long len) |
2006 | { | 2008 | { |
2007 | return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); | 2009 | return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len); |
2008 | } | 2010 | } |
2009 | EXPORT_SYMBOL_GPL(kvm_write_guest_cached); | 2011 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached); |
2010 | 2012 | ||
2011 | int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 2013 | int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, |
2012 | void *data, unsigned long len) | 2014 | void *data, unsigned long len) |
2013 | { | 2015 | { |
2014 | struct kvm_memslots *slots = kvm_memslots(kvm); | 2016 | struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); |
2015 | int r; | 2017 | int r; |
2016 | 2018 | ||
2017 | BUG_ON(len > ghc->len); | 2019 | BUG_ON(len > ghc->len); |
2018 | 2020 | ||
2019 | if (slots->generation != ghc->generation) | 2021 | if (slots->generation != ghc->generation) |
2020 | kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); | 2022 | __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); |
2021 | 2023 | ||
2022 | if (unlikely(!ghc->memslot)) | 2024 | if (unlikely(!ghc->memslot)) |
2023 | return kvm_read_guest(kvm, ghc->gpa, data, len); | 2025 | return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len); |
2024 | 2026 | ||
2025 | if (kvm_is_error_hva(ghc->hva)) | 2027 | if (kvm_is_error_hva(ghc->hva)) |
2026 | return -EFAULT; | 2028 | return -EFAULT; |
@@ -2031,7 +2033,7 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
2031 | 2033 | ||
2032 | return 0; | 2034 | return 0; |
2033 | } | 2035 | } |
2034 | EXPORT_SYMBOL_GPL(kvm_read_guest_cached); | 2036 | EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached); |
2035 | 2037 | ||
2036 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) | 2038 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) |
2037 | { | 2039 | { |
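The cached read/write helpers above now take a vCPU, look up that vCPU's memslots, and revalidate the cache whenever the memslot generation has moved on. Below is a standalone sketch of that generation-checked cache pattern; it is not the kernel implementation (bounds handling and the uncached fallback are simplified) and all names are illustrative.

/*
 * Standalone sketch of a generation-checked gfn->hva style cache (not kernel
 * code).  A cached translation remembers the memslot generation it was built
 * against; if the current generation differs, the cache is rebuilt before use.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_slots {
	uint64_t generation;
	uint8_t backing[4096];	/* stands in for guest memory */
};

struct demo_hva_cache {
	uint64_t gpa;
	uint64_t generation;
	uint8_t *hva;		/* cached host pointer, NULL if unusable */
};

static void demo_cache_init(struct demo_slots *slots,
			    struct demo_hva_cache *c, uint64_t gpa)
{
	c->gpa = gpa;
	c->generation = slots->generation;
	/* bounds handling simplified for the sketch */
	c->hva = gpa < sizeof(slots->backing) ? slots->backing + gpa : NULL;
}

static int demo_write_cached(struct demo_slots *slots,
			     struct demo_hva_cache *c,
			     const void *data, size_t len)
{
	if (slots->generation != c->generation)	/* memslots changed: refresh */
		demo_cache_init(slots, c, c->gpa);
	if (!c->hva)
		return -1;			/* a real user would fall back to a slow path */
	memcpy(c->hva, data, len);
	return 0;
}

int main(void)
{
	struct demo_slots slots = { .generation = 4 };
	struct demo_hva_cache cache;
	uint32_t value = 0xdeadbeef;

	demo_cache_init(&slots, &cache, 0x100);
	demo_write_cached(&slots, &cache, &value, sizeof(value));

	slots.generation += 4;			/* simulate a memslot update */
	demo_write_cached(&slots, &cache, &value, sizeof(value));
	printf("cache generation now %llu\n",
	       (unsigned long long)cache.generation);
	return 0;
}

The point of the generation check is that a memslot update anywhere in the address space cheaply invalidates every cached translation, without having to track the individual caches.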
@@ -3133,10 +3135,9 @@ static long kvm_vm_compat_ioctl(struct file *filp, | |||
3133 | struct compat_kvm_dirty_log compat_log; | 3135 | struct compat_kvm_dirty_log compat_log; |
3134 | struct kvm_dirty_log log; | 3136 | struct kvm_dirty_log log; |
3135 | 3137 | ||
3136 | r = -EFAULT; | ||
3137 | if (copy_from_user(&compat_log, (void __user *)arg, | 3138 | if (copy_from_user(&compat_log, (void __user *)arg, |
3138 | sizeof(compat_log))) | 3139 | sizeof(compat_log))) |
3139 | goto out; | 3140 | return -EFAULT; |
3140 | log.slot = compat_log.slot; | 3141 | log.slot = compat_log.slot; |
3141 | log.padding1 = compat_log.padding1; | 3142 | log.padding1 = compat_log.padding1; |
3142 | log.padding2 = compat_log.padding2; | 3143 | log.padding2 = compat_log.padding2; |
@@ -3148,8 +3149,6 @@ static long kvm_vm_compat_ioctl(struct file *filp, | |||
3148 | default: | 3149 | default: |
3149 | r = kvm_vm_ioctl(filp, ioctl, arg); | 3150 | r = kvm_vm_ioctl(filp, ioctl, arg); |
3150 | } | 3151 | } |
3151 | |||
3152 | out: | ||
3153 | return r; | 3152 | return r; |
3154 | } | 3153 | } |
3155 | #endif | 3154 | #endif |