author     Linus Torvalds <torvalds@linux-foundation.org>   2013-07-03 16:21:40 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-07-03 16:21:40 -0400
commit     fe489bf4505ae26d3c6d6a1f1d3064c2a9c5cd85 (patch)
tree       46596fd7edf7c4da1dafdb2c62011841e71cf32d
parent     3e34131a65127e73fbae68c82748f32c8af7e4a4 (diff)
parent     a3ff5fbc94a829680d4aa005cd17add1c1a1fb5b (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
"On the x86 side, there are some optimizations and documentation
updates. The big ARM/KVM change for 3.11, support for AArch64, will
come through Catalin Marinas's tree. s390 and PPC have misc cleanups
and bugfixes"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (87 commits)
KVM: PPC: Ignore PIR writes
KVM: PPC: Book3S PR: Invalidate SLB entries properly
KVM: PPC: Book3S PR: Allow guest to use 1TB segments
KVM: PPC: Book3S PR: Don't keep scanning HPTEG after we find a match
KVM: PPC: Book3S PR: Fix invalidation of SLB entry 0 on guest entry
KVM: PPC: Book3S PR: Fix proto-VSID calculations
KVM: PPC: Guard doorbell exception with CONFIG_PPC_DOORBELL
KVM: Fix RTC interrupt coalescing tracking
kvm: Add a tracepoint write_tsc_offset
KVM: MMU: Inform users of mmio generation wraparound
KVM: MMU: document fast invalidate all mmio sptes
KVM: MMU: document fast invalidate all pages
KVM: MMU: document fast page fault
KVM: MMU: document mmio page fault
KVM: MMU: document write_flooding_count
KVM: MMU: document clear_spte_count
KVM: MMU: drop kvm_mmu_zap_mmio_sptes
KVM: MMU: init kvm generation close to mmio wrap-around value
KVM: MMU: add tracepoint for check_mmio_spte
KVM: MMU: fast invalidate all mmio sptes
...
62 files changed, 1382 insertions, 807 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 9bfadeb8be31..66dd2aa53ba4 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
@@ -2278,7 +2278,7 @@ return indicates the attribute is implemented. It does not necessarily | |||
2278 | indicate that the attribute can be read or written in the device's | 2278 | indicate that the attribute can be read or written in the device's |
2279 | current state. "addr" is ignored. | 2279 | current state. "addr" is ignored. |
2280 | 2280 | ||
2281 | 4.77 KVM_ARM_VCPU_INIT | 2281 | 4.82 KVM_ARM_VCPU_INIT |
2282 | 2282 | ||
2283 | Capability: basic | 2283 | Capability: basic |
2284 | Architectures: arm, arm64 | 2284 | Architectures: arm, arm64 |
@@ -2304,7 +2304,7 @@ Possible features: | |||
2304 | Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). | 2304 | Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). |
2305 | 2305 | ||
2306 | 2306 | ||
2307 | 4.78 KVM_GET_REG_LIST | 2307 | 4.83 KVM_GET_REG_LIST |
2308 | 2308 | ||
2309 | Capability: basic | 2309 | Capability: basic |
2310 | Architectures: arm, arm64 | 2310 | Architectures: arm, arm64 |
@@ -2324,7 +2324,7 @@ This ioctl returns the guest registers that are supported for the | |||
2324 | KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. | 2324 | KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. |
2325 | 2325 | ||
2326 | 2326 | ||
2327 | 4.80 KVM_ARM_SET_DEVICE_ADDR | 2327 | 4.84 KVM_ARM_SET_DEVICE_ADDR |
2328 | 2328 | ||
2329 | Capability: KVM_CAP_ARM_SET_DEVICE_ADDR | 2329 | Capability: KVM_CAP_ARM_SET_DEVICE_ADDR |
2330 | Architectures: arm, arm64 | 2330 | Architectures: arm, arm64 |
@@ -2362,7 +2362,7 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling | |||
2362 | KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the | 2362 | KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the |
2363 | base addresses will return -EEXIST. | 2363 | base addresses will return -EEXIST. |
2364 | 2364 | ||
2365 | 4.82 KVM_PPC_RTAS_DEFINE_TOKEN | 2365 | 4.85 KVM_PPC_RTAS_DEFINE_TOKEN |
2366 | 2366 | ||
2367 | Capability: KVM_CAP_PPC_RTAS | 2367 | Capability: KVM_CAP_PPC_RTAS |
2368 | Architectures: ppc | 2368 | Architectures: ppc |
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 43fcb761ed16..290894176142 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt | |||
@@ -191,12 +191,12 @@ Shadow pages contain the following information: | |||
191 | A counter keeping track of how many hardware registers (guest cr3 or | 191 | A counter keeping track of how many hardware registers (guest cr3 or |
192 | pdptrs) are now pointing at the page. While this counter is nonzero, the | 192 | pdptrs) are now pointing at the page. While this counter is nonzero, the |
193 | page cannot be destroyed. See role.invalid. | 193 | page cannot be destroyed. See role.invalid. |
194 | multimapped: | 194 | parent_ptes: |
195 | Whether there exist multiple sptes pointing at this page. | 195 | The reverse mapping for the pte/ptes pointing at this page's spt. If |
196 | parent_pte/parent_ptes: | 196 | parent_ptes bit 0 is zero, only one spte points at this page and |
197 | If multimapped is zero, parent_pte points at the single spte that points at | 197 | parent_ptes points at this single spte; otherwise, there exist multiple |
198 | this page's spt. Otherwise, parent_ptes points at a data structure | 198 | sptes pointing at this page and (parent_ptes & ~0x1) points at a data |
199 | with a list of parent_ptes. | 199 | structure with a list of parent_ptes. |
200 | unsync: | 200 | unsync: |
201 | If true, then the translations in this page may not match the guest's | 201 | If true, then the translations in this page may not match the guest's |
202 | translation. This is equivalent to the state of the tlb when a pte is | 202 | translation. This is equivalent to the state of the tlb when a pte is |
@@ -210,6 +210,24 @@ Shadow pages contain the following information: | |||
210 | A bitmap indicating which sptes in spt point (directly or indirectly) at | 210 | A bitmap indicating which sptes in spt point (directly or indirectly) at |
211 | pages that may be unsynchronized. Used to quickly locate all unsychronized | 211 | pages that may be unsynchronized. Used to quickly locate all unsychronized |
212 | pages reachable from a given page. | 212 | pages reachable from a given page. |
213 | mmu_valid_gen: | ||
214 | Generation number of the page. It is compared with kvm->arch.mmu_valid_gen | ||
215 | during hash table lookup, and used to skip invalidated shadow pages (see | ||
216 | "Zapping all pages" below.) | ||
217 | clear_spte_count: | ||
218 | Only present on 32-bit hosts, where a 64-bit spte cannot be written | ||
219 | atomically. The reader uses this while running out of the MMU lock | ||
220 | to detect in-progress updates and retry them until the writer has | ||
221 | finished the write. | ||
222 | write_flooding_count: | ||
223 | A guest may write to a page table many times, causing a lot of | ||
224 | emulations if the page needs to be write-protected (see "Synchronized | ||
225 | and unsynchronized pages" below). Leaf pages can be unsynchronized | ||
226 | so that they do not trigger frequent emulation, but this is not | ||
227 | possible for non-leafs. This field counts the number of emulations | ||
228 | since the last time the page table was actually used; if emulation | ||
229 | is triggered too frequently on this page, KVM will unmap the page | ||
230 | to avoid emulation in the future. | ||
213 | 231 | ||
214 | Reverse map | 232 | Reverse map |
215 | =========== | 233 | =========== |
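The clear_spte_count field documented in the hunk above is essentially a retry counter for lockless readers. Below is a minimal, self-contained sketch of that idea in plain C, written as a generic seqcount-style retry loop; it is not KVM's actual __set_spte/__get_spte_lockless code, and the kernel's real protocol additionally orders the half-writes so that a torn spte always reads as not-present.

    #include <stdint.h>

    struct spte32 {
            volatile uint32_t clear_spte_count;   /* bumped around every update      */
            volatile uint32_t lo;                 /* low 32 bits of the 64-bit spte  */
            volatile uint32_t hi;                 /* high 32 bits                    */
    };

    /* Writer side (runs under the MMU lock): odd count means "update in flight". */
    static void spte32_write(struct spte32 *s, uint64_t val)
    {
            s->clear_spte_count++;
            __sync_synchronize();
            s->hi = (uint32_t)(val >> 32);
            s->lo = (uint32_t)val;
            __sync_synchronize();
            s->clear_spte_count++;
    }

    /* Lockless reader: retry until both halves were read under a stable, even count. */
    static uint64_t spte32_read(const struct spte32 *s)
    {
            uint32_t count, lo, hi;

            do {
                    count = s->clear_spte_count;
                    __sync_synchronize();
                    lo = s->lo;
                    hi = s->hi;
                    __sync_synchronize();
            } while ((count & 1) || count != s->clear_spte_count);

            return ((uint64_t)hi << 32) | lo;
    }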
@@ -258,14 +276,26 @@ This is the most complicated event. The cause of a page fault can be: | |||
258 | 276 | ||
259 | Handling a page fault is performed as follows: | 277 | Handling a page fault is performed as follows: |
260 | 278 | ||
279 | - if the RSV bit of the error code is set, the page fault is caused by guest | ||
280 | accessing MMIO and cached MMIO information is available. | ||
281 | - walk shadow page table | ||
282 | - check for valid generation number in the spte (see "Fast invalidation of | ||
283 | MMIO sptes" below) | ||
284 | - cache the information to vcpu->arch.mmio_gva, vcpu->arch.access and | ||
285 | vcpu->arch.mmio_gfn, and call the emulator | ||
286 | - If both P bit and R/W bit of error code are set, this could possibly | ||
287 | be handled as a "fast page fault" (fixed without taking the MMU lock). See | ||
288 | the description in Documentation/virtual/kvm/locking.txt. | ||
261 | - if needed, walk the guest page tables to determine the guest translation | 289 | - if needed, walk the guest page tables to determine the guest translation |
262 | (gva->gpa or ngpa->gpa) | 290 | (gva->gpa or ngpa->gpa) |
263 | - if permissions are insufficient, reflect the fault back to the guest | 291 | - if permissions are insufficient, reflect the fault back to the guest |
264 | - determine the host page | 292 | - determine the host page |
265 | - if this is an mmio request, there is no host page; call the emulator | 293 | - if this is an mmio request, there is no host page; cache the info to |
266 | to emulate the instruction instead | 294 | vcpu->arch.mmio_gva, vcpu->arch.access and vcpu->arch.mmio_gfn |
267 | - walk the shadow page table to find the spte for the translation, | 295 | - walk the shadow page table to find the spte for the translation, |
268 | instantiating missing intermediate page tables as necessary | 296 | instantiating missing intermediate page tables as necessary |
297 | - If this is an mmio request, cache the mmio info to the spte and set some | ||
298 | reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask) | ||
269 | - try to unsynchronize the page | 299 | - try to unsynchronize the page |
270 | - if successful, we can let the guest continue and modify the gpte | 300 | - if successful, we can let the guest continue and modify the gpte |
271 | - emulate the instruction | 301 | - emulate the instruction |
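The ordering of the checks added in the hunk above can be summarized as a small dispatch routine. The sketch below only illustrates the decision flow; the PFERR_* bit positions follow the usual x86 #PF error-code layout, and the three helpers are hypothetical stand-ins for the MMIO, fast-page-fault and slow paths, not KVM functions.

    #include <stdint.h>

    #define PFERR_PRESENT  (1u << 0)        /* P:   a present pte caused the fault */
    #define PFERR_WRITE    (1u << 1)        /* R/W: the access was a write         */
    #define PFERR_RSVD     (1u << 3)        /* RSV: reserved bits set in a pte     */

    /* Hypothetical stand-ins for the real handlers. */
    static int handle_cached_mmio(uint64_t addr)           { (void)addr; return 1; }
    static int try_fast_page_fault(uint64_t addr)          { (void)addr; return 0; }
    static int slow_page_fault(uint64_t addr, uint32_t ec) { (void)addr; (void)ec; return 1; }

    static int dispatch_page_fault(uint64_t addr, uint32_t error_code)
    {
            if (error_code & PFERR_RSVD)
                    /* MMIO spte hit: check its generation, then go to the emulator. */
                    return handle_cached_mmio(addr);

            if ((error_code & (PFERR_PRESENT | PFERR_WRITE)) ==
                (PFERR_PRESENT | PFERR_WRITE) &&
                try_fast_page_fault(addr))
                    /* Fixed without taking the MMU lock ("fast page fault"). */
                    return 0;

            /* Fall back to the ordinary path: guest walk, host page, spte setup. */
            return slow_page_fault(addr, error_code);
    }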
@@ -351,6 +381,51 @@ causes its write_count to be incremented, thus preventing instantiation of | |||
351 | a large spte. The frames at the end of an unaligned memory slot have | 381 | a large spte. The frames at the end of an unaligned memory slot have |
352 | artificially inflated ->write_counts so they can never be instantiated. | 382 | artificially inflated ->write_counts so they can never be instantiated. |
353 | 383 | ||
384 | Zapping all pages (page generation count) | ||
385 | ========================================= | ||
386 | |||
387 | For large memory guests, walking and zapping all pages is really slow | ||
388 | (because there are a lot of pages), and also blocks memory accesses of | ||
389 | all VCPUs because it needs to hold the MMU lock. | ||
390 | |||
391 | To make this more scalable, kvm maintains a global generation number | ||
392 | which is stored in kvm->arch.mmu_valid_gen. Every shadow page stores | ||
393 | the current global generation-number into sp->mmu_valid_gen when it | ||
394 | is created. Pages with a mismatching generation number are "obsolete". | ||
395 | |||
396 | When KVM needs to zap all shadow page sptes, it simply increases the global | ||
397 | generation number and then reloads root shadow pages on all vcpus. As the VCPUs | ||
398 | create new shadow page tables, the old pages are not used because of the | ||
399 | mismatching generation number. | ||
400 | |||
401 | KVM then walks through all pages and zaps obsolete pages. While the zap | ||
402 | operation needs to take the MMU lock, the lock can be released periodically | ||
403 | so that the VCPUs can make progress. | ||
404 | |||
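As a rough sketch of the scheme described just above, obsolescence is a single integer comparison and "zap all" becomes a counter increment plus lazy cleanup. The structure and function names below are illustrative assumptions, not the kernel's:

    #include <stdbool.h>

    struct shadow_page {
            unsigned long mmu_valid_gen;    /* global value copied at creation time */
            /* ... hash links, sptes, role, etc. ... */
    };

    struct mmu {
            unsigned long mmu_valid_gen;    /* the global generation number */
    };

    /* A mismatching generation marks the page obsolete; lookups skip it. */
    static bool sp_is_obsolete(const struct mmu *m, const struct shadow_page *sp)
    {
            return sp->mmu_valid_gen != m->mmu_valid_gen;
    }

    /*
     * "Zap all" in O(1): bump the generation and make every vcpu reload its
     * root.  Obsolete pages are then freed by a walk that may drop the MMU
     * lock periodically so vcpus keep making progress.
     */
    static void kvm_zap_all_fast(struct mmu *m)
    {
            m->mmu_valid_gen++;
            /* request a root shadow page reload on all vcpus here */
    }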
405 | Fast invalidation of MMIO sptes | ||
406 | =============================== | ||
407 | |||
408 | As mentioned in "Reaction to events" above, kvm will cache MMIO | ||
409 | information in leaf sptes. When a new memslot is added or an existing | ||
410 | memslot is changed, this information may become stale and needs to be | ||
411 | invalidated. This also needs to hold the MMU lock while walking all | ||
412 | shadow pages, and is made more scalable with a similar technique. | ||
413 | |||
414 | MMIO sptes have a few spare bits, which are used to store a | ||
415 | generation number. The global generation number is stored in | ||
416 | kvm_memslots(kvm)->generation, and increased whenever guest memory info | ||
417 | changes. This generation number is distinct from the one described in | ||
418 | the previous section. | ||
419 | |||
420 | When KVM finds an MMIO spte, it checks the generation number of the spte. | ||
421 | If the generation number of the spte does not equal the global generation | ||
422 | number, it will ignore the cached MMIO information and handle the page | ||
423 | fault through the slow path. | ||
424 | |||
425 | Since only 19 bits are used to store the generation number in an MMIO spte, all | ||
426 | pages are zapped when that counter overflows. | ||
427 | |||
428 | |||
354 | Further reading | 429 | Further reading |
355 | =============== | 430 | =============== |
356 | 431 | ||
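The MMIO-spte invalidation described at the end of the mmu.txt hunk above follows the same pattern, with the generation number packed into spare spte bits. The bit position and helper names in this sketch are assumptions made for illustration; only the 19-bit width comes from the text.

    #include <stdbool.h>
    #include <stdint.h>

    #define MMIO_GEN_BITS   19u
    #define MMIO_GEN_SHIFT   3u                     /* assumed location of the spare bits */
    #define MMIO_GEN_MASK   ((1u << MMIO_GEN_BITS) - 1)

    /* Stamp the current memslot generation into an MMIO spte when caching it. */
    static uint64_t mark_mmio_spte(uint64_t spte, unsigned int memslot_gen)
    {
            spte &= ~((uint64_t)MMIO_GEN_MASK << MMIO_GEN_SHIFT);
            spte |= (uint64_t)(memslot_gen & MMIO_GEN_MASK) << MMIO_GEN_SHIFT;
            return spte;
    }

    /* On a fault, a stale generation means: ignore the cache, take the slow path. */
    static bool mmio_spte_is_stale(uint64_t spte, unsigned int memslot_gen)
    {
            unsigned int gen = (spte >> MMIO_GEN_SHIFT) & MMIO_GEN_MASK;

            return gen != (memslot_gen & MMIO_GEN_MASK);
    }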
diff --git a/MAINTAINERS b/MAINTAINERS index c85bf69bb321..60c68fbee64a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -4733,10 +4733,10 @@ F: arch/s390/kvm/ | |||
4733 | F: drivers/s390/kvm/ | 4733 | F: drivers/s390/kvm/ |
4734 | 4734 | ||
4735 | KERNEL VIRTUAL MACHINE (KVM) FOR ARM | 4735 | KERNEL VIRTUAL MACHINE (KVM) FOR ARM |
4736 | M: Christoffer Dall <cdall@cs.columbia.edu> | 4736 | M: Christoffer Dall <christoffer.dall@linaro.org> |
4737 | L: kvmarm@lists.cs.columbia.edu | 4737 | L: kvmarm@lists.cs.columbia.edu |
4738 | W: http://systems.cs.columbia.edu/projects/kvm-arm | 4738 | W: http://systems.cs.columbia.edu/projects/kvm-arm |
4739 | S: Maintained | 4739 | S: Supported |
4740 | F: arch/arm/include/uapi/asm/kvm* | 4740 | F: arch/arm/include/uapi/asm/kvm* |
4741 | F: arch/arm/include/asm/kvm* | 4741 | F: arch/arm/include/asm/kvm* |
4742 | F: arch/arm/kvm/ | 4742 | F: arch/arm/kvm/ |
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 124623e5ef14..64e96960de29 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h | |||
@@ -135,7 +135,6 @@ | |||
135 | #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) | 135 | #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) |
136 | #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) | 136 | #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) |
137 | #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) | 137 | #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) |
138 | #define S2_PGD_SIZE (1 << S2_PGD_ORDER) | ||
139 | 138 | ||
140 | /* Virtualization Translation Control Register (VTCR) bits */ | 139 | /* Virtualization Translation Control Register (VTCR) bits */ |
141 | #define VTCR_SH0 (3 << 12) | 140 | #define VTCR_SH0 (3 << 12) |
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 18d50322a9e2..a2f43ddcc300 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h | |||
@@ -37,16 +37,18 @@ | |||
37 | #define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */ | 37 | #define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */ |
38 | #define c6_DFAR 16 /* Data Fault Address Register */ | 38 | #define c6_DFAR 16 /* Data Fault Address Register */ |
39 | #define c6_IFAR 17 /* Instruction Fault Address Register */ | 39 | #define c6_IFAR 17 /* Instruction Fault Address Register */ |
40 | #define c9_L2CTLR 18 /* Cortex A15 L2 Control Register */ | 40 | #define c7_PAR 18 /* Physical Address Register */ |
41 | #define c10_PRRR 19 /* Primary Region Remap Register */ | 41 | #define c7_PAR_high 19 /* PAR top 32 bits */ |
42 | #define c10_NMRR 20 /* Normal Memory Remap Register */ | 42 | #define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ |
43 | #define c12_VBAR 21 /* Vector Base Address Register */ | 43 | #define c10_PRRR 21 /* Primary Region Remap Register */ |
44 | #define c13_CID 22 /* Context ID Register */ | 44 | #define c10_NMRR 22 /* Normal Memory Remap Register */ |
45 | #define c13_TID_URW 23 /* Thread ID, User R/W */ | 45 | #define c12_VBAR 23 /* Vector Base Address Register */ |
46 | #define c13_TID_URO 24 /* Thread ID, User R/O */ | 46 | #define c13_CID 24 /* Context ID Register */ |
47 | #define c13_TID_PRIV 25 /* Thread ID, Privileged */ | 47 | #define c13_TID_URW 25 /* Thread ID, User R/W */ |
48 | #define c14_CNTKCTL 26 /* Timer Control Register (PL1) */ | 48 | #define c13_TID_URO 26 /* Thread ID, User R/O */ |
49 | #define NR_CP15_REGS 27 /* Number of regs (incl. invalid) */ | 49 | #define c13_TID_PRIV 27 /* Thread ID, Privileged */ |
50 | #define c14_CNTKCTL 28 /* Timer Control Register (PL1) */ | ||
51 | #define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */ | ||
50 | 52 | ||
51 | #define ARM_EXCEPTION_RESET 0 | 53 | #define ARM_EXCEPTION_RESET 0 |
52 | #define ARM_EXCEPTION_UNDEFINED 1 | 54 | #define ARM_EXCEPTION_UNDEFINED 1 |
@@ -72,8 +74,6 @@ extern char __kvm_hyp_vector[]; | |||
72 | extern char __kvm_hyp_code_start[]; | 74 | extern char __kvm_hyp_code_start[]; |
73 | extern char __kvm_hyp_code_end[]; | 75 | extern char __kvm_hyp_code_end[]; |
74 | 76 | ||
75 | extern void __kvm_tlb_flush_vmid(struct kvm *kvm); | ||
76 | |||
77 | extern void __kvm_flush_vm_context(void); | 77 | extern void __kvm_flush_vm_context(void); |
78 | extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); | 78 | extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); |
79 | 79 | ||
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 82b4babead2c..a464e8d7b6c5 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h | |||
@@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) | |||
65 | return cpsr_mode > USR_MODE;; | 65 | return cpsr_mode > USR_MODE;; |
66 | } | 66 | } |
67 | 67 | ||
68 | static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg) | ||
69 | { | ||
70 | return reg == 15; | ||
71 | } | ||
72 | |||
73 | static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) | 68 | static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) |
74 | { | 69 | { |
75 | return vcpu->arch.fault.hsr; | 70 | return vcpu->arch.fault.hsr; |
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 57cb786a6203..7d22517d8071 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h | |||
@@ -23,9 +23,14 @@ | |||
23 | #include <asm/kvm_asm.h> | 23 | #include <asm/kvm_asm.h> |
24 | #include <asm/kvm_mmio.h> | 24 | #include <asm/kvm_mmio.h> |
25 | #include <asm/fpstate.h> | 25 | #include <asm/fpstate.h> |
26 | #include <asm/kvm_arch_timer.h> | 26 | #include <kvm/arm_arch_timer.h> |
27 | 27 | ||
28 | #if defined(CONFIG_KVM_ARM_MAX_VCPUS) | ||
28 | #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS | 29 | #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS |
30 | #else | ||
31 | #define KVM_MAX_VCPUS 0 | ||
32 | #endif | ||
33 | |||
29 | #define KVM_USER_MEM_SLOTS 32 | 34 | #define KVM_USER_MEM_SLOTS 32 |
30 | #define KVM_PRIVATE_MEM_SLOTS 4 | 35 | #define KVM_PRIVATE_MEM_SLOTS 4 |
31 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 36 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
@@ -38,7 +43,7 @@ | |||
38 | #define KVM_NR_PAGE_SIZES 1 | 43 | #define KVM_NR_PAGE_SIZES 1 |
39 | #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) | 44 | #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) |
40 | 45 | ||
41 | #include <asm/kvm_vgic.h> | 46 | #include <kvm/arm_vgic.h> |
42 | 47 | ||
43 | struct kvm_vcpu; | 48 | struct kvm_vcpu; |
44 | u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); | 49 | u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); |
@@ -190,8 +195,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); | |||
190 | int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, | 195 | int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, |
191 | int exception_index); | 196 | int exception_index); |
192 | 197 | ||
193 | static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, | 198 | static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, |
194 | unsigned long long pgd_ptr, | 199 | phys_addr_t pgd_ptr, |
195 | unsigned long hyp_stack_ptr, | 200 | unsigned long hyp_stack_ptr, |
196 | unsigned long vector_ptr) | 201 | unsigned long vector_ptr) |
197 | { | 202 | { |
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 370e1a8af6ac..ebf5015508b5 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig | |||
@@ -41,9 +41,9 @@ config KVM_ARM_HOST | |||
41 | Provides host support for ARM processors. | 41 | Provides host support for ARM processors. |
42 | 42 | ||
43 | config KVM_ARM_MAX_VCPUS | 43 | config KVM_ARM_MAX_VCPUS |
44 | int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST | 44 | int "Number maximum supported virtual CPUs per VM" |
45 | default 4 if KVM_ARM_HOST | 45 | depends on KVM_ARM_HOST |
46 | default 0 | 46 | default 4 |
47 | help | 47 | help |
48 | Static number of max supported virtual CPUs per VM. | 48 | Static number of max supported virtual CPUs per VM. |
49 | 49 | ||
@@ -67,6 +67,4 @@ config KVM_ARM_TIMER | |||
67 | ---help--- | 67 | ---help--- |
68 | Adds support for the Architected Timers in virtual machines | 68 | Adds support for the Architected Timers in virtual machines |
69 | 69 | ||
70 | source drivers/virtio/Kconfig | ||
71 | |||
72 | endif # VIRTUALIZATION | 70 | endif # VIRTUALIZATION |
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 53c5ed83d16f..d99bee4950e5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile | |||
@@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I. | |||
14 | AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) | 14 | AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) |
15 | AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) | 15 | AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) |
16 | 16 | ||
17 | kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) | 17 | KVM := ../../../virt/kvm |
18 | kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o | ||
18 | 19 | ||
19 | obj-y += kvm-arm.o init.o interrupts.o | 20 | obj-y += kvm-arm.o init.o interrupts.o |
20 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o | 21 | obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o |
21 | obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o | 22 | obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o |
22 | obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o | 23 | obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o |
23 | obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o | 24 | obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o |
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ef1703b9587b..741f66a2edbd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c | |||
@@ -800,8 +800,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
800 | 800 | ||
801 | static void cpu_init_hyp_mode(void *dummy) | 801 | static void cpu_init_hyp_mode(void *dummy) |
802 | { | 802 | { |
803 | unsigned long long boot_pgd_ptr; | 803 | phys_addr_t boot_pgd_ptr; |
804 | unsigned long long pgd_ptr; | 804 | phys_addr_t pgd_ptr; |
805 | unsigned long hyp_stack_ptr; | 805 | unsigned long hyp_stack_ptr; |
806 | unsigned long stack_page; | 806 | unsigned long stack_page; |
807 | unsigned long vector_ptr; | 807 | unsigned long vector_ptr; |
@@ -809,8 +809,8 @@ static void cpu_init_hyp_mode(void *dummy) | |||
809 | /* Switch from the HYP stub to our own HYP init vector */ | 809 | /* Switch from the HYP stub to our own HYP init vector */ |
810 | __hyp_set_vectors(kvm_get_idmap_vector()); | 810 | __hyp_set_vectors(kvm_get_idmap_vector()); |
811 | 811 | ||
812 | boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); | 812 | boot_pgd_ptr = kvm_mmu_get_boot_httbr(); |
813 | pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); | 813 | pgd_ptr = kvm_mmu_get_httbr(); |
814 | stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); | 814 | stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); |
815 | hyp_stack_ptr = stack_page + PAGE_SIZE; | 815 | hyp_stack_ptr = stack_page + PAGE_SIZE; |
816 | vector_ptr = (unsigned long)__kvm_hyp_vector; | 816 | vector_ptr = (unsigned long)__kvm_hyp_vector; |
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 8eea97be1ed5..4a5199070430 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c | |||
@@ -180,6 +180,10 @@ static const struct coproc_reg cp15_regs[] = { | |||
180 | NULL, reset_unknown, c6_DFAR }, | 180 | NULL, reset_unknown, c6_DFAR }, |
181 | { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, | 181 | { CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32, |
182 | NULL, reset_unknown, c6_IFAR }, | 182 | NULL, reset_unknown, c6_IFAR }, |
183 | |||
184 | /* PAR swapped by interrupt.S */ | ||
185 | { CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR }, | ||
186 | |||
183 | /* | 187 | /* |
184 | * DC{C,I,CI}SW operations: | 188 | * DC{C,I,CI}SW operations: |
185 | */ | 189 | */ |
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 3d74a0be47db..df4c82d47ad7 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c | |||
@@ -52,9 +52,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
52 | 52 | ||
53 | static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) | 53 | static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) |
54 | { | 54 | { |
55 | if (kvm_psci_call(vcpu)) | ||
56 | return 1; | ||
57 | |||
58 | kvm_inject_undefined(vcpu); | 55 | kvm_inject_undefined(vcpu); |
59 | return 1; | 56 | return 1; |
60 | } | 57 | } |
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index f7793df62f58..16cd4ba5d7fd 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S | |||
@@ -49,6 +49,7 @@ __kvm_hyp_code_start: | |||
49 | ENTRY(__kvm_tlb_flush_vmid_ipa) | 49 | ENTRY(__kvm_tlb_flush_vmid_ipa) |
50 | push {r2, r3} | 50 | push {r2, r3} |
51 | 51 | ||
52 | dsb ishst | ||
52 | add r0, r0, #KVM_VTTBR | 53 | add r0, r0, #KVM_VTTBR |
53 | ldrd r2, r3, [r0] | 54 | ldrd r2, r3, [r0] |
54 | mcrr p15, 6, r2, r3, c2 @ Write VTTBR | 55 | mcrr p15, 6, r2, r3, c2 @ Write VTTBR |
@@ -291,6 +292,7 @@ THUMB( orr r2, r2, #PSR_T_BIT ) | |||
291 | ldr r2, =BSYM(panic) | 292 | ldr r2, =BSYM(panic) |
292 | msr ELR_hyp, r2 | 293 | msr ELR_hyp, r2 |
293 | ldr r0, =\panic_str | 294 | ldr r0, =\panic_str |
295 | clrex @ Clear exclusive monitor | ||
294 | eret | 296 | eret |
295 | .endm | 297 | .endm |
296 | 298 | ||
@@ -414,6 +416,10 @@ guest_trap: | |||
414 | mrcne p15, 4, r2, c6, c0, 4 @ HPFAR | 416 | mrcne p15, 4, r2, c6, c0, 4 @ HPFAR |
415 | bne 3f | 417 | bne 3f |
416 | 418 | ||
419 | /* Preserve PAR */ | ||
420 | mrrc p15, 0, r0, r1, c7 @ PAR | ||
421 | push {r0, r1} | ||
422 | |||
417 | /* Resolve IPA using the xFAR */ | 423 | /* Resolve IPA using the xFAR */ |
418 | mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR | 424 | mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR |
419 | isb | 425 | isb |
@@ -424,13 +430,20 @@ guest_trap: | |||
424 | lsl r2, r2, #4 | 430 | lsl r2, r2, #4 |
425 | orr r2, r2, r1, lsl #24 | 431 | orr r2, r2, r1, lsl #24 |
426 | 432 | ||
433 | /* Restore PAR */ | ||
434 | pop {r0, r1} | ||
435 | mcrr p15, 0, r0, r1, c7 @ PAR | ||
436 | |||
427 | 3: load_vcpu @ Load VCPU pointer to r0 | 437 | 3: load_vcpu @ Load VCPU pointer to r0 |
428 | str r2, [r0, #VCPU_HPFAR] | 438 | str r2, [r0, #VCPU_HPFAR] |
429 | 439 | ||
430 | 1: mov r1, #ARM_EXCEPTION_HVC | 440 | 1: mov r1, #ARM_EXCEPTION_HVC |
431 | b __kvm_vcpu_return | 441 | b __kvm_vcpu_return |
432 | 442 | ||
433 | 4: pop {r0, r1, r2} @ Failed translation, return to guest | 443 | 4: pop {r0, r1} @ Failed translation, return to guest |
444 | mcrr p15, 0, r0, r1, c7 @ PAR | ||
445 | clrex | ||
446 | pop {r0, r1, r2} | ||
434 | eret | 447 | eret |
435 | 448 | ||
436 | /* | 449 | /* |
@@ -456,6 +469,7 @@ switch_to_guest_vfp: | |||
456 | 469 | ||
457 | pop {r3-r7} | 470 | pop {r3-r7} |
458 | pop {r0-r2} | 471 | pop {r0-r2} |
472 | clrex | ||
459 | eret | 473 | eret |
460 | #endif | 474 | #endif |
461 | 475 | ||
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index d43cfb5b37c4..6f18695a09cb 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S | |||
@@ -302,11 +302,14 @@ vcpu .req r0 @ vcpu pointer always in r0 | |||
302 | .endif | 302 | .endif |
303 | 303 | ||
304 | mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL | 304 | mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL |
305 | mrrc p15, 0, r4, r5, c7 @ PAR | ||
305 | 306 | ||
306 | .if \store_to_vcpu == 0 | 307 | .if \store_to_vcpu == 0 |
307 | push {r2} | 308 | push {r2,r4-r5} |
308 | .else | 309 | .else |
309 | str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] | 310 | str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] |
311 | add r12, vcpu, #CP15_OFFSET(c7_PAR) | ||
312 | strd r4, r5, [r12] | ||
310 | .endif | 313 | .endif |
311 | .endm | 314 | .endm |
312 | 315 | ||
@@ -319,12 +322,15 @@ vcpu .req r0 @ vcpu pointer always in r0 | |||
319 | */ | 322 | */ |
320 | .macro write_cp15_state read_from_vcpu | 323 | .macro write_cp15_state read_from_vcpu |
321 | .if \read_from_vcpu == 0 | 324 | .if \read_from_vcpu == 0 |
322 | pop {r2} | 325 | pop {r2,r4-r5} |
323 | .else | 326 | .else |
324 | ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] | 327 | ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] |
328 | add r12, vcpu, #CP15_OFFSET(c7_PAR) | ||
329 | ldrd r4, r5, [r12] | ||
325 | .endif | 330 | .endif |
326 | 331 | ||
327 | mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL | 332 | mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL |
333 | mcrr p15, 0, r4, r5, c7 @ PAR | ||
328 | 334 | ||
329 | .if \read_from_vcpu == 0 | 335 | .if \read_from_vcpu == 0 |
330 | pop {r2-r12} | 336 | pop {r2-r12} |
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 72a12f2171b2..b8e06b7a2833 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c | |||
@@ -86,12 +86,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
86 | sign_extend = kvm_vcpu_dabt_issext(vcpu); | 86 | sign_extend = kvm_vcpu_dabt_issext(vcpu); |
87 | rt = kvm_vcpu_dabt_get_rd(vcpu); | 87 | rt = kvm_vcpu_dabt_get_rd(vcpu); |
88 | 88 | ||
89 | if (kvm_vcpu_reg_is_pc(vcpu, rt)) { | ||
90 | /* IO memory trying to read/write pc */ | ||
91 | kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); | ||
92 | return 1; | ||
93 | } | ||
94 | |||
95 | mmio->is_write = is_write; | 89 | mmio->is_write = is_write; |
96 | mmio->phys_addr = fault_ipa; | 90 | mmio->phys_addr = fault_ipa; |
97 | mmio->len = len; | 91 | mmio->len = len; |
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 84ba67b982c0..ca6bea4859b4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c | |||
@@ -382,9 +382,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) | |||
382 | if (!pgd) | 382 | if (!pgd) |
383 | return -ENOMEM; | 383 | return -ENOMEM; |
384 | 384 | ||
385 | /* stage-2 pgd must be aligned to its size */ | ||
386 | VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1)); | ||
387 | |||
388 | memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); | 385 | memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); |
389 | kvm_clean_pgd(pgd); | 386 | kvm_clean_pgd(pgd); |
390 | kvm->arch.pgd = pgd; | 387 | kvm->arch.pgd = pgd; |
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 7ee5bb7a3667..86a693a02ba3 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c | |||
@@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) | |||
75 | * kvm_psci_call - handle PSCI call if r0 value is in range | 75 | * kvm_psci_call - handle PSCI call if r0 value is in range |
76 | * @vcpu: Pointer to the VCPU struct | 76 | * @vcpu: Pointer to the VCPU struct |
77 | * | 77 | * |
78 | * Handle PSCI calls from guests through traps from HVC or SMC instructions. | 78 | * Handle PSCI calls from guests through traps from HVC instructions. |
79 | * The calling convention is similar to SMC calls to the secure world where | 79 | * The calling convention is similar to SMC calls to the secure world where |
80 | * the function number is placed in r0 and this function returns true if the | 80 | * the function number is placed in r0 and this function returns true if the |
81 | * function number specified in r0 is withing the PSCI range, and false | 81 | * function number specified in r0 is withing the PSCI range, and false |
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index b80256b554cd..b7840e7aa452 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c | |||
@@ -27,6 +27,8 @@ | |||
27 | #include <asm/kvm_arm.h> | 27 | #include <asm/kvm_arm.h> |
28 | #include <asm/kvm_coproc.h> | 28 | #include <asm/kvm_coproc.h> |
29 | 29 | ||
30 | #include <kvm/arm_arch_timer.h> | ||
31 | |||
30 | /****************************************************************************** | 32 | /****************************************************************************** |
31 | * Cortex-A15 Reset Values | 33 | * Cortex-A15 Reset Values |
32 | */ | 34 | */ |
@@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = { | |||
37 | .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, | 39 | .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, |
38 | }; | 40 | }; |
39 | 41 | ||
42 | static const struct kvm_irq_level a15_vtimer_irq = { | ||
43 | .irq = 27, | ||
44 | .level = 1, | ||
45 | }; | ||
46 | |||
40 | 47 | ||
41 | /******************************************************************************* | 48 | /******************************************************************************* |
42 | * Exported reset function | 49 | * Exported reset function |
@@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = { | |||
52 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | 59 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu) |
53 | { | 60 | { |
54 | struct kvm_regs *cpu_reset; | 61 | struct kvm_regs *cpu_reset; |
62 | const struct kvm_irq_level *cpu_vtimer_irq; | ||
55 | 63 | ||
56 | switch (vcpu->arch.target) { | 64 | switch (vcpu->arch.target) { |
57 | case KVM_ARM_TARGET_CORTEX_A15: | 65 | case KVM_ARM_TARGET_CORTEX_A15: |
@@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
59 | return -EINVAL; | 67 | return -EINVAL; |
60 | cpu_reset = &a15_regs_reset; | 68 | cpu_reset = &a15_regs_reset; |
61 | vcpu->arch.midr = read_cpuid_id(); | 69 | vcpu->arch.midr = read_cpuid_id(); |
70 | cpu_vtimer_irq = &a15_vtimer_irq; | ||
62 | break; | 71 | break; |
63 | default: | 72 | default: |
64 | return -ENODEV; | 73 | return -ENODEV; |
@@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
70 | /* Reset CP15 registers */ | 79 | /* Reset CP15 registers */ |
71 | kvm_reset_coprocs(vcpu); | 80 | kvm_reset_coprocs(vcpu); |
72 | 81 | ||
82 | /* Reset arch_timer context */ | ||
83 | kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); | ||
84 | |||
73 | return 0; | 85 | return 0; |
74 | } | 86 | } |
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 1a4053789d01..18e45ec49bbf 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile | |||
@@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file) | |||
47 | 47 | ||
48 | ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ | 48 | ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ |
49 | asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ | 49 | asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ |
50 | KVM := ../../../virt/kvm | ||
50 | 51 | ||
51 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 52 | common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \ |
52 | coalesced_mmio.o irq_comm.o) | 53 | $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o |
53 | 54 | ||
54 | ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) | 55 | ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) |
55 | common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) | 56 | common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o |
56 | endif | 57 | endif |
57 | 58 | ||
58 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o | 59 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 349ed85c7d61..08891d07aeb6 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
@@ -107,8 +107,9 @@ struct kvmppc_vcpu_book3s { | |||
107 | #define CONTEXT_GUEST 1 | 107 | #define CONTEXT_GUEST 1 |
108 | #define CONTEXT_GUEST_END 2 | 108 | #define CONTEXT_GUEST_END 2 |
109 | 109 | ||
110 | #define VSID_REAL 0x1fffffffffc00000ULL | 110 | #define VSID_REAL 0x0fffffffffc00000ULL |
111 | #define VSID_BAT 0x1fffffffffb00000ULL | 111 | #define VSID_BAT 0x0fffffffffb00000ULL |
112 | #define VSID_1T 0x1000000000000000ULL | ||
112 | #define VSID_REAL_DR 0x2000000000000000ULL | 113 | #define VSID_REAL_DR 0x2000000000000000ULL |
113 | #define VSID_REAL_IR 0x4000000000000000ULL | 114 | #define VSID_REAL_IR 0x4000000000000000ULL |
114 | #define VSID_PR 0x8000000000000000ULL | 115 | #define VSID_PR 0x8000000000000000ULL |
@@ -123,6 +124,7 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); | |||
123 | extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); | 124 | extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); |
124 | extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); | 125 | extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); |
125 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); | 126 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); |
127 | extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); | ||
126 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); | 128 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); |
127 | extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, | 129 | extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, |
128 | struct kvm_vcpu *vcpu, unsigned long addr, | 130 | struct kvm_vcpu *vcpu, unsigned long addr, |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 422de3f4d46c..008cd856c5b5 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
@@ -5,9 +5,10 @@ | |||
5 | subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror | 5 | subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror |
6 | 6 | ||
7 | ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm | 7 | ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm |
8 | KVM := ../../../virt/kvm | ||
8 | 9 | ||
9 | common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ | 10 | common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ |
10 | eventfd.o) | 11 | $(KVM)/eventfd.o |
11 | 12 | ||
12 | CFLAGS_44x_tlb.o := -I. | 13 | CFLAGS_44x_tlb.o := -I. |
13 | CFLAGS_e500_mmu.o := -I. | 14 | CFLAGS_e500_mmu.o := -I. |
@@ -53,7 +54,7 @@ kvm-e500mc-objs := \ | |||
53 | kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) | 54 | kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) |
54 | 55 | ||
55 | kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ | 56 | kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ |
56 | ../../../virt/kvm/coalesced_mmio.o \ | 57 | $(KVM)/coalesced_mmio.o \ |
57 | fpu.o \ | 58 | fpu.o \ |
58 | book3s_paired_singles.o \ | 59 | book3s_paired_singles.o \ |
59 | book3s_pr.o \ | 60 | book3s_pr.o \ |
@@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ | |||
86 | book3s_xics.o | 87 | book3s_xics.o |
87 | 88 | ||
88 | kvm-book3s_64-module-objs := \ | 89 | kvm-book3s_64-module-objs := \ |
89 | ../../../virt/kvm/kvm_main.o \ | 90 | $(KVM)/kvm_main.o \ |
90 | ../../../virt/kvm/eventfd.o \ | 91 | $(KVM)/eventfd.o \ |
91 | powerpc.o \ | 92 | powerpc.o \ |
92 | emulate.o \ | 93 | emulate.o \ |
93 | book3s.o \ | 94 | book3s.o \ |
@@ -111,7 +112,7 @@ kvm-book3s_32-objs := \ | |||
111 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) | 112 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) |
112 | 113 | ||
113 | kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o | 114 | kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o |
114 | kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) | 115 | kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o |
115 | 116 | ||
116 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) | 117 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) |
117 | 118 | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index b871721c0050..739bfbadb85e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/tlbflush.h> | 26 | #include <asm/tlbflush.h> |
27 | #include <asm/kvm_ppc.h> | 27 | #include <asm/kvm_ppc.h> |
28 | #include <asm/kvm_book3s.h> | 28 | #include <asm/kvm_book3s.h> |
29 | #include <asm/mmu-hash64.h> | ||
29 | 30 | ||
30 | /* #define DEBUG_MMU */ | 31 | /* #define DEBUG_MMU */ |
31 | 32 | ||
@@ -76,6 +77,24 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( | |||
76 | return NULL; | 77 | return NULL; |
77 | } | 78 | } |
78 | 79 | ||
80 | static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe) | ||
81 | { | ||
82 | return slbe->tb ? SID_SHIFT_1T : SID_SHIFT; | ||
83 | } | ||
84 | |||
85 | static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe) | ||
86 | { | ||
87 | return (1ul << kvmppc_slb_sid_shift(slbe)) - 1; | ||
88 | } | ||
89 | |||
90 | static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr) | ||
91 | { | ||
92 | eaddr &= kvmppc_slb_offset_mask(slb); | ||
93 | |||
94 | return (eaddr >> VPN_SHIFT) | | ||
95 | ((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT)); | ||
96 | } | ||
97 | |||
79 | static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, | 98 | static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, |
80 | bool data) | 99 | bool data) |
81 | { | 100 | { |
@@ -85,11 +104,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
85 | if (!slb) | 104 | if (!slb) |
86 | return 0; | 105 | return 0; |
87 | 106 | ||
88 | if (slb->tb) | 107 | return kvmppc_slb_calc_vpn(slb, eaddr); |
89 | return (((u64)eaddr >> 12) & 0xfffffff) | | ||
90 | (((u64)slb->vsid) << 28); | ||
91 | |||
92 | return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16); | ||
93 | } | 108 | } |
94 | 109 | ||
95 | static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) | 110 | static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) |
@@ -100,7 +115,8 @@ static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) | |||
100 | static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) | 115 | static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) |
101 | { | 116 | { |
102 | int p = kvmppc_mmu_book3s_64_get_pagesize(slbe); | 117 | int p = kvmppc_mmu_book3s_64_get_pagesize(slbe); |
103 | return ((eaddr & 0xfffffff) >> p); | 118 | |
119 | return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p); | ||
104 | } | 120 | } |
105 | 121 | ||
106 | static hva_t kvmppc_mmu_book3s_64_get_pteg( | 122 | static hva_t kvmppc_mmu_book3s_64_get_pteg( |
@@ -109,13 +125,15 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg( | |||
109 | bool second) | 125 | bool second) |
110 | { | 126 | { |
111 | u64 hash, pteg, htabsize; | 127 | u64 hash, pteg, htabsize; |
112 | u32 page; | 128 | u32 ssize; |
113 | hva_t r; | 129 | hva_t r; |
130 | u64 vpn; | ||
114 | 131 | ||
115 | page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); | ||
116 | htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1); | 132 | htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1); |
117 | 133 | ||
118 | hash = slbe->vsid ^ page; | 134 | vpn = kvmppc_slb_calc_vpn(slbe, eaddr); |
135 | ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M; | ||
136 | hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize); | ||
119 | if (second) | 137 | if (second) |
120 | hash = ~hash; | 138 | hash = ~hash; |
121 | hash &= ((1ULL << 39ULL) - 1ULL); | 139 | hash &= ((1ULL << 39ULL) - 1ULL); |
@@ -146,7 +164,7 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr) | |||
146 | u64 avpn; | 164 | u64 avpn; |
147 | 165 | ||
148 | avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); | 166 | avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); |
149 | avpn |= slbe->vsid << (28 - p); | 167 | avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p); |
150 | 168 | ||
151 | if (p < 24) | 169 | if (p < 24) |
152 | avpn >>= ((80 - p) - 56) - 8; | 170 | avpn >>= ((80 - p) - 56) - 8; |
@@ -167,7 +185,6 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
167 | int i; | 185 | int i; |
168 | u8 key = 0; | 186 | u8 key = 0; |
169 | bool found = false; | 187 | bool found = false; |
170 | bool perm_err = false; | ||
171 | int second = 0; | 188 | int second = 0; |
172 | ulong mp_ea = vcpu->arch.magic_page_ea; | 189 | ulong mp_ea = vcpu->arch.magic_page_ea; |
173 | 190 | ||
@@ -190,13 +207,15 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
190 | if (!slbe) | 207 | if (!slbe) |
191 | goto no_seg_found; | 208 | goto no_seg_found; |
192 | 209 | ||
210 | avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); | ||
211 | if (slbe->tb) | ||
212 | avpn |= SLB_VSID_B_1T; | ||
213 | |||
193 | do_second: | 214 | do_second: |
194 | ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); | 215 | ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); |
195 | if (kvm_is_error_hva(ptegp)) | 216 | if (kvm_is_error_hva(ptegp)) |
196 | goto no_page_found; | 217 | goto no_page_found; |
197 | 218 | ||
198 | avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); | ||
199 | |||
200 | if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { | 219 | if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) { |
201 | printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp); | 220 | printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp); |
202 | goto no_page_found; | 221 | goto no_page_found; |
@@ -219,7 +238,7 @@ do_second: | |||
219 | continue; | 238 | continue; |
220 | 239 | ||
221 | /* AVPN compare */ | 240 | /* AVPN compare */ |
222 | if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) { | 241 | if (HPTE_V_COMPARE(avpn, v)) { |
223 | u8 pp = (r & HPTE_R_PP) | key; | 242 | u8 pp = (r & HPTE_R_PP) | key; |
224 | int eaddr_mask = 0xFFF; | 243 | int eaddr_mask = 0xFFF; |
225 | 244 | ||
@@ -248,11 +267,6 @@ do_second: | |||
248 | break; | 267 | break; |
249 | } | 268 | } |
250 | 269 | ||
251 | if (!gpte->may_read) { | ||
252 | perm_err = true; | ||
253 | continue; | ||
254 | } | ||
255 | |||
256 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " | 270 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " |
257 | "-> 0x%lx\n", | 271 | "-> 0x%lx\n", |
258 | eaddr, avpn, gpte->vpage, gpte->raddr); | 272 | eaddr, avpn, gpte->vpage, gpte->raddr); |
@@ -281,6 +295,8 @@ do_second: | |||
281 | if (pteg[i+1] != oldr) | 295 | if (pteg[i+1] != oldr) |
282 | copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); | 296 | copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); |
283 | 297 | ||
298 | if (!gpte->may_read) | ||
299 | return -EPERM; | ||
284 | return 0; | 300 | return 0; |
285 | } else { | 301 | } else { |
286 | dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " | 302 | dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " |
@@ -296,13 +312,7 @@ do_second: | |||
296 | } | 312 | } |
297 | } | 313 | } |
298 | 314 | ||
299 | |||
300 | no_page_found: | 315 | no_page_found: |
301 | |||
302 | |||
303 | if (perm_err) | ||
304 | return -EPERM; | ||
305 | |||
306 | return -ENOENT; | 316 | return -ENOENT; |
307 | 317 | ||
308 | no_seg_found: | 318 | no_seg_found: |
@@ -334,7 +344,7 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) | |||
334 | slbe->large = (rs & SLB_VSID_L) ? 1 : 0; | 344 | slbe->large = (rs & SLB_VSID_L) ? 1 : 0; |
335 | slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; | 345 | slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; |
336 | slbe->esid = slbe->tb ? esid_1t : esid; | 346 | slbe->esid = slbe->tb ? esid_1t : esid; |
337 | slbe->vsid = rs >> 12; | 347 | slbe->vsid = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16); |
338 | slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; | 348 | slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; |
339 | slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; | 349 | slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; |
340 | slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0; | 350 | slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0; |
@@ -375,6 +385,7 @@ static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr) | |||
375 | static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) | 385 | static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) |
376 | { | 386 | { |
377 | struct kvmppc_slb *slbe; | 387 | struct kvmppc_slb *slbe; |
388 | u64 seg_size; | ||
378 | 389 | ||
379 | dprintk("KVM MMU: slbie(0x%llx)\n", ea); | 390 | dprintk("KVM MMU: slbie(0x%llx)\n", ea); |
380 | 391 | ||
@@ -386,8 +397,11 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea) | |||
386 | dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid); | 397 | dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid); |
387 | 398 | ||
388 | slbe->valid = false; | 399 | slbe->valid = false; |
400 | slbe->orige = 0; | ||
401 | slbe->origv = 0; | ||
389 | 402 | ||
390 | kvmppc_mmu_map_segment(vcpu, ea); | 403 | seg_size = 1ull << kvmppc_slb_sid_shift(slbe); |
404 | kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size); | ||
391 | } | 405 | } |
392 | 406 | ||
393 | static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) | 407 | static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) |
@@ -396,8 +410,11 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) | |||
396 | 410 | ||
397 | dprintk("KVM MMU: slbia()\n"); | 411 | dprintk("KVM MMU: slbia()\n"); |
398 | 412 | ||
399 | for (i = 1; i < vcpu->arch.slb_nr; i++) | 413 | for (i = 1; i < vcpu->arch.slb_nr; i++) { |
400 | vcpu->arch.slb[i].valid = false; | 414 | vcpu->arch.slb[i].valid = false; |
415 | vcpu->arch.slb[i].orige = 0; | ||
416 | vcpu->arch.slb[i].origv = 0; | ||
417 | } | ||
401 | 418 | ||
402 | if (vcpu->arch.shared->msr & MSR_IR) { | 419 | if (vcpu->arch.shared->msr & MSR_IR) { |
403 | kvmppc_mmu_flush_segments(vcpu); | 420 | kvmppc_mmu_flush_segments(vcpu); |
@@ -467,8 +484,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, | |||
467 | 484 | ||
468 | if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { | 485 | if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { |
469 | slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); | 486 | slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); |
470 | if (slb) | 487 | if (slb) { |
471 | gvsid = slb->vsid; | 488 | gvsid = slb->vsid; |
489 | if (slb->tb) { | ||
490 | gvsid <<= SID_SHIFT_1T - SID_SHIFT; | ||
491 | gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1); | ||
492 | gvsid |= VSID_1T; | ||
493 | } | ||
494 | } | ||
472 | } | 495 | } |
473 | 496 | ||
474 | switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { | 497 | switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 3a9a1aceb14f..b350d9494b26 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c | |||
@@ -301,6 +301,23 @@ out: | |||
301 | return r; | 301 | return r; |
302 | } | 302 | } |
303 | 303 | ||
304 | void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size) | ||
305 | { | ||
306 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
307 | ulong seg_mask = -seg_size; | ||
308 | int i; | ||
309 | |||
310 | for (i = 1; i < svcpu->slb_max; i++) { | ||
311 | if ((svcpu->slb[i].esid & SLB_ESID_V) && | ||
312 | (svcpu->slb[i].esid & seg_mask) == ea) { | ||
313 | /* Invalidate this entry */ | ||
314 | svcpu->slb[i].esid = 0; | ||
315 | } | ||
316 | } | ||
317 | |||
318 | svcpu_put(svcpu); | ||
319 | } | ||
320 | |||
304 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | 321 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) |
305 | { | 322 | { |
306 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | 323 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
@@ -325,9 +342,9 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) | |||
325 | return -1; | 342 | return -1; |
326 | vcpu3s->context_id[0] = err; | 343 | vcpu3s->context_id[0] = err; |
327 | 344 | ||
328 | vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) | 345 | vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1) |
329 | << ESID_BITS) - 1; | 346 | << ESID_BITS) - 1; |
330 | vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS; | 347 | vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS; |
331 | vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; | 348 | vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; |
332 | 349 | ||
333 | kvmppc_mmu_hpte_init(vcpu); | 350 | kvmppc_mmu_hpte_init(vcpu); |
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 56b983e7b738..4f0caecc0f9d 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S | |||
@@ -66,10 +66,6 @@ slb_exit_skip_ ## num: | |||
66 | 66 | ||
67 | ld r12, PACA_SLBSHADOWPTR(r13) | 67 | ld r12, PACA_SLBSHADOWPTR(r13) |
68 | 68 | ||
69 | /* Save off the first entry so we can slbie it later */ | ||
70 | ld r10, SHADOW_SLB_ESID(0)(r12) | ||
71 | ld r11, SHADOW_SLB_VSID(0)(r12) | ||
72 | |||
73 | /* Remove bolted entries */ | 69 | /* Remove bolted entries */ |
74 | UNBOLT_SLB_ENTRY(0) | 70 | UNBOLT_SLB_ENTRY(0) |
75 | UNBOLT_SLB_ENTRY(1) | 71 | UNBOLT_SLB_ENTRY(1) |
@@ -81,15 +77,10 @@ slb_exit_skip_ ## num: | |||
81 | 77 | ||
82 | /* Flush SLB */ | 78 | /* Flush SLB */ |
83 | 79 | ||
80 | li r10, 0 | ||
81 | slbmte r10, r10 | ||
84 | slbia | 82 | slbia |
85 | 83 | ||
86 | /* r0 = esid & ESID_MASK */ | ||
87 | rldicr r10, r10, 0, 35 | ||
88 | /* r0 |= CLASS_BIT(VSID) */ | ||
89 | rldic r12, r11, 56 - 36, 36 | ||
90 | or r10, r10, r12 | ||
91 | slbie r10 | ||
92 | |||
93 | /* Fill SLB with our shadow */ | 84 | /* Fill SLB with our shadow */ |
94 | 85 | ||
95 | lbz r12, SVCPU_SLB_MAX(r3) | 86 | lbz r12, SVCPU_SLB_MAX(r3) |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bdc40b8e77d9..19498a567a81 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
@@ -1239,8 +1239,7 @@ out: | |||
1239 | #ifdef CONFIG_PPC64 | 1239 | #ifdef CONFIG_PPC64 |
1240 | int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) | 1240 | int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) |
1241 | { | 1241 | { |
1242 | /* No flags */ | 1242 | info->flags = KVM_PPC_1T_SEGMENTS; |
1243 | info->flags = 0; | ||
1244 | 1243 | ||
1245 | /* SLB is always 64 entries */ | 1244 | /* SLB is always 64 entries */ |
1246 | info->slb_size = 64; | 1245 | info->slb_size = 64; |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1a1b51189773..dcc94f016007 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -796,7 +796,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, | |||
796 | kvmppc_fill_pt_regs(®s); | 796 | kvmppc_fill_pt_regs(®s); |
797 | timer_interrupt(®s); | 797 | timer_interrupt(®s); |
798 | break; | 798 | break; |
799 | #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) | 799 | #if defined(CONFIG_PPC_DOORBELL) |
800 | case BOOKE_INTERRUPT_DOORBELL: | 800 | case BOOKE_INTERRUPT_DOORBELL: |
801 | kvmppc_fill_pt_regs(®s); | 801 | kvmppc_fill_pt_regs(®s); |
802 | doorbell_exception(®s); | 802 | doorbell_exception(®s); |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 631a2650e4e4..2c52ada30775 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -169,6 +169,9 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
169 | vcpu->arch.shared->sprg3 = spr_val; | 169 | vcpu->arch.shared->sprg3 = spr_val; |
170 | break; | 170 | break; |
171 | 171 | ||
172 | /* PIR can legally be written, but we ignore it */ | ||
173 | case SPRN_PIR: break; | ||
174 | |||
172 | default: | 175 | default: |
173 | emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, | 176 | emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, |
174 | spr_val); | 177 | spr_val); |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 16bd5d169cdb..3238d4004e84 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -62,13 +62,20 @@ struct sca_block { | |||
62 | #define CPUSTAT_MCDS 0x00000100 | 62 | #define CPUSTAT_MCDS 0x00000100 |
63 | #define CPUSTAT_SM 0x00000080 | 63 | #define CPUSTAT_SM 0x00000080 |
64 | #define CPUSTAT_G 0x00000008 | 64 | #define CPUSTAT_G 0x00000008 |
65 | #define CPUSTAT_GED 0x00000004 | ||
65 | #define CPUSTAT_J 0x00000002 | 66 | #define CPUSTAT_J 0x00000002 |
66 | #define CPUSTAT_P 0x00000001 | 67 | #define CPUSTAT_P 0x00000001 |
67 | 68 | ||
68 | struct kvm_s390_sie_block { | 69 | struct kvm_s390_sie_block { |
69 | atomic_t cpuflags; /* 0x0000 */ | 70 | atomic_t cpuflags; /* 0x0000 */ |
70 | __u32 prefix; /* 0x0004 */ | 71 | __u32 prefix; /* 0x0004 */ |
71 | __u8 reserved8[32]; /* 0x0008 */ | 72 | __u8 reserved08[4]; /* 0x0008 */ |
73 | #define PROG_IN_SIE (1<<0) | ||
74 | __u32 prog0c; /* 0x000c */ | ||
75 | __u8 reserved10[16]; /* 0x0010 */ | ||
76 | #define PROG_BLOCK_SIE 0x00000001 | ||
77 | atomic_t prog20; /* 0x0020 */ | ||
78 | __u8 reserved24[4]; /* 0x0024 */ | ||
72 | __u64 cputm; /* 0x0028 */ | 79 | __u64 cputm; /* 0x0028 */ |
73 | __u64 ckc; /* 0x0030 */ | 80 | __u64 ckc; /* 0x0030 */ |
74 | __u64 epoch; /* 0x0038 */ | 81 | __u64 epoch; /* 0x0038 */ |
@@ -90,7 +97,8 @@ struct kvm_s390_sie_block { | |||
90 | __u32 scaoh; /* 0x005c */ | 97 | __u32 scaoh; /* 0x005c */ |
91 | __u8 reserved60; /* 0x0060 */ | 98 | __u8 reserved60; /* 0x0060 */ |
92 | __u8 ecb; /* 0x0061 */ | 99 | __u8 ecb; /* 0x0061 */ |
93 | __u8 reserved62[2]; /* 0x0062 */ | 100 | __u8 ecb2; /* 0x0062 */ |
101 | __u8 reserved63[1]; /* 0x0063 */ | ||
94 | __u32 scaol; /* 0x0064 */ | 102 | __u32 scaol; /* 0x0064 */ |
95 | __u8 reserved68[4]; /* 0x0068 */ | 103 | __u8 reserved68[4]; /* 0x0068 */ |
96 | __u32 todpr; /* 0x006c */ | 104 | __u32 todpr; /* 0x006c */ |
@@ -130,6 +138,7 @@ struct kvm_vcpu_stat { | |||
130 | u32 deliver_program_int; | 138 | u32 deliver_program_int; |
131 | u32 deliver_io_int; | 139 | u32 deliver_io_int; |
132 | u32 exit_wait_state; | 140 | u32 exit_wait_state; |
141 | u32 instruction_pfmf; | ||
133 | u32 instruction_stidp; | 142 | u32 instruction_stidp; |
134 | u32 instruction_spx; | 143 | u32 instruction_spx; |
135 | u32 instruction_stpx; | 144 | u32 instruction_stpx; |
@@ -166,7 +175,7 @@ struct kvm_s390_ext_info { | |||
166 | }; | 175 | }; |
167 | 176 | ||
168 | #define PGM_OPERATION 0x01 | 177 | #define PGM_OPERATION 0x01 |
169 | #define PGM_PRIVILEGED_OPERATION 0x02 | 178 | #define PGM_PRIVILEGED_OP 0x02 |
170 | #define PGM_EXECUTE 0x03 | 179 | #define PGM_EXECUTE 0x03 |
171 | #define PGM_PROTECTION 0x04 | 180 | #define PGM_PROTECTION 0x04 |
172 | #define PGM_ADDRESSING 0x05 | 181 | #define PGM_ADDRESSING 0x05 |
@@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt { | |||
219 | atomic_t active; | 228 | atomic_t active; |
220 | struct kvm_s390_float_interrupt *float_int; | 229 | struct kvm_s390_float_interrupt *float_int; |
221 | int timer_due; /* event indicator for waitqueue below */ | 230 | int timer_due; /* event indicator for waitqueue below */ |
222 | wait_queue_head_t wq; | 231 | wait_queue_head_t *wq; |
223 | atomic_t *cpuflags; | 232 | atomic_t *cpuflags; |
224 | unsigned int action_bits; | 233 | unsigned int action_bits; |
225 | }; | 234 | }; |
@@ -266,4 +275,5 @@ struct kvm_arch{ | |||
266 | }; | 275 | }; |
267 | 276 | ||
268 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); | 277 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); |
278 | extern char sie_exit; | ||
269 | #endif | 279 | #endif |
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 5f0173a31693..1141fb3e7b21 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h | |||
@@ -14,3 +14,13 @@ | |||
14 | /* Per-CPU flags for PMU states */ | 14 | /* Per-CPU flags for PMU states */ |
15 | #define PMU_F_RESERVED 0x1000 | 15 | #define PMU_F_RESERVED 0x1000 |
16 | #define PMU_F_ENABLED 0x2000 | 16 | #define PMU_F_ENABLED 0x2000 |
17 | |||
18 | #ifdef CONFIG_64BIT | ||
19 | |||
20 | /* Perf callbacks */ | ||
21 | struct pt_regs; | ||
22 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | ||
23 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | ||
24 | #define perf_misc_flags(regs) perf_misc_flags(regs) | ||
25 | |||
26 | #endif /* CONFIG_64BIT */ | ||
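
The self-referential define of perf_misc_flags(regs) is the usual kernel idiom for telling the generic perf code, which tests the macro with #ifndef, that the architecture supplies its own implementation while the extern function still gets called. A self-contained sketch of the idiom with made-up names, not the actual perf headers:

    #include <stdio.h>

    unsigned long arch_hook(void) { return 42; }   /* the arch override */
    #define arch_hook() arch_hook()                 /* same self-define idiom */

    #ifndef arch_hook
    #define arch_hook() 0UL                         /* generic fallback, now skipped */
    #endif

    int main(void)
    {
        printf("%lu\n", arch_hook());   /* prints 42, via the override */
        return 0;
    }
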
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9aefa3c64eb2..0ea4e591fa78 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -296,18 +296,16 @@ extern unsigned long MODULES_END; | |||
296 | #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) | 296 | #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) |
297 | 297 | ||
298 | /* Page status table bits for virtualization */ | 298 | /* Page status table bits for virtualization */ |
299 | #define RCP_ACC_BITS 0xf0000000UL | 299 | #define PGSTE_ACC_BITS 0xf0000000UL |
300 | #define RCP_FP_BIT 0x08000000UL | 300 | #define PGSTE_FP_BIT 0x08000000UL |
301 | #define RCP_PCL_BIT 0x00800000UL | 301 | #define PGSTE_PCL_BIT 0x00800000UL |
302 | #define RCP_HR_BIT 0x00400000UL | 302 | #define PGSTE_HR_BIT 0x00400000UL |
303 | #define RCP_HC_BIT 0x00200000UL | 303 | #define PGSTE_HC_BIT 0x00200000UL |
304 | #define RCP_GR_BIT 0x00040000UL | 304 | #define PGSTE_GR_BIT 0x00040000UL |
305 | #define RCP_GC_BIT 0x00020000UL | 305 | #define PGSTE_GC_BIT 0x00020000UL |
306 | #define RCP_IN_BIT 0x00002000UL /* IPTE notify bit */ | 306 | #define PGSTE_UR_BIT 0x00008000UL |
307 | 307 | #define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */ | |
308 | /* User dirty / referenced bit for KVM's migration feature */ | 308 | #define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */ |
309 | #define KVM_UR_BIT 0x00008000UL | ||
310 | #define KVM_UC_BIT 0x00004000UL | ||
311 | 309 | ||
312 | #else /* CONFIG_64BIT */ | 310 | #else /* CONFIG_64BIT */ |
313 | 311 | ||
@@ -364,18 +362,16 @@ extern unsigned long MODULES_END; | |||
364 | | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) | 362 | | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) |
365 | 363 | ||
366 | /* Page status table bits for virtualization */ | 364 | /* Page status table bits for virtualization */ |
367 | #define RCP_ACC_BITS 0xf000000000000000UL | 365 | #define PGSTE_ACC_BITS 0xf000000000000000UL |
368 | #define RCP_FP_BIT 0x0800000000000000UL | 366 | #define PGSTE_FP_BIT 0x0800000000000000UL |
369 | #define RCP_PCL_BIT 0x0080000000000000UL | 367 | #define PGSTE_PCL_BIT 0x0080000000000000UL |
370 | #define RCP_HR_BIT 0x0040000000000000UL | 368 | #define PGSTE_HR_BIT 0x0040000000000000UL |
371 | #define RCP_HC_BIT 0x0020000000000000UL | 369 | #define PGSTE_HC_BIT 0x0020000000000000UL |
372 | #define RCP_GR_BIT 0x0004000000000000UL | 370 | #define PGSTE_GR_BIT 0x0004000000000000UL |
373 | #define RCP_GC_BIT 0x0002000000000000UL | 371 | #define PGSTE_GC_BIT 0x0002000000000000UL |
374 | #define RCP_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ | 372 | #define PGSTE_UR_BIT 0x0000800000000000UL |
375 | 373 | #define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */ | |
376 | /* User dirty / referenced bit for KVM's migration feature */ | 374 | #define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ |
377 | #define KVM_UR_BIT 0x0000800000000000UL | ||
378 | #define KVM_UC_BIT 0x0000400000000000UL | ||
379 | 375 | ||
380 | #endif /* CONFIG_64BIT */ | 376 | #endif /* CONFIG_64BIT */ |
381 | 377 | ||
@@ -615,8 +611,8 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) | |||
615 | asm( | 611 | asm( |
616 | " lg %0,%2\n" | 612 | " lg %0,%2\n" |
617 | "0: lgr %1,%0\n" | 613 | "0: lgr %1,%0\n" |
618 | " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ | 614 | " nihh %0,0xff7f\n" /* clear PCL bit in old */ |
619 | " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ | 615 | " oihh %1,0x0080\n" /* set PCL bit in new */ |
620 | " csg %0,%1,%2\n" | 616 | " csg %0,%1,%2\n" |
621 | " jl 0b\n" | 617 | " jl 0b\n" |
622 | : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) | 618 | : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) |
@@ -629,7 +625,7 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) | |||
629 | { | 625 | { |
630 | #ifdef CONFIG_PGSTE | 626 | #ifdef CONFIG_PGSTE |
631 | asm( | 627 | asm( |
632 | " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ | 628 | " nihh %1,0xff7f\n" /* clear PCL bit */ |
633 | " stg %1,%0\n" | 629 | " stg %1,%0\n" |
634 | : "=Q" (ptep[PTRS_PER_PTE]) | 630 | : "=Q" (ptep[PTRS_PER_PTE]) |
635 | : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) | 631 | : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) |
@@ -662,14 +658,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) | |||
662 | else if (bits) | 658 | else if (bits) |
663 | page_reset_referenced(address); | 659 | page_reset_referenced(address); |
664 | /* Transfer page changed & referenced bit to guest bits in pgste */ | 660 | /* Transfer page changed & referenced bit to guest bits in pgste */ |
665 | pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ | 661 | pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ |
666 | /* Get host changed & referenced bits from pgste */ | 662 | /* Get host changed & referenced bits from pgste */ |
667 | bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; | 663 | bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52; |
668 | /* Transfer page changed & referenced bit to kvm user bits */ | 664 | /* Transfer page changed & referenced bit to kvm user bits */ |
669 | pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ | 665 | pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ |
670 | /* Clear relevant host bits in pgste. */ | 666 | /* Clear relevant host bits in pgste. */ |
671 | pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); | 667 | pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT); |
672 | pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); | 668 | pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); |
673 | /* Copy page access key and fetch protection bit to pgste */ | 669 | /* Copy page access key and fetch protection bit to pgste */ |
674 | pgste_val(pgste) |= | 670 | pgste_val(pgste) |= |
675 | (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; | 671 | (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; |
@@ -690,15 +686,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) | |||
690 | /* Get referenced bit from storage key */ | 686 | /* Get referenced bit from storage key */ |
691 | young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); | 687 | young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); |
692 | if (young) | 688 | if (young) |
693 | pgste_val(pgste) |= RCP_GR_BIT; | 689 | pgste_val(pgste) |= PGSTE_GR_BIT; |
694 | /* Get host referenced bit from pgste */ | 690 | /* Get host referenced bit from pgste */ |
695 | if (pgste_val(pgste) & RCP_HR_BIT) { | 691 | if (pgste_val(pgste) & PGSTE_HR_BIT) { |
696 | pgste_val(pgste) &= ~RCP_HR_BIT; | 692 | pgste_val(pgste) &= ~PGSTE_HR_BIT; |
697 | young = 1; | 693 | young = 1; |
698 | } | 694 | } |
699 | /* Transfer referenced bit to kvm user bits and pte */ | 695 | /* Transfer referenced bit to kvm user bits and pte */ |
700 | if (young) { | 696 | if (young) { |
701 | pgste_val(pgste) |= KVM_UR_BIT; | 697 | pgste_val(pgste) |= PGSTE_UR_BIT; |
702 | pte_val(*ptep) |= _PAGE_SWR; | 698 | pte_val(*ptep) |= _PAGE_SWR; |
703 | } | 699 | } |
704 | #endif | 700 | #endif |
@@ -720,7 +716,7 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) | |||
720 | * The guest C/R information is still in the PGSTE, set real | 716 | * The guest C/R information is still in the PGSTE, set real |
721 | * key C/R to 0. | 717 | * key C/R to 0. |
722 | */ | 718 | */ |
723 | nkey = (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; | 719 | nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; |
724 | page_set_storage_key(address, nkey, 0); | 720 | page_set_storage_key(address, nkey, 0); |
725 | #endif | 721 | #endif |
726 | } | 722 | } |
@@ -750,6 +746,7 @@ struct gmap { | |||
750 | struct mm_struct *mm; | 746 | struct mm_struct *mm; |
751 | unsigned long *table; | 747 | unsigned long *table; |
752 | unsigned long asce; | 748 | unsigned long asce; |
749 | void *private; | ||
753 | struct list_head crst_list; | 750 | struct list_head crst_list; |
754 | }; | 751 | }; |
755 | 752 | ||
@@ -808,8 +805,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, | |||
808 | pte_t *ptep, pgste_t pgste) | 805 | pte_t *ptep, pgste_t pgste) |
809 | { | 806 | { |
810 | #ifdef CONFIG_PGSTE | 807 | #ifdef CONFIG_PGSTE |
811 | if (pgste_val(pgste) & RCP_IN_BIT) { | 808 | if (pgste_val(pgste) & PGSTE_IN_BIT) { |
812 | pgste_val(pgste) &= ~RCP_IN_BIT; | 809 | pgste_val(pgste) &= ~PGSTE_IN_BIT; |
813 | gmap_do_ipte_notify(mm, addr, ptep); | 810 | gmap_do_ipte_notify(mm, addr, ptep); |
814 | } | 811 | } |
815 | #endif | 812 | #endif |
@@ -977,8 +974,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, | |||
977 | if (mm_has_pgste(mm)) { | 974 | if (mm_has_pgste(mm)) { |
978 | pgste = pgste_get_lock(ptep); | 975 | pgste = pgste_get_lock(ptep); |
979 | pgste = pgste_update_all(ptep, pgste); | 976 | pgste = pgste_update_all(ptep, pgste); |
980 | dirty = !!(pgste_val(pgste) & KVM_UC_BIT); | 977 | dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); |
981 | pgste_val(pgste) &= ~KVM_UC_BIT; | 978 | pgste_val(pgste) &= ~PGSTE_UC_BIT; |
982 | pgste_set_unlock(ptep, pgste); | 979 | pgste_set_unlock(ptep, pgste); |
983 | return dirty; | 980 | return dirty; |
984 | } | 981 | } |
@@ -997,8 +994,8 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, | |||
997 | if (mm_has_pgste(mm)) { | 994 | if (mm_has_pgste(mm)) { |
998 | pgste = pgste_get_lock(ptep); | 995 | pgste = pgste_get_lock(ptep); |
999 | pgste = pgste_update_young(ptep, pgste); | 996 | pgste = pgste_update_young(ptep, pgste); |
1000 | young = !!(pgste_val(pgste) & KVM_UR_BIT); | 997 | young = !!(pgste_val(pgste) & PGSTE_UR_BIT); |
1001 | pgste_val(pgste) &= ~KVM_UR_BIT; | 998 | pgste_val(pgste) &= ~PGSTE_UR_BIT; |
1002 | pgste_set_unlock(ptep, pgste); | 999 | pgste_set_unlock(ptep, pgste); |
1003 | } | 1000 | } |
1004 | return young; | 1001 | return young; |
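
The PGSTE_* names introduced above keep exactly the bit positions of the old RCP_*/KVM_* constants, which is what the fixed shift counts in pgste_update_all() (bits << 48, >> 52, << 45) depend on. A standalone sketch that checks those relations, assuming the 64-bit values from this hunk and the usual s390 storage-key encoding (referenced = 0x04, changed = 0x02):

    #include <assert.h>
    #include <stdint.h>

    #define PGSTE_HR_BIT 0x0040000000000000ULL
    #define PGSTE_HC_BIT 0x0020000000000000ULL
    #define PGSTE_GR_BIT 0x0004000000000000ULL
    #define PGSTE_GC_BIT 0x0002000000000000ULL
    #define PGSTE_UR_BIT 0x0000800000000000ULL
    #define PGSTE_UC_BIT 0x0000400000000000ULL

    int main(void)
    {
        uint64_t r = 0x04, c = 0x02;    /* storage-key referenced/changed bits */

        assert((r << 48) == PGSTE_GR_BIT && (c << 48) == PGSTE_GC_BIT);
        assert((PGSTE_HR_BIT >> 52) == r && (PGSTE_HC_BIT >> 52) == c);
        assert((r << 45) == PGSTE_UR_BIT && (c << 45) == PGSTE_UC_BIT);
        return 0;
    }
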
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index d6de844bc30a..2416138ebd3e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #define ASM_OFFSETS_C | 7 | #define ASM_OFFSETS_C |
8 | 8 | ||
9 | #include <linux/kbuild.h> | 9 | #include <linux/kbuild.h> |
10 | #include <linux/kvm_host.h> | ||
10 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
11 | #include <asm/cputime.h> | 12 | #include <asm/cputime.h> |
12 | #include <asm/vdso.h> | 13 | #include <asm/vdso.h> |
@@ -162,6 +163,8 @@ int main(void) | |||
162 | DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); | 163 | DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); |
163 | DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); | 164 | DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); |
164 | DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); | 165 | DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); |
166 | DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); | ||
167 | DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); | ||
165 | #endif /* CONFIG_32BIT */ | 168 | #endif /* CONFIG_32BIT */ |
166 | return 0; | 169 | return 0; |
167 | } | 170 | } |
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index bc5864c5148b..1c039d0c24c7 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S | |||
@@ -47,7 +47,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ | |||
47 | _TIF_MCCK_PENDING) | 47 | _TIF_MCCK_PENDING) |
48 | _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ | 48 | _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ |
49 | _TIF_SYSCALL_TRACEPOINT) | 49 | _TIF_SYSCALL_TRACEPOINT) |
50 | _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) | ||
51 | 50 | ||
52 | #define BASED(name) name-system_call(%r13) | 51 | #define BASED(name) name-system_call(%r13) |
53 | 52 | ||
@@ -81,23 +80,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) | |||
81 | #endif | 80 | #endif |
82 | .endm | 81 | .endm |
83 | 82 | ||
84 | .macro HANDLE_SIE_INTERCEPT scratch,pgmcheck | 83 | .macro HANDLE_SIE_INTERCEPT scratch,reason |
85 | #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) | 84 | #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) |
86 | tmhh %r8,0x0001 # interrupting from user ? | 85 | tmhh %r8,0x0001 # interrupting from user ? |
87 | jnz .+42 | 86 | jnz .+62 |
88 | lgr \scratch,%r9 | 87 | lgr \scratch,%r9 |
89 | slg \scratch,BASED(.Lsie_loop) | 88 | slg \scratch,BASED(.Lsie_critical) |
90 | clg \scratch,BASED(.Lsie_length) | 89 | clg \scratch,BASED(.Lsie_critical_length) |
91 | .if \pgmcheck | 90 | .if \reason==1 |
92 | # Some program interrupts are suppressing (e.g. protection). | 91 | # Some program interrupts are suppressing (e.g. protection). |
93 | # We must also check the instruction after SIE in that case. | 92 | # We must also check the instruction after SIE in that case. |
94 | # do_protection_exception will rewind to rewind_pad | 93 | # do_protection_exception will rewind to rewind_pad |
95 | jh .+22 | 94 | jh .+42 |
96 | .else | 95 | .else |
97 | jhe .+22 | 96 | jhe .+42 |
98 | .endif | 97 | .endif |
99 | lg %r9,BASED(.Lsie_loop) | 98 | lg %r14,__SF_EMPTY(%r15) # get control block pointer |
100 | LPP BASED(.Lhost_id) # set host id | 99 | LPP __SF_EMPTY+16(%r15) # set host id |
100 | ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE | ||
101 | lctlg %c1,%c1,__LC_USER_ASCE # load primary asce | ||
102 | larl %r9,sie_exit # skip forward to sie_exit | ||
103 | mvi __SF_EMPTY+31(%r15),\reason # set exit reason | ||
101 | #endif | 104 | #endif |
102 | .endm | 105 | .endm |
103 | 106 | ||
@@ -450,7 +453,7 @@ ENTRY(io_int_handler) | |||
450 | lg %r12,__LC_THREAD_INFO | 453 | lg %r12,__LC_THREAD_INFO |
451 | larl %r13,system_call | 454 | larl %r13,system_call |
452 | lmg %r8,%r9,__LC_IO_OLD_PSW | 455 | lmg %r8,%r9,__LC_IO_OLD_PSW |
453 | HANDLE_SIE_INTERCEPT %r14,0 | 456 | HANDLE_SIE_INTERCEPT %r14,2 |
454 | SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT | 457 | SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT |
455 | tmhh %r8,0x0001 # interrupting from user? | 458 | tmhh %r8,0x0001 # interrupting from user? |
456 | jz io_skip | 459 | jz io_skip |
@@ -603,7 +606,7 @@ ENTRY(ext_int_handler) | |||
603 | lg %r12,__LC_THREAD_INFO | 606 | lg %r12,__LC_THREAD_INFO |
604 | larl %r13,system_call | 607 | larl %r13,system_call |
605 | lmg %r8,%r9,__LC_EXT_OLD_PSW | 608 | lmg %r8,%r9,__LC_EXT_OLD_PSW |
606 | HANDLE_SIE_INTERCEPT %r14,0 | 609 | HANDLE_SIE_INTERCEPT %r14,3 |
607 | SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT | 610 | SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT |
608 | tmhh %r8,0x0001 # interrupting from user ? | 611 | tmhh %r8,0x0001 # interrupting from user ? |
609 | jz ext_skip | 612 | jz ext_skip |
@@ -651,7 +654,7 @@ ENTRY(mcck_int_handler) | |||
651 | lg %r12,__LC_THREAD_INFO | 654 | lg %r12,__LC_THREAD_INFO |
652 | larl %r13,system_call | 655 | larl %r13,system_call |
653 | lmg %r8,%r9,__LC_MCK_OLD_PSW | 656 | lmg %r8,%r9,__LC_MCK_OLD_PSW |
654 | HANDLE_SIE_INTERCEPT %r14,0 | 657 | HANDLE_SIE_INTERCEPT %r14,4 |
655 | tm __LC_MCCK_CODE,0x80 # system damage? | 658 | tm __LC_MCCK_CODE,0x80 # system damage? |
656 | jo mcck_panic # yes -> rest of mcck code invalid | 659 | jo mcck_panic # yes -> rest of mcck code invalid |
657 | lghi %r14,__LC_CPU_TIMER_SAVE_AREA | 660 | lghi %r14,__LC_CPU_TIMER_SAVE_AREA |
@@ -945,56 +948,50 @@ ENTRY(sie64a) | |||
945 | stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers | 948 | stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers |
946 | stg %r2,__SF_EMPTY(%r15) # save control block pointer | 949 | stg %r2,__SF_EMPTY(%r15) # save control block pointer |
947 | stg %r3,__SF_EMPTY+8(%r15) # save guest register save area | 950 | stg %r3,__SF_EMPTY+8(%r15) # save guest register save area |
948 | xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 | 951 | xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason |
949 | lmg %r0,%r13,0(%r3) # load guest gprs 0-13 | 952 | lmg %r0,%r13,0(%r3) # load guest gprs 0-13 |
950 | # some program checks are suppressing. C code (e.g. do_protection_exception) | ||
951 | # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other | ||
952 | # instructions in the sie_loop should not cause program interrupts. So | ||
953 | # lets use a nop (47 00 00 00) as a landing pad. | ||
954 | # See also HANDLE_SIE_INTERCEPT | ||
955 | rewind_pad: | ||
956 | nop 0 | ||
957 | sie_loop: | ||
958 | lg %r14,__LC_THREAD_INFO # pointer thread_info struct | ||
959 | tm __TI_flags+7(%r14),_TIF_EXIT_SIE | ||
960 | jnz sie_exit | ||
961 | lg %r14,__LC_GMAP # get gmap pointer | 953 | lg %r14,__LC_GMAP # get gmap pointer |
962 | ltgr %r14,%r14 | 954 | ltgr %r14,%r14 |
963 | jz sie_gmap | 955 | jz sie_gmap |
964 | lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce | 956 | lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce |
965 | sie_gmap: | 957 | sie_gmap: |
966 | lg %r14,__SF_EMPTY(%r15) # get control block pointer | 958 | lg %r14,__SF_EMPTY(%r15) # get control block pointer |
959 | oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now | ||
960 | tm __SIE_PROG20+3(%r14),1 # last exit... | ||
961 | jnz sie_done | ||
967 | LPP __SF_EMPTY(%r15) # set guest id | 962 | LPP __SF_EMPTY(%r15) # set guest id |
968 | sie 0(%r14) | 963 | sie 0(%r14) |
969 | sie_done: | 964 | sie_done: |
970 | LPP __SF_EMPTY+16(%r15) # set host id | 965 | LPP __SF_EMPTY+16(%r15) # set host id |
971 | lg %r14,__LC_THREAD_INFO # pointer thread_info struct | 966 | ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE |
972 | sie_exit: | ||
973 | lctlg %c1,%c1,__LC_USER_ASCE # load primary asce | 967 | lctlg %c1,%c1,__LC_USER_ASCE # load primary asce |
968 | # some program checks are suppressing. C code (e.g. do_protection_exception) | ||
969 | # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other | ||
970 | # instructions beween sie64a and sie_done should not cause program | ||
971 | # interrupts. So lets use a nop (47 00 00 00) as a landing pad. | ||
972 | # See also HANDLE_SIE_INTERCEPT | ||
973 | rewind_pad: | ||
974 | nop 0 | ||
975 | .globl sie_exit | ||
976 | sie_exit: | ||
974 | lg %r14,__SF_EMPTY+8(%r15) # load guest register save area | 977 | lg %r14,__SF_EMPTY+8(%r15) # load guest register save area |
975 | stmg %r0,%r13,0(%r14) # save guest gprs 0-13 | 978 | stmg %r0,%r13,0(%r14) # save guest gprs 0-13 |
976 | lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers | 979 | lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers |
977 | lghi %r2,0 | 980 | lg %r2,__SF_EMPTY+24(%r15) # return exit reason code |
978 | br %r14 | 981 | br %r14 |
979 | sie_fault: | 982 | sie_fault: |
980 | lctlg %c1,%c1,__LC_USER_ASCE # load primary asce | 983 | lghi %r14,-EFAULT |
981 | lg %r14,__LC_THREAD_INFO # pointer thread_info struct | 984 | stg %r14,__SF_EMPTY+24(%r15) # set exit reason code |
982 | lg %r14,__SF_EMPTY+8(%r15) # load guest register save area | 985 | j sie_exit |
983 | stmg %r0,%r13,0(%r14) # save guest gprs 0-13 | ||
984 | lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers | ||
985 | lghi %r2,-EFAULT | ||
986 | br %r14 | ||
987 | 986 | ||
988 | .align 8 | 987 | .align 8 |
989 | .Lsie_loop: | 988 | .Lsie_critical: |
990 | .quad sie_loop | 989 | .quad sie_gmap |
991 | .Lsie_length: | 990 | .Lsie_critical_length: |
992 | .quad sie_done - sie_loop | 991 | .quad sie_done - sie_gmap |
993 | .Lhost_id: | ||
994 | .quad 0 | ||
995 | 992 | ||
996 | EX_TABLE(rewind_pad,sie_fault) | 993 | EX_TABLE(rewind_pad,sie_fault) |
997 | EX_TABLE(sie_loop,sie_fault) | 994 | EX_TABLE(sie_exit,sie_fault) |
998 | #endif | 995 | #endif |
999 | 996 | ||
1000 | .section .rodata, "a" | 997 | .section .rodata, "a" |
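
With the reworked exit path, sie64a() now reports how it left SIE: the 8-byte field at __SF_EMPTY+24 is returned in %r2, so the caller sees 0 for an ordinary SIE intercept, a small positive reason code (1-4) when HANDLE_SIE_INTERCEPT forced the exit from an interrupt handler, and a negative value such as -EFAULT stored by sie_fault. A userspace sketch of how a caller would interpret that convention (mirroring the __vcpu_run() hunk further down):

    #include <errno.h>
    #include <stdio.h>

    static int handle_sie_rc(long rc)
    {
        if (rc > 0)     /* exit forced by an interrupt: treat as success */
            rc = 0;
        if (rc < 0)     /* e.g. -EFAULT from the sie_fault path */
            fprintf(stderr, "sie64a failed: %ld\n", rc);
        return (int)rc;
    }

    int main(void)
    {
        int a = handle_sie_rc(0);       /* normal SIE intercept */
        int b = handle_sie_rc(2);       /* I/O interrupt reason code */
        int c = handle_sie_rc(-EFAULT);

        printf("%d %d %d\n", a, b, c);  /* prints: 0 0 -14 */
        return 0;
    }
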
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index f58f37f66824..a6fc037671b1 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c | |||
@@ -13,6 +13,7 @@ | |||
13 | 13 | ||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/perf_event.h> | 15 | #include <linux/perf_event.h> |
16 | #include <linux/kvm_host.h> | ||
16 | #include <linux/percpu.h> | 17 | #include <linux/percpu.h> |
17 | #include <linux/export.h> | 18 | #include <linux/export.h> |
18 | #include <asm/irq.h> | 19 | #include <asm/irq.h> |
@@ -39,6 +40,57 @@ int perf_num_counters(void) | |||
39 | } | 40 | } |
40 | EXPORT_SYMBOL(perf_num_counters); | 41 | EXPORT_SYMBOL(perf_num_counters); |
41 | 42 | ||
43 | static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) | ||
44 | { | ||
45 | struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; | ||
46 | |||
47 | if (!stack) | ||
48 | return NULL; | ||
49 | |||
50 | return (struct kvm_s390_sie_block *) stack->empty1[0]; | ||
51 | } | ||
52 | |||
53 | static bool is_in_guest(struct pt_regs *regs) | ||
54 | { | ||
55 | unsigned long ip = instruction_pointer(regs); | ||
56 | |||
57 | if (user_mode(regs)) | ||
58 | return false; | ||
59 | |||
60 | return ip == (unsigned long) &sie_exit; | ||
61 | } | ||
62 | |||
63 | static unsigned long guest_is_user_mode(struct pt_regs *regs) | ||
64 | { | ||
65 | return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; | ||
66 | } | ||
67 | |||
68 | static unsigned long instruction_pointer_guest(struct pt_regs *regs) | ||
69 | { | ||
70 | return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; | ||
71 | } | ||
72 | |||
73 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
74 | { | ||
75 | return is_in_guest(regs) ? instruction_pointer_guest(regs) | ||
76 | : instruction_pointer(regs); | ||
77 | } | ||
78 | |||
79 | static unsigned long perf_misc_guest_flags(struct pt_regs *regs) | ||
80 | { | ||
81 | return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER | ||
82 | : PERF_RECORD_MISC_GUEST_KERNEL; | ||
83 | } | ||
84 | |||
85 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
86 | { | ||
87 | if (is_in_guest(regs)) | ||
88 | return perf_misc_guest_flags(regs); | ||
89 | |||
90 | return user_mode(regs) ? PERF_RECORD_MISC_USER | ||
91 | : PERF_RECORD_MISC_KERNEL; | ||
92 | } | ||
93 | |||
42 | void perf_event_print_debug(void) | 94 | void perf_event_print_debug(void) |
43 | { | 95 | { |
44 | struct cpumf_ctr_info cf_info; | 96 | struct cpumf_ctr_info cf_info; |
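
The guest/host attribution above hinges on one address comparison: when an interrupt arrives inside the SIE critical section, HANDLE_SIE_INTERCEPT rewrites the saved instruction pointer to sie_exit, so any kernel-mode sample whose ip equals &sie_exit is attributed to the guest. A minimal userspace sketch of that classification, with a local sentinel standing in for the sie_exit label:

    #include <assert.h>

    static const char sie_exit_sentinel = 0;    /* stands in for &sie_exit */

    static int is_in_guest(const void *ip, int user_mode)
    {
        if (user_mode)
            return 0;
        return ip == (const void *)&sie_exit_sentinel;
    }

    int main(void)
    {
        assert(is_in_guest(&sie_exit_sentinel, 0));
        assert(!is_in_guest("elsewhere", 0));
        return 0;
    }
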
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 9bdbcef1da9e..3bac589844a7 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c | |||
@@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount); | |||
7 | #endif | 7 | #endif |
8 | #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) | 8 | #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) |
9 | EXPORT_SYMBOL(sie64a); | 9 | EXPORT_SYMBOL(sie64a); |
10 | EXPORT_SYMBOL(sie_exit); | ||
10 | #endif | 11 | #endif |
11 | EXPORT_SYMBOL(memcpy); | 12 | EXPORT_SYMBOL(memcpy); |
12 | EXPORT_SYMBOL(memset); | 13 | EXPORT_SYMBOL(memset); |
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 8fe9d65a4585..40b4c6470f88 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile | |||
@@ -6,7 +6,8 @@ | |||
6 | # it under the terms of the GNU General Public License (version 2 only) | 6 | # it under the terms of the GNU General Public License (version 2 only) |
7 | # as published by the Free Software Foundation. | 7 | # as published by the Free Software Foundation. |
8 | 8 | ||
9 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) | 9 | KVM := ../../../virt/kvm |
10 | common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o | ||
10 | 11 | ||
11 | ccflags-y := -Ivirt/kvm -Iarch/s390/kvm | 12 | ccflags-y := -Ivirt/kvm -Iarch/s390/kvm |
12 | 13 | ||
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1c01a9912989..3074475c8ae0 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c | |||
@@ -132,6 +132,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) | |||
132 | { | 132 | { |
133 | int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; | 133 | int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; |
134 | 134 | ||
135 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
136 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
137 | |||
135 | trace_kvm_s390_handle_diag(vcpu, code); | 138 | trace_kvm_s390_handle_diag(vcpu, code); |
136 | switch (code) { | 139 | switch (code) { |
137 | case 0x10: | 140 | case 0x10: |
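
The check added here is the guard this series puts in front of each privileged handler: if the guest PSW has the problem-state bit set, a privileged-operation program interrupt is injected instead of emulating. A standalone sketch of the test, assuming the usual s390 PSW problem-state mask value:

    #include <assert.h>
    #include <stdint.h>

    #define PSW_MASK_PSTATE 0x0001000000000000ULL   /* assumed problem-state bit */

    static int privileged_allowed(uint64_t psw_mask)
    {
        return !(psw_mask & PSW_MASK_PSTATE);
    }

    int main(void)
    {
        assert(privileged_allowed(0));                   /* supervisor state */
        assert(!privileged_allowed(PSW_MASK_PSTATE));    /* problem state */
        return 0;
    }
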
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b7d1b2edeeb3..5ee56e5acc23 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -22,87 +22,6 @@ | |||
22 | #include "trace.h" | 22 | #include "trace.h" |
23 | #include "trace-s390.h" | 23 | #include "trace-s390.h" |
24 | 24 | ||
25 | static int handle_lctlg(struct kvm_vcpu *vcpu) | ||
26 | { | ||
27 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | ||
28 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; | ||
29 | u64 useraddr; | ||
30 | int reg, rc; | ||
31 | |||
32 | vcpu->stat.instruction_lctlg++; | ||
33 | |||
34 | useraddr = kvm_s390_get_base_disp_rsy(vcpu); | ||
35 | |||
36 | if (useraddr & 7) | ||
37 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
38 | |||
39 | reg = reg1; | ||
40 | |||
41 | VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, | ||
42 | useraddr); | ||
43 | trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); | ||
44 | |||
45 | do { | ||
46 | rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], | ||
47 | (u64 __user *) useraddr); | ||
48 | if (rc) | ||
49 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
50 | useraddr += 8; | ||
51 | if (reg == reg3) | ||
52 | break; | ||
53 | reg = (reg + 1) % 16; | ||
54 | } while (1); | ||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | static int handle_lctl(struct kvm_vcpu *vcpu) | ||
59 | { | ||
60 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | ||
61 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; | ||
62 | u64 useraddr; | ||
63 | u32 val = 0; | ||
64 | int reg, rc; | ||
65 | |||
66 | vcpu->stat.instruction_lctl++; | ||
67 | |||
68 | useraddr = kvm_s390_get_base_disp_rs(vcpu); | ||
69 | |||
70 | if (useraddr & 3) | ||
71 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
72 | |||
73 | VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, | ||
74 | useraddr); | ||
75 | trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); | ||
76 | |||
77 | reg = reg1; | ||
78 | do { | ||
79 | rc = get_guest(vcpu, val, (u32 __user *) useraddr); | ||
80 | if (rc) | ||
81 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
82 | vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; | ||
83 | vcpu->arch.sie_block->gcr[reg] |= val; | ||
84 | useraddr += 4; | ||
85 | if (reg == reg3) | ||
86 | break; | ||
87 | reg = (reg + 1) % 16; | ||
88 | } while (1); | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | static const intercept_handler_t eb_handlers[256] = { | ||
93 | [0x2f] = handle_lctlg, | ||
94 | [0x8a] = kvm_s390_handle_priv_eb, | ||
95 | }; | ||
96 | |||
97 | static int handle_eb(struct kvm_vcpu *vcpu) | ||
98 | { | ||
99 | intercept_handler_t handler; | ||
100 | |||
101 | handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; | ||
102 | if (handler) | ||
103 | return handler(vcpu); | ||
104 | return -EOPNOTSUPP; | ||
105 | } | ||
106 | 25 | ||
107 | static const intercept_handler_t instruction_handlers[256] = { | 26 | static const intercept_handler_t instruction_handlers[256] = { |
108 | [0x01] = kvm_s390_handle_01, | 27 | [0x01] = kvm_s390_handle_01, |
@@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = { | |||
110 | [0x83] = kvm_s390_handle_diag, | 29 | [0x83] = kvm_s390_handle_diag, |
111 | [0xae] = kvm_s390_handle_sigp, | 30 | [0xae] = kvm_s390_handle_sigp, |
112 | [0xb2] = kvm_s390_handle_b2, | 31 | [0xb2] = kvm_s390_handle_b2, |
113 | [0xb7] = handle_lctl, | 32 | [0xb7] = kvm_s390_handle_lctl, |
114 | [0xb9] = kvm_s390_handle_b9, | 33 | [0xb9] = kvm_s390_handle_b9, |
115 | [0xe5] = kvm_s390_handle_e5, | 34 | [0xe5] = kvm_s390_handle_e5, |
116 | [0xeb] = handle_eb, | 35 | [0xeb] = kvm_s390_handle_eb, |
117 | }; | 36 | }; |
118 | 37 | ||
119 | static int handle_noop(struct kvm_vcpu *vcpu) | 38 | static int handle_noop(struct kvm_vcpu *vcpu) |
@@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu) | |||
174 | 93 | ||
175 | static int handle_validity(struct kvm_vcpu *vcpu) | 94 | static int handle_validity(struct kvm_vcpu *vcpu) |
176 | { | 95 | { |
177 | unsigned long vmaddr; | ||
178 | int viwhy = vcpu->arch.sie_block->ipb >> 16; | 96 | int viwhy = vcpu->arch.sie_block->ipb >> 16; |
179 | int rc; | ||
180 | 97 | ||
181 | vcpu->stat.exit_validity++; | 98 | vcpu->stat.exit_validity++; |
182 | trace_kvm_s390_intercept_validity(vcpu, viwhy); | 99 | trace_kvm_s390_intercept_validity(vcpu, viwhy); |
183 | if (viwhy == 0x37) { | 100 | WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); |
184 | vmaddr = gmap_fault(vcpu->arch.sie_block->prefix, | 101 | return -EOPNOTSUPP; |
185 | vcpu->arch.gmap); | ||
186 | if (IS_ERR_VALUE(vmaddr)) { | ||
187 | rc = -EOPNOTSUPP; | ||
188 | goto out; | ||
189 | } | ||
190 | rc = fault_in_pages_writeable((char __user *) vmaddr, | ||
191 | PAGE_SIZE); | ||
192 | if (rc) { | ||
193 | /* user will receive sigsegv, exit to user */ | ||
194 | rc = -EOPNOTSUPP; | ||
195 | goto out; | ||
196 | } | ||
197 | vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE, | ||
198 | vcpu->arch.gmap); | ||
199 | if (IS_ERR_VALUE(vmaddr)) { | ||
200 | rc = -EOPNOTSUPP; | ||
201 | goto out; | ||
202 | } | ||
203 | rc = fault_in_pages_writeable((char __user *) vmaddr, | ||
204 | PAGE_SIZE); | ||
205 | if (rc) { | ||
206 | /* user will receive sigsegv, exit to user */ | ||
207 | rc = -EOPNOTSUPP; | ||
208 | goto out; | ||
209 | } | ||
210 | } else | ||
211 | rc = -EOPNOTSUPP; | ||
212 | |||
213 | out: | ||
214 | if (rc) | ||
215 | VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", | ||
216 | viwhy); | ||
217 | return rc; | ||
218 | } | 102 | } |
219 | 103 | ||
220 | static int handle_instruction(struct kvm_vcpu *vcpu) | 104 | static int handle_instruction(struct kvm_vcpu *vcpu) |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5c948177529e..7f35cb33e510 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | |||
438 | no_timer: | 438 | no_timer: |
439 | spin_lock(&vcpu->arch.local_int.float_int->lock); | 439 | spin_lock(&vcpu->arch.local_int.float_int->lock); |
440 | spin_lock_bh(&vcpu->arch.local_int.lock); | 440 | spin_lock_bh(&vcpu->arch.local_int.lock); |
441 | add_wait_queue(&vcpu->arch.local_int.wq, &wait); | 441 | add_wait_queue(&vcpu->wq, &wait); |
442 | while (list_empty(&vcpu->arch.local_int.list) && | 442 | while (list_empty(&vcpu->arch.local_int.list) && |
443 | list_empty(&vcpu->arch.local_int.float_int->list) && | 443 | list_empty(&vcpu->arch.local_int.float_int->list) && |
444 | (!vcpu->arch.local_int.timer_due) && | 444 | (!vcpu->arch.local_int.timer_due) && |
@@ -452,7 +452,7 @@ no_timer: | |||
452 | } | 452 | } |
453 | __unset_cpu_idle(vcpu); | 453 | __unset_cpu_idle(vcpu); |
454 | __set_current_state(TASK_RUNNING); | 454 | __set_current_state(TASK_RUNNING); |
455 | remove_wait_queue(&vcpu->arch.local_int.wq, &wait); | 455 | remove_wait_queue(&vcpu->wq, &wait); |
456 | spin_unlock_bh(&vcpu->arch.local_int.lock); | 456 | spin_unlock_bh(&vcpu->arch.local_int.lock); |
457 | spin_unlock(&vcpu->arch.local_int.float_int->lock); | 457 | spin_unlock(&vcpu->arch.local_int.float_int->lock); |
458 | hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); | 458 | hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); |
@@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm) | |||
465 | 465 | ||
466 | spin_lock(&vcpu->arch.local_int.lock); | 466 | spin_lock(&vcpu->arch.local_int.lock); |
467 | vcpu->arch.local_int.timer_due = 1; | 467 | vcpu->arch.local_int.timer_due = 1; |
468 | if (waitqueue_active(&vcpu->arch.local_int.wq)) | 468 | if (waitqueue_active(&vcpu->wq)) |
469 | wake_up_interruptible(&vcpu->arch.local_int.wq); | 469 | wake_up_interruptible(&vcpu->wq); |
470 | spin_unlock(&vcpu->arch.local_int.lock); | 470 | spin_unlock(&vcpu->arch.local_int.lock); |
471 | } | 471 | } |
472 | 472 | ||
@@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) | |||
613 | spin_lock_bh(&li->lock); | 613 | spin_lock_bh(&li->lock); |
614 | list_add(&inti->list, &li->list); | 614 | list_add(&inti->list, &li->list); |
615 | atomic_set(&li->active, 1); | 615 | atomic_set(&li->active, 1); |
616 | BUG_ON(waitqueue_active(&li->wq)); | 616 | BUG_ON(waitqueue_active(li->wq)); |
617 | spin_unlock_bh(&li->lock); | 617 | spin_unlock_bh(&li->lock); |
618 | return 0; | 618 | return 0; |
619 | } | 619 | } |
@@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
746 | li = fi->local_int[sigcpu]; | 746 | li = fi->local_int[sigcpu]; |
747 | spin_lock_bh(&li->lock); | 747 | spin_lock_bh(&li->lock); |
748 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | 748 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); |
749 | if (waitqueue_active(&li->wq)) | 749 | if (waitqueue_active(li->wq)) |
750 | wake_up_interruptible(&li->wq); | 750 | wake_up_interruptible(li->wq); |
751 | spin_unlock_bh(&li->lock); | 751 | spin_unlock_bh(&li->lock); |
752 | spin_unlock(&fi->lock); | 752 | spin_unlock(&fi->lock); |
753 | mutex_unlock(&kvm->lock); | 753 | mutex_unlock(&kvm->lock); |
@@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | |||
832 | if (inti->type == KVM_S390_SIGP_STOP) | 832 | if (inti->type == KVM_S390_SIGP_STOP) |
833 | li->action_bits |= ACTION_STOP_ON_STOP; | 833 | li->action_bits |= ACTION_STOP_ON_STOP; |
834 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | 834 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); |
835 | if (waitqueue_active(&li->wq)) | 835 | if (waitqueue_active(&vcpu->wq)) |
836 | wake_up_interruptible(&vcpu->arch.local_int.wq); | 836 | wake_up_interruptible(&vcpu->wq); |
837 | spin_unlock_bh(&li->lock); | 837 | spin_unlock_bh(&li->lock); |
838 | mutex_unlock(&vcpu->kvm->lock); | 838 | mutex_unlock(&vcpu->kvm->lock); |
839 | return 0; | 839 | return 0; |
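
The interrupt paths above now wake vcpu->wq through the local_int.wq pointer (the kvm-s390.c hunk below sets vcpu->arch.local_int.wq = &vcpu->wq), so the s390-specific and the generic KVM wakeup paths operate on one and the same wait queue. A minimal sketch of that pointer-sharing arrangement, with an int standing in for wait_queue_head_t:

    #include <assert.h>

    struct vcpu { int wq; };
    struct local_int { int *wq; };

    int main(void)
    {
        struct vcpu v = { .wq = 0 };
        struct local_int li = { .wq = &v.wq };

        *li.wq = 1;             /* "wake up" via the local_int pointer */
        assert(v.wq == 1);      /* the vcpu's own queue saw it */
        return 0;
    }
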
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c1c7c683fa26..ba694d2ba51e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -59,6 +59,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
59 | { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, | 59 | { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, |
60 | { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, | 60 | { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, |
61 | { "exit_wait_state", VCPU_STAT(exit_wait_state) }, | 61 | { "exit_wait_state", VCPU_STAT(exit_wait_state) }, |
62 | { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, | ||
62 | { "instruction_stidp", VCPU_STAT(instruction_stidp) }, | 63 | { "instruction_stidp", VCPU_STAT(instruction_stidp) }, |
63 | { "instruction_spx", VCPU_STAT(instruction_spx) }, | 64 | { "instruction_spx", VCPU_STAT(instruction_spx) }, |
64 | { "instruction_stpx", VCPU_STAT(instruction_stpx) }, | 65 | { "instruction_stpx", VCPU_STAT(instruction_stpx) }, |
@@ -84,6 +85,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
84 | }; | 85 | }; |
85 | 86 | ||
86 | static unsigned long long *facilities; | 87 | static unsigned long long *facilities; |
88 | static struct gmap_notifier gmap_notifier; | ||
87 | 89 | ||
88 | /* Section: not file related */ | 90 | /* Section: not file related */ |
89 | int kvm_arch_hardware_enable(void *garbage) | 91 | int kvm_arch_hardware_enable(void *garbage) |
@@ -96,13 +98,18 @@ void kvm_arch_hardware_disable(void *garbage) | |||
96 | { | 98 | { |
97 | } | 99 | } |
98 | 100 | ||
101 | static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); | ||
102 | |||
99 | int kvm_arch_hardware_setup(void) | 103 | int kvm_arch_hardware_setup(void) |
100 | { | 104 | { |
105 | gmap_notifier.notifier_call = kvm_gmap_notifier; | ||
106 | gmap_register_ipte_notifier(&gmap_notifier); | ||
101 | return 0; | 107 | return 0; |
102 | } | 108 | } |
103 | 109 | ||
104 | void kvm_arch_hardware_unsetup(void) | 110 | void kvm_arch_hardware_unsetup(void) |
105 | { | 111 | { |
112 | gmap_unregister_ipte_notifier(&gmap_notifier); | ||
106 | } | 113 | } |
107 | 114 | ||
108 | void kvm_arch_check_processor_compat(void *rtn) | 115 | void kvm_arch_check_processor_compat(void *rtn) |
@@ -239,6 +246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
239 | kvm->arch.gmap = gmap_alloc(current->mm); | 246 | kvm->arch.gmap = gmap_alloc(current->mm); |
240 | if (!kvm->arch.gmap) | 247 | if (!kvm->arch.gmap) |
241 | goto out_nogmap; | 248 | goto out_nogmap; |
249 | kvm->arch.gmap->private = kvm; | ||
242 | } | 250 | } |
243 | 251 | ||
244 | kvm->arch.css_support = 0; | 252 | kvm->arch.css_support = 0; |
@@ -270,7 +278,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
270 | 278 | ||
271 | free_page((unsigned long)(vcpu->arch.sie_block)); | 279 | free_page((unsigned long)(vcpu->arch.sie_block)); |
272 | kvm_vcpu_uninit(vcpu); | 280 | kvm_vcpu_uninit(vcpu); |
273 | kfree(vcpu); | 281 | kmem_cache_free(kvm_vcpu_cache, vcpu); |
274 | } | 282 | } |
275 | 283 | ||
276 | static void kvm_free_vcpus(struct kvm *kvm) | 284 | static void kvm_free_vcpus(struct kvm *kvm) |
@@ -309,6 +317,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
309 | vcpu->arch.gmap = gmap_alloc(current->mm); | 317 | vcpu->arch.gmap = gmap_alloc(current->mm); |
310 | if (!vcpu->arch.gmap) | 318 | if (!vcpu->arch.gmap) |
311 | return -ENOMEM; | 319 | return -ENOMEM; |
320 | vcpu->arch.gmap->private = vcpu->kvm; | ||
312 | return 0; | 321 | return 0; |
313 | } | 322 | } |
314 | 323 | ||
@@ -373,8 +382,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
373 | { | 382 | { |
374 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | | 383 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | |
375 | CPUSTAT_SM | | 384 | CPUSTAT_SM | |
376 | CPUSTAT_STOPPED); | 385 | CPUSTAT_STOPPED | |
386 | CPUSTAT_GED); | ||
377 | vcpu->arch.sie_block->ecb = 6; | 387 | vcpu->arch.sie_block->ecb = 6; |
388 | vcpu->arch.sie_block->ecb2 = 8; | ||
378 | vcpu->arch.sie_block->eca = 0xC1002001U; | 389 | vcpu->arch.sie_block->eca = 0xC1002001U; |
379 | vcpu->arch.sie_block->fac = (int) (long) facilities; | 390 | vcpu->arch.sie_block->fac = (int) (long) facilities; |
380 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 391 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
@@ -397,7 +408,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
397 | 408 | ||
398 | rc = -ENOMEM; | 409 | rc = -ENOMEM; |
399 | 410 | ||
400 | vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); | 411 | vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); |
401 | if (!vcpu) | 412 | if (!vcpu) |
402 | goto out; | 413 | goto out; |
403 | 414 | ||
@@ -427,7 +438,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
427 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; | 438 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; |
428 | spin_lock(&kvm->arch.float_int.lock); | 439 | spin_lock(&kvm->arch.float_int.lock); |
429 | kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; | 440 | kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; |
430 | init_waitqueue_head(&vcpu->arch.local_int.wq); | 441 | vcpu->arch.local_int.wq = &vcpu->wq; |
431 | vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; | 442 | vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; |
432 | spin_unlock(&kvm->arch.float_int.lock); | 443 | spin_unlock(&kvm->arch.float_int.lock); |
433 | 444 | ||
@@ -442,7 +453,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
442 | out_free_sie_block: | 453 | out_free_sie_block: |
443 | free_page((unsigned long)(vcpu->arch.sie_block)); | 454 | free_page((unsigned long)(vcpu->arch.sie_block)); |
444 | out_free_cpu: | 455 | out_free_cpu: |
445 | kfree(vcpu); | 456 | kmem_cache_free(kvm_vcpu_cache, vcpu); |
446 | out: | 457 | out: |
447 | return ERR_PTR(rc); | 458 | return ERR_PTR(rc); |
448 | } | 459 | } |
@@ -454,6 +465,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
454 | return 0; | 465 | return 0; |
455 | } | 466 | } |
456 | 467 | ||
468 | void s390_vcpu_block(struct kvm_vcpu *vcpu) | ||
469 | { | ||
470 | atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); | ||
471 | } | ||
472 | |||
473 | void s390_vcpu_unblock(struct kvm_vcpu *vcpu) | ||
474 | { | ||
475 | atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); | ||
476 | } | ||
477 | |||
478 | /* | ||
479 | * Kick a guest cpu out of SIE and wait until SIE is not running. | ||
480 | * If the CPU is not running (e.g. waiting as idle) the function will | ||
481 | * return immediately. */ | ||
482 | void exit_sie(struct kvm_vcpu *vcpu) | ||
483 | { | ||
484 | atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); | ||
485 | while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) | ||
486 | cpu_relax(); | ||
487 | } | ||
488 | |||
489 | /* Kick a guest cpu out of SIE and prevent SIE-reentry */ | ||
490 | void exit_sie_sync(struct kvm_vcpu *vcpu) | ||
491 | { | ||
492 | s390_vcpu_block(vcpu); | ||
493 | exit_sie(vcpu); | ||
494 | } | ||
495 | |||
496 | static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) | ||
497 | { | ||
498 | int i; | ||
499 | struct kvm *kvm = gmap->private; | ||
500 | struct kvm_vcpu *vcpu; | ||
501 | |||
502 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
503 | /* match against both prefix pages */ | ||
504 | if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) { | ||
505 | VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); | ||
506 | kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); | ||
507 | exit_sie_sync(vcpu); | ||
508 | } | ||
509 | } | ||
510 | } | ||
511 | |||
457 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | 512 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) |
458 | { | 513 | { |
459 | /* kvm common code refers to this, but never calls it */ | 514 | /* kvm common code refers to this, but never calls it */ |
@@ -606,6 +661,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
606 | return -EINVAL; /* not implemented yet */ | 661 | return -EINVAL; /* not implemented yet */ |
607 | } | 662 | } |
608 | 663 | ||
664 | static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) | ||
665 | { | ||
666 | /* | ||
667 | * We use MMU_RELOAD just to re-arm the ipte notifier for the | ||
668 | * guest prefix page. gmap_ipte_notify will wait on the ptl lock. | ||
669 | * This ensures that the ipte instruction for this request has | ||
670 | * already finished. We might race against a second unmapper that | ||
671 | * wants to set the blocking bit. Let's just retry the request loop. | ||
672 | */ | ||
673 | while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { | ||
674 | int rc; | ||
675 | rc = gmap_ipte_notify(vcpu->arch.gmap, | ||
676 | vcpu->arch.sie_block->prefix, | ||
677 | PAGE_SIZE * 2); | ||
678 | if (rc) | ||
679 | return rc; | ||
680 | s390_vcpu_unblock(vcpu); | ||
681 | } | ||
682 | return 0; | ||
683 | } | ||
684 | |||
609 | static int __vcpu_run(struct kvm_vcpu *vcpu) | 685 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
610 | { | 686 | { |
611 | int rc; | 687 | int rc; |
@@ -621,6 +697,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
621 | if (!kvm_is_ucontrol(vcpu->kvm)) | 697 | if (!kvm_is_ucontrol(vcpu->kvm)) |
622 | kvm_s390_deliver_pending_interrupts(vcpu); | 698 | kvm_s390_deliver_pending_interrupts(vcpu); |
623 | 699 | ||
700 | rc = kvm_s390_handle_requests(vcpu); | ||
701 | if (rc) | ||
702 | return rc; | ||
703 | |||
624 | vcpu->arch.sie_block->icptcode = 0; | 704 | vcpu->arch.sie_block->icptcode = 0; |
625 | preempt_disable(); | 705 | preempt_disable(); |
626 | kvm_guest_enter(); | 706 | kvm_guest_enter(); |
@@ -630,7 +710,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
630 | trace_kvm_s390_sie_enter(vcpu, | 710 | trace_kvm_s390_sie_enter(vcpu, |
631 | atomic_read(&vcpu->arch.sie_block->cpuflags)); | 711 | atomic_read(&vcpu->arch.sie_block->cpuflags)); |
632 | rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); | 712 | rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); |
633 | if (rc) { | 713 | if (rc > 0) |
714 | rc = 0; | ||
715 | if (rc < 0) { | ||
634 | if (kvm_is_ucontrol(vcpu->kvm)) { | 716 | if (kvm_is_ucontrol(vcpu->kvm)) { |
635 | rc = SIE_INTERCEPT_UCONTROL; | 717 | rc = SIE_INTERCEPT_UCONTROL; |
636 | } else { | 718 | } else { |
@@ -1046,7 +1128,7 @@ static int __init kvm_s390_init(void) | |||
1046 | return -ENOMEM; | 1128 | return -ENOMEM; |
1047 | } | 1129 | } |
1048 | memcpy(facilities, S390_lowcore.stfle_fac_list, 16); | 1130 | memcpy(facilities, S390_lowcore.stfle_fac_list, 16); |
1049 | facilities[0] &= 0xff00fff3f47c0000ULL; | 1131 | facilities[0] &= 0xff82fff3f47c0000ULL; |
1050 | facilities[1] &= 0x001c000000000000ULL; | 1132 | facilities[1] &= 0x001c000000000000ULL; |
1051 | return 0; | 1133 | return 0; |
1052 | } | 1134 | } |
@@ -1059,3 +1141,12 @@ static void __exit kvm_s390_exit(void) | |||
1059 | 1141 | ||
1060 | module_init(kvm_s390_init); | 1142 | module_init(kvm_s390_init); |
1061 | module_exit(kvm_s390_exit); | 1143 | module_exit(kvm_s390_exit); |
1144 | |||
1145 | /* | ||
1146 | * Enable autoloading of the kvm module. | ||
1147 | * Note that we add the module alias here instead of virt/kvm/kvm_main.c | ||
1148 | * since x86 takes a different approach. | ||
1149 | */ | ||
1150 | #include <linux/miscdevice.h> | ||
1151 | MODULE_ALIAS_MISCDEV(KVM_MINOR); | ||
1152 | MODULE_ALIAS("devname:kvm"); | ||
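
One easy-to-miss change in this file is the widened facility mask (0xff00fff3f47c0000 to 0xff82fff3f47c0000), matching the 32-bit stfl mask change in priv.c further down. A standalone sketch that computes which STFLE facility bits the new mask additionally exposes to the guest (bit 0 being the leftmost bit, as the architecture counts them):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t old_mask = 0xff00fff3f47c0000ULL;  /* before this patch */
        uint64_t new_mask = 0xff82fff3f47c0000ULL;  /* after this patch */
        uint64_t added = new_mask & ~old_mask;
        int bit;

        for (bit = 0; bit < 64; bit++)
            if (added & (1ULL << (63 - bit)))
                printf("facility bit %d is now passed to the guest\n", bit);
        return 0;
    }
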
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index efc14f687265..028ca9fd2158 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -63,6 +63,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) | |||
63 | { | 63 | { |
64 | vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; | 64 | vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; |
65 | vcpu->arch.sie_block->ihcpu = 0xffff; | 65 | vcpu->arch.sie_block->ihcpu = 0xffff; |
66 | kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); | ||
66 | } | 67 | } |
67 | 68 | ||
68 | static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) | 69 | static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) |
@@ -85,6 +86,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, | |||
85 | *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; | 86 | *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; |
86 | } | 87 | } |
87 | 88 | ||
89 | static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) | ||
90 | { | ||
91 | *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; | ||
92 | *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; | ||
93 | } | ||
94 | |||
88 | static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) | 95 | static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) |
89 | { | 96 | { |
90 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; | 97 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; |
@@ -125,7 +132,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); | |||
125 | int kvm_s390_handle_01(struct kvm_vcpu *vcpu); | 132 | int kvm_s390_handle_01(struct kvm_vcpu *vcpu); |
126 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); | 133 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); |
127 | int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); | 134 | int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); |
128 | int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu); | 135 | int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); |
136 | int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); | ||
129 | 137 | ||
130 | /* implemented in sigp.c */ | 138 | /* implemented in sigp.c */ |
131 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); | 139 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); |
@@ -133,6 +141,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); | |||
133 | /* implemented in kvm-s390.c */ | 141 | /* implemented in kvm-s390.c */ |
134 | int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, | 142 | int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, |
135 | unsigned long addr); | 143 | unsigned long addr); |
144 | void s390_vcpu_block(struct kvm_vcpu *vcpu); | ||
145 | void s390_vcpu_unblock(struct kvm_vcpu *vcpu); | ||
146 | void exit_sie(struct kvm_vcpu *vcpu); | ||
147 | void exit_sie_sync(struct kvm_vcpu *vcpu); | ||
136 | /* implemented in diag.c */ | 148 | /* implemented in diag.c */ |
137 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); | 149 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); |
138 | 150 | ||
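
The new kvm_s390_get_regs_rre() helper pulls the two register numbers of an RRE-format instruction out of the ipb word using the masks shown above. A standalone sketch of that extraction with a made-up ipb value:

    #include <assert.h>
    #include <stdint.h>

    static void get_regs_rre(uint32_t ipb, int *r1, int *r2)
    {
        *r1 = (ipb & 0x00f00000) >> 20;
        *r2 = (ipb & 0x000f0000) >> 16;
    }

    int main(void)
    {
        int r1, r2;

        get_regs_rre(0x00570000, &r1, &r2);     /* made-up ipb word */
        assert(r1 == 5 && r2 == 7);
        return 0;
    }
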
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6bbd7b5a0bbe..0da3e6eb6be6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * handling privileged instructions | 2 | * handling privileged instructions |
3 | * | 3 | * |
4 | * Copyright IBM Corp. 2008 | 4 | * Copyright IBM Corp. 2008, 2013 |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License (version 2 only) | 7 | * it under the terms of the GNU General Public License (version 2 only) |
@@ -20,6 +20,9 @@ | |||
20 | #include <asm/debug.h> | 20 | #include <asm/debug.h> |
21 | #include <asm/ebcdic.h> | 21 | #include <asm/ebcdic.h> |
22 | #include <asm/sysinfo.h> | 22 | #include <asm/sysinfo.h> |
23 | #include <asm/pgtable.h> | ||
24 | #include <asm/pgalloc.h> | ||
25 | #include <asm/io.h> | ||
23 | #include <asm/ptrace.h> | 26 | #include <asm/ptrace.h> |
24 | #include <asm/compat.h> | 27 | #include <asm/compat.h> |
25 | #include "gaccess.h" | 28 | #include "gaccess.h" |
@@ -34,6 +37,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) | |||
34 | 37 | ||
35 | vcpu->stat.instruction_spx++; | 38 | vcpu->stat.instruction_spx++; |
36 | 39 | ||
40 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
41 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
42 | |||
37 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 43 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
38 | 44 | ||
39 | /* must be word boundary */ | 45 | /* must be word boundary */ |
@@ -65,6 +71,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) | |||
65 | 71 | ||
66 | vcpu->stat.instruction_stpx++; | 72 | vcpu->stat.instruction_stpx++; |
67 | 73 | ||
74 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
75 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
76 | |||
68 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 77 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
69 | 78 | ||
70 | /* must be word boundary */ | 79 | /* must be word boundary */ |
@@ -89,6 +98,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | |||
89 | 98 | ||
90 | vcpu->stat.instruction_stap++; | 99 | vcpu->stat.instruction_stap++; |
91 | 100 | ||
101 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
102 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
103 | |||
92 | useraddr = kvm_s390_get_base_disp_s(vcpu); | 104 | useraddr = kvm_s390_get_base_disp_s(vcpu); |
93 | 105 | ||
94 | if (useraddr & 1) | 106 | if (useraddr & 1) |
@@ -105,7 +117,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | |||
105 | static int handle_skey(struct kvm_vcpu *vcpu) | 117 | static int handle_skey(struct kvm_vcpu *vcpu) |
106 | { | 118 | { |
107 | vcpu->stat.instruction_storage_key++; | 119 | vcpu->stat.instruction_storage_key++; |
108 | vcpu->arch.sie_block->gpsw.addr -= 4; | 120 | |
121 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
122 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
123 | |||
124 | vcpu->arch.sie_block->gpsw.addr = | ||
125 | __rewind_psw(vcpu->arch.sie_block->gpsw, 4); | ||
109 | VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); | 126 | VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); |
110 | return 0; | 127 | return 0; |
111 | } | 128 | } |
@@ -129,9 +146,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu) | |||
129 | * Store the two-word I/O interruption code into the | 146 | * Store the two-word I/O interruption code into the |
130 | * provided area. | 147 | * provided area. |
131 | */ | 148 | */ |
132 | put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); | 149 | if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr) |
133 | put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); | 150 | || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2)) |
134 | put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); | 151 | || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4))) |
152 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
135 | } else { | 153 | } else { |
136 | /* | 154 | /* |
137 | * Store the three-word I/O interruption code into | 155 | * Store the three-word I/O interruption code into |
@@ -182,6 +200,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) | |||
182 | { | 200 | { |
183 | VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); | 201 | VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); |
184 | 202 | ||
203 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
204 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
205 | |||
185 | if (vcpu->kvm->arch.css_support) { | 206 | if (vcpu->kvm->arch.css_support) { |
186 | /* | 207 | /* |
187 | * Most I/O instructions will be handled by userspace. | 208 | * Most I/O instructions will be handled by userspace. |
@@ -210,8 +231,12 @@ static int handle_stfl(struct kvm_vcpu *vcpu) | |||
210 | int rc; | 231 | int rc; |
211 | 232 | ||
212 | vcpu->stat.instruction_stfl++; | 233 | vcpu->stat.instruction_stfl++; |
234 | |||
235 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
236 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
237 | |||
213 | /* only pass the facility bits, which we can handle */ | 238 | /* only pass the facility bits, which we can handle */ |
214 | facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; | 239 | facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3; |
215 | 240 | ||
216 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), | 241 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), |
217 | &facility_list, sizeof(facility_list)); | 242 | &facility_list, sizeof(facility_list)); |
@@ -255,8 +280,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) | |||
255 | u64 addr; | 280 | u64 addr; |
256 | 281 | ||
257 | if (gpsw->mask & PSW_MASK_PSTATE) | 282 | if (gpsw->mask & PSW_MASK_PSTATE) |
258 | return kvm_s390_inject_program_int(vcpu, | 283 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
259 | PGM_PRIVILEGED_OPERATION); | 284 | |
260 | addr = kvm_s390_get_base_disp_s(vcpu); | 285 | addr = kvm_s390_get_base_disp_s(vcpu); |
261 | if (addr & 7) | 286 | if (addr & 7) |
262 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 287 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -278,6 +303,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) | |||
278 | psw_t new_psw; | 303 | psw_t new_psw; |
279 | u64 addr; | 304 | u64 addr; |
280 | 305 | ||
306 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
307 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
308 | |||
281 | addr = kvm_s390_get_base_disp_s(vcpu); | 309 | addr = kvm_s390_get_base_disp_s(vcpu); |
282 | if (addr & 7) | 310 | if (addr & 7) |
283 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 311 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -296,6 +324,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu) | |||
296 | 324 | ||
297 | vcpu->stat.instruction_stidp++; | 325 | vcpu->stat.instruction_stidp++; |
298 | 326 | ||
327 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
328 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
329 | |||
299 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 330 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
300 | 331 | ||
301 | if (operand2 & 7) | 332 | if (operand2 & 7) |
@@ -351,16 +382,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
351 | vcpu->stat.instruction_stsi++; | 382 | vcpu->stat.instruction_stsi++; |
352 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); | 383 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); |
353 | 384 | ||
354 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 385 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
386 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
387 | |||
388 | if (fc > 3) { | ||
389 | vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ | ||
390 | return 0; | ||
391 | } | ||
355 | 392 | ||
356 | if (operand2 & 0xfff && fc > 0) | 393 | if (vcpu->run->s.regs.gprs[0] & 0x0fffff00 |
394 | || vcpu->run->s.regs.gprs[1] & 0xffff0000) | ||
357 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 395 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
358 | 396 | ||
359 | switch (fc) { | 397 | if (fc == 0) { |
360 | case 0: | ||
361 | vcpu->run->s.regs.gprs[0] = 3 << 28; | 398 | vcpu->run->s.regs.gprs[0] = 3 << 28; |
362 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 399 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ |
363 | return 0; | 400 | return 0; |
401 | } | ||
402 | |||
403 | operand2 = kvm_s390_get_base_disp_s(vcpu); | ||
404 | |||
405 | if (operand2 & 0xfff) | ||
406 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
407 | |||
408 | switch (fc) { | ||
364 | case 1: /* same handling for 1 and 2 */ | 409 | case 1: /* same handling for 1 and 2 */ |
365 | case 2: | 410 | case 2: |
366 | mem = get_zeroed_page(GFP_KERNEL); | 411 | mem = get_zeroed_page(GFP_KERNEL); |
@@ -377,8 +422,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
377 | goto out_no_data; | 422 | goto out_no_data; |
378 | handle_stsi_3_2_2(vcpu, (void *) mem); | 423 | handle_stsi_3_2_2(vcpu, (void *) mem); |
379 | break; | 424 | break; |
380 | default: | ||
381 | goto out_no_data; | ||
382 | } | 425 | } |
383 | 426 | ||
384 | if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { | 427 | if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { |
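The reworked handle_stsi() above reports its result through the two condition-code bits of the guest PSW mask (the "3ul << 44" manipulation: cc 3 for an unsupported function code, cc 0 on success). As a quick illustration, here is a minimal userspace sketch of that bit field; PSW_CC_SHIFT, PSW_CC_MASK and the helper names are invented for the example and are not part of the patch.

/* Minimal sketch (not kernel code): a 2-bit condition code kept in bits
 * 44-45 of a 64-bit PSW mask, mirroring the "3ul << 44" logic above. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PSW_CC_SHIFT 44
#define PSW_CC_MASK  (3ULL << PSW_CC_SHIFT)

static uint64_t psw_set_cc(uint64_t mask, unsigned int cc)
{
	return (mask & ~PSW_CC_MASK) | ((uint64_t)(cc & 3) << PSW_CC_SHIFT);
}

static unsigned int psw_get_cc(uint64_t mask)
{
	return (mask & PSW_CC_MASK) >> PSW_CC_SHIFT;
}

int main(void)
{
	uint64_t mask = 0;

	mask = psw_set_cc(mask, 3);	/* fc > 3: report cc 3 */
	assert(psw_get_cc(mask) == 3);
	mask &= ~PSW_CC_MASK;		/* fc == 0: report cc 0 */
	assert(psw_get_cc(mask) == 0);
	printf("cc manipulation ok\n");
	return 0;
}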
@@ -432,20 +475,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) | |||
432 | intercept_handler_t handler; | 475 | intercept_handler_t handler; |
433 | 476 | ||
434 | /* | 477 | /* |
435 | * a lot of B2 instructions are privileged. We first check for | 478 | * A lot of B2 instructions are privileged. Here we check for |
436 | * the privileged ones, that we can handle in the kernel. If the | 479 | * the privileged ones, that we can handle in the kernel. |
437 | * kernel can handle this instruction, we check for the problem | 480 | * Anything else goes to userspace. |
438 | * state bit and (a) handle the instruction or (b) send a code 2 | 481 | */ |
439 | * program check. | ||
440 | * Anything else goes to userspace.*/ | ||
441 | handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; | 482 | handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; |
442 | if (handler) { | 483 | if (handler) |
443 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 484 | return handler(vcpu); |
444 | return kvm_s390_inject_program_int(vcpu, | 485 | |
445 | PGM_PRIVILEGED_OPERATION); | ||
446 | else | ||
447 | return handler(vcpu); | ||
448 | } | ||
449 | return -EOPNOTSUPP; | 486 | return -EOPNOTSUPP; |
450 | } | 487 | } |
451 | 488 | ||
@@ -453,8 +490,7 @@ static int handle_epsw(struct kvm_vcpu *vcpu) | |||
453 | { | 490 | { |
454 | int reg1, reg2; | 491 | int reg1, reg2; |
455 | 492 | ||
456 | reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24; | 493 | kvm_s390_get_regs_rre(vcpu, ®1, ®2); |
457 | reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; | ||
458 | 494 | ||
459 | /* This basically extracts the mask half of the psw. */ | 495 | /* This basically extracts the mask half of the psw. */ |
460 | vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; | 496 | vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; |
@@ -467,9 +503,88 @@ static int handle_epsw(struct kvm_vcpu *vcpu) | |||
467 | return 0; | 503 | return 0; |
468 | } | 504 | } |
469 | 505 | ||
506 | #define PFMF_RESERVED 0xfffc0101UL | ||
507 | #define PFMF_SK 0x00020000UL | ||
508 | #define PFMF_CF 0x00010000UL | ||
509 | #define PFMF_UI 0x00008000UL | ||
510 | #define PFMF_FSC 0x00007000UL | ||
511 | #define PFMF_NQ 0x00000800UL | ||
512 | #define PFMF_MR 0x00000400UL | ||
513 | #define PFMF_MC 0x00000200UL | ||
514 | #define PFMF_KEY 0x000000feUL | ||
515 | |||
516 | static int handle_pfmf(struct kvm_vcpu *vcpu) | ||
517 | { | ||
518 | int reg1, reg2; | ||
519 | unsigned long start, end; | ||
520 | |||
521 | vcpu->stat.instruction_pfmf++; | ||
522 | |||
523 | kvm_s390_get_regs_rre(vcpu, ®1, ®2); | ||
524 | |||
525 | if (!MACHINE_HAS_PFMF) | ||
526 | return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); | ||
527 | |||
528 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
529 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
530 | |||
531 | if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) | ||
532 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
533 | |||
534 | /* Only provide non-quiescing support if the host supports it */ | ||
535 | if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && | ||
536 | S390_lowcore.stfl_fac_list & 0x00020000) | ||
537 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
538 | |||
539 | /* No support for conditional-SSKE */ | ||
540 | if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) | ||
541 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
542 | |||
543 | start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; | ||
544 | switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { | ||
545 | case 0x00000000: | ||
546 | end = (start + (1UL << 12)) & ~((1UL << 12) - 1); | ||
547 | break; | ||
548 | case 0x00001000: | ||
549 | end = (start + (1UL << 20)) & ~((1UL << 20) - 1); | ||
550 | break; | ||
551 | /* We don't support EDAT2 | ||
552 | case 0x00002000: | ||
553 | end = (start + (1UL << 31)) & ~((1UL << 31) - 1); | ||
554 | break;*/ | ||
555 | default: | ||
556 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
557 | } | ||
558 | while (start < end) { | ||
559 | unsigned long useraddr; | ||
560 | |||
561 | useraddr = gmap_translate(start, vcpu->arch.gmap); | ||
562 | if (IS_ERR((void *)useraddr)) | ||
563 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
564 | |||
565 | if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { | ||
566 | if (clear_user((void __user *)useraddr, PAGE_SIZE)) | ||
567 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
568 | } | ||
569 | |||
570 | if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { | ||
571 | if (set_guest_storage_key(current->mm, useraddr, | ||
572 | vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, | ||
573 | vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) | ||
574 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
575 | } | ||
576 | |||
577 | start += PAGE_SIZE; | ||
578 | } | ||
579 | if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) | ||
580 | vcpu->run->s.regs.gprs[reg2] = end; | ||
581 | return 0; | ||
582 | } | ||
583 | |||
470 | static const intercept_handler_t b9_handlers[256] = { | 584 | static const intercept_handler_t b9_handlers[256] = { |
471 | [0x8d] = handle_epsw, | 585 | [0x8d] = handle_epsw, |
472 | [0x9c] = handle_io_inst, | 586 | [0x9c] = handle_io_inst, |
587 | [0xaf] = handle_pfmf, | ||
473 | }; | 588 | }; |
474 | 589 | ||
475 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) | 590 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) |
@@ -478,29 +593,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) | |||
478 | 593 | ||
479 | /* This is handled just as for the B2 instructions. */ | 594 | /* This is handled just as for the B2 instructions. */ |
480 | handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; | 595 | handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; |
481 | if (handler) { | 596 | if (handler) |
482 | if ((handler != handle_epsw) && | 597 | return handler(vcpu); |
483 | (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)) | 598 | |
484 | return kvm_s390_inject_program_int(vcpu, | ||
485 | PGM_PRIVILEGED_OPERATION); | ||
486 | else | ||
487 | return handler(vcpu); | ||
488 | } | ||
489 | return -EOPNOTSUPP; | 599 | return -EOPNOTSUPP; |
490 | } | 600 | } |
491 | 601 | ||
602 | int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) | ||
603 | { | ||
604 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | ||
605 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; | ||
606 | u64 useraddr; | ||
607 | u32 val = 0; | ||
608 | int reg, rc; | ||
609 | |||
610 | vcpu->stat.instruction_lctl++; | ||
611 | |||
612 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
613 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
614 | |||
615 | useraddr = kvm_s390_get_base_disp_rs(vcpu); | ||
616 | |||
617 | if (useraddr & 3) | ||
618 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
619 | |||
620 | VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, | ||
621 | useraddr); | ||
622 | trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); | ||
623 | |||
624 | reg = reg1; | ||
625 | do { | ||
626 | rc = get_guest(vcpu, val, (u32 __user *) useraddr); | ||
627 | if (rc) | ||
628 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
629 | vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; | ||
630 | vcpu->arch.sie_block->gcr[reg] |= val; | ||
631 | useraddr += 4; | ||
632 | if (reg == reg3) | ||
633 | break; | ||
634 | reg = (reg + 1) % 16; | ||
635 | } while (1); | ||
636 | |||
637 | return 0; | ||
638 | } | ||
639 | |||
640 | static int handle_lctlg(struct kvm_vcpu *vcpu) | ||
641 | { | ||
642 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | ||
643 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; | ||
644 | u64 useraddr; | ||
645 | int reg, rc; | ||
646 | |||
647 | vcpu->stat.instruction_lctlg++; | ||
648 | |||
649 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
650 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
651 | |||
652 | useraddr = kvm_s390_get_base_disp_rsy(vcpu); | ||
653 | |||
654 | if (useraddr & 7) | ||
655 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
656 | |||
657 | reg = reg1; | ||
658 | |||
659 | VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, | ||
660 | useraddr); | ||
661 | trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); | ||
662 | |||
663 | do { | ||
664 | rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], | ||
665 | (u64 __user *) useraddr); | ||
666 | if (rc) | ||
667 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
668 | useraddr += 8; | ||
669 | if (reg == reg3) | ||
670 | break; | ||
671 | reg = (reg + 1) % 16; | ||
672 | } while (1); | ||
673 | |||
674 | return 0; | ||
675 | } | ||
676 | |||
492 | static const intercept_handler_t eb_handlers[256] = { | 677 | static const intercept_handler_t eb_handlers[256] = { |
678 | [0x2f] = handle_lctlg, | ||
493 | [0x8a] = handle_io_inst, | 679 | [0x8a] = handle_io_inst, |
494 | }; | 680 | }; |
495 | 681 | ||
496 | int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu) | 682 | int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) |
497 | { | 683 | { |
498 | intercept_handler_t handler; | 684 | intercept_handler_t handler; |
499 | 685 | ||
500 | /* All eb instructions that end up here are privileged. */ | ||
501 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
502 | return kvm_s390_inject_program_int(vcpu, | ||
503 | PGM_PRIVILEGED_OPERATION); | ||
504 | handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; | 686 | handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; |
505 | if (handler) | 687 | if (handler) |
506 | return handler(vcpu); | 688 | return handler(vcpu); |
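Both new control-register handlers above (kvm_s390_handle_lctl() and handle_lctlg()) load registers r1 through r3 with wrap-around modulo 16, matching the architecture's register-range semantics. A tiny userspace sketch of just that walk, with an invented function name:

/* Illustrative only: which control registers an r1..r3 range covers,
 * including wrap-around (e.g. r1=14, r3=2 covers 14, 15, 0, 1, 2). */
#include <stdio.h>

static void walk_ctl_regs(int reg1, int reg3)
{
	int reg = reg1;

	do {
		printf("load cr%d\n", reg);
		if (reg == reg3)
			break;
		reg = (reg + 1) % 16;
	} while (1);
}

int main(void)
{
	walk_ctl_regs(14, 2);
	return 0;
}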
@@ -515,6 +697,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu) | |||
515 | 697 | ||
516 | vcpu->stat.instruction_tprot++; | 698 | vcpu->stat.instruction_tprot++; |
517 | 699 | ||
700 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | ||
701 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | ||
702 | |||
518 | kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); | 703 | kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); |
519 | 704 | ||
520 | /* we only handle the Linux memory detection case: | 705 | /* we only handle the Linux memory detection case: |
@@ -560,8 +745,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) | |||
560 | u32 value; | 745 | u32 value; |
561 | 746 | ||
562 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 747 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
563 | return kvm_s390_inject_program_int(vcpu, | 748 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
564 | PGM_PRIVILEGED_OPERATION); | ||
565 | 749 | ||
566 | if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) | 750 | if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) |
567 | return kvm_s390_inject_program_int(vcpu, | 751 | return kvm_s390_inject_program_int(vcpu, |
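The new handle_pfmf() above walks the operand range one 4K page at a time, with the frame-size code deciding whether the range is a single page or the remainder of the 1M block containing the start address. Below is a small standalone sketch of that range computation; the frame-size encoding is simplified, the names are invented, and 4K pages are assumed.

/* Userspace sketch of the PFMF range computation (not the kernel code). */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

static unsigned long pfmf_end(unsigned long start, unsigned int fsc)
{
	switch (fsc) {
	case 0x0:	/* 4K frame: exactly one page */
		return (start + (1UL << 12)) & ~((1UL << 12) - 1);
	case 0x1:	/* 1M frame: up to the next 1M boundary */
		return (start + (1UL << 20)) & ~((1UL << 20) - 1);
	default:	/* 2G (EDAT2) not handled, as in the patch */
		return 0;
	}
}

int main(void)
{
	unsigned long start = 0x12345678UL & PAGE_MASK;
	unsigned long end = pfmf_end(start, 0x1);
	unsigned long pages = 0;

	for (; start < end; start += PAGE_SIZE)
		pages++;	/* one clear/set-key operation per page */
	printf("pages touched: %lu\n", pages);
	return 0;
}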
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 1c48ab2845e0..bec398c57acf 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
@@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) | |||
79 | list_add_tail(&inti->list, &li->list); | 79 | list_add_tail(&inti->list, &li->list); |
80 | atomic_set(&li->active, 1); | 80 | atomic_set(&li->active, 1); |
81 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | 81 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); |
82 | if (waitqueue_active(&li->wq)) | 82 | if (waitqueue_active(li->wq)) |
83 | wake_up_interruptible(&li->wq); | 83 | wake_up_interruptible(li->wq); |
84 | spin_unlock_bh(&li->lock); | 84 | spin_unlock_bh(&li->lock); |
85 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; | 85 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; |
86 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); | 86 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); |
@@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) | |||
117 | list_add_tail(&inti->list, &li->list); | 117 | list_add_tail(&inti->list, &li->list); |
118 | atomic_set(&li->active, 1); | 118 | atomic_set(&li->active, 1); |
119 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | 119 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); |
120 | if (waitqueue_active(&li->wq)) | 120 | if (waitqueue_active(li->wq)) |
121 | wake_up_interruptible(&li->wq); | 121 | wake_up_interruptible(li->wq); |
122 | spin_unlock_bh(&li->lock); | 122 | spin_unlock_bh(&li->lock); |
123 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; | 123 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; |
124 | VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); | 124 | VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); |
@@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) | |||
145 | atomic_set(&li->active, 1); | 145 | atomic_set(&li->active, 1); |
146 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); | 146 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); |
147 | li->action_bits |= action; | 147 | li->action_bits |= action; |
148 | if (waitqueue_active(&li->wq)) | 148 | if (waitqueue_active(li->wq)) |
149 | wake_up_interruptible(&li->wq); | 149 | wake_up_interruptible(li->wq); |
150 | out: | 150 | out: |
151 | spin_unlock_bh(&li->lock); | 151 | spin_unlock_bh(&li->lock); |
152 | 152 | ||
@@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, | |||
250 | 250 | ||
251 | list_add_tail(&inti->list, &li->list); | 251 | list_add_tail(&inti->list, &li->list); |
252 | atomic_set(&li->active, 1); | 252 | atomic_set(&li->active, 1); |
253 | if (waitqueue_active(&li->wq)) | 253 | if (waitqueue_active(li->wq)) |
254 | wake_up_interruptible(&li->wq); | 254 | wake_up_interruptible(li->wq); |
255 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; | 255 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; |
256 | 256 | ||
257 | VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); | 257 | VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); |
@@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
333 | 333 | ||
334 | /* sigp in userspace can exit */ | 334 | /* sigp in userspace can exit */ |
335 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 335 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
336 | return kvm_s390_inject_program_int(vcpu, | 336 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
337 | PGM_PRIVILEGED_OPERATION); | ||
338 | 337 | ||
339 | order_code = kvm_s390_get_base_disp_rs(vcpu); | 338 | order_code = kvm_s390_get_base_disp_rs(vcpu); |
340 | 339 | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 74c29d922458..17bf4d3d303a 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -689,7 +689,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) | |||
689 | entry = *ptep; | 689 | entry = *ptep; |
690 | if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { | 690 | if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { |
691 | pgste = pgste_get_lock(ptep); | 691 | pgste = pgste_get_lock(ptep); |
692 | pgste_val(pgste) |= RCP_IN_BIT; | 692 | pgste_val(pgste) |= PGSTE_IN_BIT; |
693 | pgste_set_unlock(ptep, pgste); | 693 | pgste_set_unlock(ptep, pgste); |
694 | start += PAGE_SIZE; | 694 | start += PAGE_SIZE; |
695 | len -= PAGE_SIZE; | 695 | len -= PAGE_SIZE; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index af9c5525434d..f87f7fcefa0a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -222,14 +222,22 @@ struct kvm_mmu_page { | |||
222 | int root_count; /* Currently serving as active root */ | 222 | int root_count; /* Currently serving as active root */ |
223 | unsigned int unsync_children; | 223 | unsigned int unsync_children; |
224 | unsigned long parent_ptes; /* Reverse mapping for parent_pte */ | 224 | unsigned long parent_ptes; /* Reverse mapping for parent_pte */ |
225 | |||
226 | /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ | ||
227 | unsigned long mmu_valid_gen; | ||
228 | |||
225 | DECLARE_BITMAP(unsync_child_bitmap, 512); | 229 | DECLARE_BITMAP(unsync_child_bitmap, 512); |
226 | 230 | ||
227 | #ifdef CONFIG_X86_32 | 231 | #ifdef CONFIG_X86_32 |
232 | /* | ||
233 | * Used out of the mmu-lock to avoid reading spte values while an | ||
234 | * update is in progress; see the comments in __get_spte_lockless(). | ||
235 | */ | ||
228 | int clear_spte_count; | 236 | int clear_spte_count; |
229 | #endif | 237 | #endif |
230 | 238 | ||
239 | /* Number of writes since the last time traversal visited this page. */ | ||
231 | int write_flooding_count; | 240 | int write_flooding_count; |
232 | bool mmio_cached; | ||
233 | }; | 241 | }; |
234 | 242 | ||
235 | struct kvm_pio_request { | 243 | struct kvm_pio_request { |
@@ -529,11 +537,14 @@ struct kvm_arch { | |||
529 | unsigned int n_requested_mmu_pages; | 537 | unsigned int n_requested_mmu_pages; |
530 | unsigned int n_max_mmu_pages; | 538 | unsigned int n_max_mmu_pages; |
531 | unsigned int indirect_shadow_pages; | 539 | unsigned int indirect_shadow_pages; |
540 | unsigned long mmu_valid_gen; | ||
532 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 541 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
533 | /* | 542 | /* |
534 | * Hash table of struct kvm_mmu_page. | 543 | * Hash table of struct kvm_mmu_page. |
535 | */ | 544 | */ |
536 | struct list_head active_mmu_pages; | 545 | struct list_head active_mmu_pages; |
546 | struct list_head zapped_obsolete_pages; | ||
547 | |||
537 | struct list_head assigned_dev_head; | 548 | struct list_head assigned_dev_head; |
538 | struct iommu_domain *iommu_domain; | 549 | struct iommu_domain *iommu_domain; |
539 | int iommu_flags; | 550 | int iommu_flags; |
@@ -769,7 +780,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
769 | struct kvm_memory_slot *slot, | 780 | struct kvm_memory_slot *slot, |
770 | gfn_t gfn_offset, unsigned long mask); | 781 | gfn_t gfn_offset, unsigned long mask); |
771 | void kvm_mmu_zap_all(struct kvm *kvm); | 782 | void kvm_mmu_zap_all(struct kvm *kvm); |
772 | void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); | 783 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm); |
773 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 784 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
774 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 785 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
775 | 786 | ||
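The per-page mmu_valid_gen field added above pairs with the kvm_arch-wide counter: a shadow page is considered obsolete as soon as its recorded generation no longer matches the global one, which is what lets "fast invalidate all pages" work by bumping a counter instead of walking every page up front. A toy sketch of that check, using mock structure and function names:

/* Illustrative sketch, not the kernel implementation. */
#include <stdbool.h>
#include <stdio.h>

struct mock_kvm_arch { unsigned long mmu_valid_gen; };
struct mock_mmu_page { unsigned long mmu_valid_gen; };

static bool is_obsolete_sp(struct mock_kvm_arch *arch, struct mock_mmu_page *sp)
{
	return sp->mmu_valid_gen != arch->mmu_valid_gen;
}

int main(void)
{
	struct mock_kvm_arch arch = { .mmu_valid_gen = 0 };
	struct mock_mmu_page sp = { .mmu_valid_gen = arch.mmu_valid_gen };

	printf("obsolete before bump: %d\n", is_obsolete_sp(&arch, &sp));
	arch.mmu_valid_gen++;	/* "fast invalidate all pages" */
	printf("obsolete after bump:  %d\n", is_obsolete_sp(&arch, &sp));
	return 0;
}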
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d609e1d84048..bf4fb04d0112 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -5,12 +5,13 @@ CFLAGS_x86.o := -I. | |||
5 | CFLAGS_svm.o := -I. | 5 | CFLAGS_svm.o := -I. |
6 | CFLAGS_vmx.o := -I. | 6 | CFLAGS_vmx.o := -I. |
7 | 7 | ||
8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 8 | KVM := ../../../virt/kvm |
9 | coalesced_mmio.o irq_comm.o eventfd.o \ | 9 | |
10 | irqchip.o) | 10 | kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ |
11 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ | 11 | $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ |
12 | assigned-dev.o iommu.o) | 12 | $(KVM)/eventfd.o $(KVM)/irqchip.o |
13 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | 13 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o |
14 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o | ||
14 | 15 | ||
15 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 16 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
16 | i8254.o cpuid.o pmu.o | 17 | i8254.o cpuid.o pmu.o |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5953dcea752d..2bc1e81045b0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -61,6 +61,8 @@ | |||
61 | #define OpMem8 26ull /* 8-bit zero extended memory operand */ | 61 | #define OpMem8 26ull /* 8-bit zero extended memory operand */ |
62 | #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */ | 62 | #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */ |
63 | #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */ | 63 | #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */ |
64 | #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */ | ||
65 | #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */ | ||
64 | 66 | ||
65 | #define OpBits 5 /* Width of operand field */ | 67 | #define OpBits 5 /* Width of operand field */ |
66 | #define OpMask ((1ull << OpBits) - 1) | 68 | #define OpMask ((1ull << OpBits) - 1) |
@@ -86,6 +88,7 @@ | |||
86 | #define DstMem64 (OpMem64 << DstShift) | 88 | #define DstMem64 (OpMem64 << DstShift) |
87 | #define DstImmUByte (OpImmUByte << DstShift) | 89 | #define DstImmUByte (OpImmUByte << DstShift) |
88 | #define DstDX (OpDX << DstShift) | 90 | #define DstDX (OpDX << DstShift) |
91 | #define DstAccLo (OpAccLo << DstShift) | ||
89 | #define DstMask (OpMask << DstShift) | 92 | #define DstMask (OpMask << DstShift) |
90 | /* Source operand type. */ | 93 | /* Source operand type. */ |
91 | #define SrcShift 6 | 94 | #define SrcShift 6 |
@@ -108,6 +111,7 @@ | |||
108 | #define SrcImm64 (OpImm64 << SrcShift) | 111 | #define SrcImm64 (OpImm64 << SrcShift) |
109 | #define SrcDX (OpDX << SrcShift) | 112 | #define SrcDX (OpDX << SrcShift) |
110 | #define SrcMem8 (OpMem8 << SrcShift) | 113 | #define SrcMem8 (OpMem8 << SrcShift) |
114 | #define SrcAccHi (OpAccHi << SrcShift) | ||
111 | #define SrcMask (OpMask << SrcShift) | 115 | #define SrcMask (OpMask << SrcShift) |
112 | #define BitOp (1<<11) | 116 | #define BitOp (1<<11) |
113 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ | 117 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ |
@@ -138,6 +142,7 @@ | |||
138 | /* Source 2 operand type */ | 142 | /* Source 2 operand type */ |
139 | #define Src2Shift (31) | 143 | #define Src2Shift (31) |
140 | #define Src2None (OpNone << Src2Shift) | 144 | #define Src2None (OpNone << Src2Shift) |
145 | #define Src2Mem (OpMem << Src2Shift) | ||
141 | #define Src2CL (OpCL << Src2Shift) | 146 | #define Src2CL (OpCL << Src2Shift) |
142 | #define Src2ImmByte (OpImmByte << Src2Shift) | 147 | #define Src2ImmByte (OpImmByte << Src2Shift) |
143 | #define Src2One (OpOne << Src2Shift) | 148 | #define Src2One (OpOne << Src2Shift) |
@@ -155,6 +160,9 @@ | |||
155 | #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ | 160 | #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ |
156 | #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ | 161 | #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ |
157 | #define NoWrite ((u64)1 << 45) /* No writeback */ | 162 | #define NoWrite ((u64)1 << 45) /* No writeback */ |
163 | #define SrcWrite ((u64)1 << 46) /* Write back src operand */ | ||
164 | |||
165 | #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) | ||
158 | 166 | ||
159 | #define X2(x...) x, x | 167 | #define X2(x...) x, x |
160 | #define X3(x...) X2(x), x | 168 | #define X3(x...) X2(x), x |
@@ -171,10 +179,11 @@ | |||
171 | /* | 179 | /* |
172 | * fastop functions have a special calling convention: | 180 | * fastop functions have a special calling convention: |
173 | * | 181 | * |
174 | * dst: [rdx]:rax (in/out) | 182 | * dst: rax (in/out) |
175 | * src: rbx (in/out) | 183 | * src: rdx (in/out) |
176 | * src2: rcx (in) | 184 | * src2: rcx (in) |
177 | * flags: rflags (in/out) | 185 | * flags: rflags (in/out) |
186 | * ex: rsi (in:fastop pointer, out:zero if exception) | ||
178 | * | 187 | * |
179 | * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for | 188 | * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for |
180 | * different operand sizes can be reached by calculation, rather than a jump | 189 | * different operand sizes can be reached by calculation, rather than a jump |
@@ -276,174 +285,17 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) | |||
276 | } | 285 | } |
277 | 286 | ||
278 | /* | 287 | /* |
279 | * Instruction emulation: | ||
280 | * Most instructions are emulated directly via a fragment of inline assembly | ||
281 | * code. This allows us to save/restore EFLAGS and thus very easily pick up | ||
282 | * any modified flags. | ||
283 | */ | ||
284 | |||
285 | #if defined(CONFIG_X86_64) | ||
286 | #define _LO32 "k" /* force 32-bit operand */ | ||
287 | #define _STK "%%rsp" /* stack pointer */ | ||
288 | #elif defined(__i386__) | ||
289 | #define _LO32 "" /* force 32-bit operand */ | ||
290 | #define _STK "%%esp" /* stack pointer */ | ||
291 | #endif | ||
292 | |||
293 | /* | ||
294 | * These EFLAGS bits are restored from saved value during emulation, and | 288 | * These EFLAGS bits are restored from saved value during emulation, and |
295 | * any changes are written back to the saved value after emulation. | 289 | * any changes are written back to the saved value after emulation. |
296 | */ | 290 | */ |
297 | #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) | 291 | #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) |
298 | 292 | ||
299 | /* Before executing instruction: restore necessary bits in EFLAGS. */ | ||
300 | #define _PRE_EFLAGS(_sav, _msk, _tmp) \ | ||
301 | /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \ | ||
302 | "movl %"_sav",%"_LO32 _tmp"; " \ | ||
303 | "push %"_tmp"; " \ | ||
304 | "push %"_tmp"; " \ | ||
305 | "movl %"_msk",%"_LO32 _tmp"; " \ | ||
306 | "andl %"_LO32 _tmp",("_STK"); " \ | ||
307 | "pushf; " \ | ||
308 | "notl %"_LO32 _tmp"; " \ | ||
309 | "andl %"_LO32 _tmp",("_STK"); " \ | ||
310 | "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \ | ||
311 | "pop %"_tmp"; " \ | ||
312 | "orl %"_LO32 _tmp",("_STK"); " \ | ||
313 | "popf; " \ | ||
314 | "pop %"_sav"; " | ||
315 | |||
316 | /* After executing instruction: write-back necessary bits in EFLAGS. */ | ||
317 | #define _POST_EFLAGS(_sav, _msk, _tmp) \ | ||
318 | /* _sav |= EFLAGS & _msk; */ \ | ||
319 | "pushf; " \ | ||
320 | "pop %"_tmp"; " \ | ||
321 | "andl %"_msk",%"_LO32 _tmp"; " \ | ||
322 | "orl %"_LO32 _tmp",%"_sav"; " | ||
323 | |||
324 | #ifdef CONFIG_X86_64 | 293 | #ifdef CONFIG_X86_64 |
325 | #define ON64(x) x | 294 | #define ON64(x) x |
326 | #else | 295 | #else |
327 | #define ON64(x) | 296 | #define ON64(x) |
328 | #endif | 297 | #endif |
329 | 298 | ||
330 | #define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype) \ | ||
331 | do { \ | ||
332 | __asm__ __volatile__ ( \ | ||
333 | _PRE_EFLAGS("0", "4", "2") \ | ||
334 | _op _suffix " %"_x"3,%1; " \ | ||
335 | _POST_EFLAGS("0", "4", "2") \ | ||
336 | : "=m" ((ctxt)->eflags), \ | ||
337 | "+q" (*(_dsttype*)&(ctxt)->dst.val), \ | ||
338 | "=&r" (_tmp) \ | ||
339 | : _y ((ctxt)->src.val), "i" (EFLAGS_MASK)); \ | ||
340 | } while (0) | ||
341 | |||
342 | |||
343 | /* Raw emulation: instruction has two explicit operands. */ | ||
344 | #define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy) \ | ||
345 | do { \ | ||
346 | unsigned long _tmp; \ | ||
347 | \ | ||
348 | switch ((ctxt)->dst.bytes) { \ | ||
349 | case 2: \ | ||
350 | ____emulate_2op(ctxt,_op,_wx,_wy,"w",u16); \ | ||
351 | break; \ | ||
352 | case 4: \ | ||
353 | ____emulate_2op(ctxt,_op,_lx,_ly,"l",u32); \ | ||
354 | break; \ | ||
355 | case 8: \ | ||
356 | ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \ | ||
357 | break; \ | ||
358 | } \ | ||
359 | } while (0) | ||
360 | |||
361 | #define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ | ||
362 | do { \ | ||
363 | unsigned long _tmp; \ | ||
364 | switch ((ctxt)->dst.bytes) { \ | ||
365 | case 1: \ | ||
366 | ____emulate_2op(ctxt,_op,_bx,_by,"b",u8); \ | ||
367 | break; \ | ||
368 | default: \ | ||
369 | __emulate_2op_nobyte(ctxt, _op, \ | ||
370 | _wx, _wy, _lx, _ly, _qx, _qy); \ | ||
371 | break; \ | ||
372 | } \ | ||
373 | } while (0) | ||
374 | |||
375 | /* Source operand is byte-sized and may be restricted to just %cl. */ | ||
376 | #define emulate_2op_SrcB(ctxt, _op) \ | ||
377 | __emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c") | ||
378 | |||
379 | /* Source operand is byte, word, long or quad sized. */ | ||
380 | #define emulate_2op_SrcV(ctxt, _op) \ | ||
381 | __emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r") | ||
382 | |||
383 | /* Source operand is word, long or quad sized. */ | ||
384 | #define emulate_2op_SrcV_nobyte(ctxt, _op) \ | ||
385 | __emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r") | ||
386 | |||
387 | /* Instruction has three operands and one operand is stored in ECX register */ | ||
388 | #define __emulate_2op_cl(ctxt, _op, _suffix, _type) \ | ||
389 | do { \ | ||
390 | unsigned long _tmp; \ | ||
391 | _type _clv = (ctxt)->src2.val; \ | ||
392 | _type _srcv = (ctxt)->src.val; \ | ||
393 | _type _dstv = (ctxt)->dst.val; \ | ||
394 | \ | ||
395 | __asm__ __volatile__ ( \ | ||
396 | _PRE_EFLAGS("0", "5", "2") \ | ||
397 | _op _suffix " %4,%1 \n" \ | ||
398 | _POST_EFLAGS("0", "5", "2") \ | ||
399 | : "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \ | ||
400 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ | ||
401 | ); \ | ||
402 | \ | ||
403 | (ctxt)->src2.val = (unsigned long) _clv; \ | ||
404 | (ctxt)->src2.val = (unsigned long) _srcv; \ | ||
405 | (ctxt)->dst.val = (unsigned long) _dstv; \ | ||
406 | } while (0) | ||
407 | |||
408 | #define emulate_2op_cl(ctxt, _op) \ | ||
409 | do { \ | ||
410 | switch ((ctxt)->dst.bytes) { \ | ||
411 | case 2: \ | ||
412 | __emulate_2op_cl(ctxt, _op, "w", u16); \ | ||
413 | break; \ | ||
414 | case 4: \ | ||
415 | __emulate_2op_cl(ctxt, _op, "l", u32); \ | ||
416 | break; \ | ||
417 | case 8: \ | ||
418 | ON64(__emulate_2op_cl(ctxt, _op, "q", ulong)); \ | ||
419 | break; \ | ||
420 | } \ | ||
421 | } while (0) | ||
422 | |||
423 | #define __emulate_1op(ctxt, _op, _suffix) \ | ||
424 | do { \ | ||
425 | unsigned long _tmp; \ | ||
426 | \ | ||
427 | __asm__ __volatile__ ( \ | ||
428 | _PRE_EFLAGS("0", "3", "2") \ | ||
429 | _op _suffix " %1; " \ | ||
430 | _POST_EFLAGS("0", "3", "2") \ | ||
431 | : "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \ | ||
432 | "=&r" (_tmp) \ | ||
433 | : "i" (EFLAGS_MASK)); \ | ||
434 | } while (0) | ||
435 | |||
436 | /* Instruction has only one explicit operand (no source operand). */ | ||
437 | #define emulate_1op(ctxt, _op) \ | ||
438 | do { \ | ||
439 | switch ((ctxt)->dst.bytes) { \ | ||
440 | case 1: __emulate_1op(ctxt, _op, "b"); break; \ | ||
441 | case 2: __emulate_1op(ctxt, _op, "w"); break; \ | ||
442 | case 4: __emulate_1op(ctxt, _op, "l"); break; \ | ||
443 | case 8: ON64(__emulate_1op(ctxt, _op, "q")); break; \ | ||
444 | } \ | ||
445 | } while (0) | ||
446 | |||
447 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); | 299 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); |
448 | 300 | ||
449 | #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" | 301 | #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" |
@@ -462,7 +314,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); | |||
462 | #define FOPNOP() FOP_ALIGN FOP_RET | 314 | #define FOPNOP() FOP_ALIGN FOP_RET |
463 | 315 | ||
464 | #define FOP1E(op, dst) \ | 316 | #define FOP1E(op, dst) \ |
465 | FOP_ALIGN #op " %" #dst " \n\t" FOP_RET | 317 | FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET |
318 | |||
319 | #define FOP1EEX(op, dst) \ | ||
320 | FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception) | ||
466 | 321 | ||
467 | #define FASTOP1(op) \ | 322 | #define FASTOP1(op) \ |
468 | FOP_START(op) \ | 323 | FOP_START(op) \ |
@@ -472,24 +327,42 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); | |||
472 | ON64(FOP1E(op##q, rax)) \ | 327 | ON64(FOP1E(op##q, rax)) \ |
473 | FOP_END | 328 | FOP_END |
474 | 329 | ||
330 | /* 1-operand, using src2 (for MUL/DIV r/m) */ | ||
331 | #define FASTOP1SRC2(op, name) \ | ||
332 | FOP_START(name) \ | ||
333 | FOP1E(op, cl) \ | ||
334 | FOP1E(op, cx) \ | ||
335 | FOP1E(op, ecx) \ | ||
336 | ON64(FOP1E(op, rcx)) \ | ||
337 | FOP_END | ||
338 | |||
339 | /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */ | ||
340 | #define FASTOP1SRC2EX(op, name) \ | ||
341 | FOP_START(name) \ | ||
342 | FOP1EEX(op, cl) \ | ||
343 | FOP1EEX(op, cx) \ | ||
344 | FOP1EEX(op, ecx) \ | ||
345 | ON64(FOP1EEX(op, rcx)) \ | ||
346 | FOP_END | ||
347 | |||
475 | #define FOP2E(op, dst, src) \ | 348 | #define FOP2E(op, dst, src) \ |
476 | FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET | 349 | FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET |
477 | 350 | ||
478 | #define FASTOP2(op) \ | 351 | #define FASTOP2(op) \ |
479 | FOP_START(op) \ | 352 | FOP_START(op) \ |
480 | FOP2E(op##b, al, bl) \ | 353 | FOP2E(op##b, al, dl) \ |
481 | FOP2E(op##w, ax, bx) \ | 354 | FOP2E(op##w, ax, dx) \ |
482 | FOP2E(op##l, eax, ebx) \ | 355 | FOP2E(op##l, eax, edx) \ |
483 | ON64(FOP2E(op##q, rax, rbx)) \ | 356 | ON64(FOP2E(op##q, rax, rdx)) \ |
484 | FOP_END | 357 | FOP_END |
485 | 358 | ||
486 | /* 2 operand, word only */ | 359 | /* 2 operand, word only */ |
487 | #define FASTOP2W(op) \ | 360 | #define FASTOP2W(op) \ |
488 | FOP_START(op) \ | 361 | FOP_START(op) \ |
489 | FOPNOP() \ | 362 | FOPNOP() \ |
490 | FOP2E(op##w, ax, bx) \ | 363 | FOP2E(op##w, ax, dx) \ |
491 | FOP2E(op##l, eax, ebx) \ | 364 | FOP2E(op##l, eax, edx) \ |
492 | ON64(FOP2E(op##q, rax, rbx)) \ | 365 | ON64(FOP2E(op##q, rax, rdx)) \ |
493 | FOP_END | 366 | FOP_END |
494 | 367 | ||
495 | /* 2 operand, src is CL */ | 368 | /* 2 operand, src is CL */ |
@@ -508,14 +381,17 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); | |||
508 | #define FASTOP3WCL(op) \ | 381 | #define FASTOP3WCL(op) \ |
509 | FOP_START(op) \ | 382 | FOP_START(op) \ |
510 | FOPNOP() \ | 383 | FOPNOP() \ |
511 | FOP3E(op##w, ax, bx, cl) \ | 384 | FOP3E(op##w, ax, dx, cl) \ |
512 | FOP3E(op##l, eax, ebx, cl) \ | 385 | FOP3E(op##l, eax, edx, cl) \ |
513 | ON64(FOP3E(op##q, rax, rbx, cl)) \ | 386 | ON64(FOP3E(op##q, rax, rdx, cl)) \ |
514 | FOP_END | 387 | FOP_END |
515 | 388 | ||
516 | /* Special case for SETcc - 1 instruction per cc */ | 389 | /* Special case for SETcc - 1 instruction per cc */ |
517 | #define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" | 390 | #define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" |
518 | 391 | ||
392 | asm(".global kvm_fastop_exception \n" | ||
393 | "kvm_fastop_exception: xor %esi, %esi; ret"); | ||
394 | |||
519 | FOP_START(setcc) | 395 | FOP_START(setcc) |
520 | FOP_SETCC(seto) | 396 | FOP_SETCC(seto) |
521 | FOP_SETCC(setno) | 397 | FOP_SETCC(setno) |
@@ -538,47 +414,6 @@ FOP_END; | |||
538 | FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET | 414 | FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET |
539 | FOP_END; | 415 | FOP_END; |
540 | 416 | ||
541 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ | ||
542 | do { \ | ||
543 | unsigned long _tmp; \ | ||
544 | ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \ | ||
545 | ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \ | ||
546 | \ | ||
547 | __asm__ __volatile__ ( \ | ||
548 | _PRE_EFLAGS("0", "5", "1") \ | ||
549 | "1: \n\t" \ | ||
550 | _op _suffix " %6; " \ | ||
551 | "2: \n\t" \ | ||
552 | _POST_EFLAGS("0", "5", "1") \ | ||
553 | ".pushsection .fixup,\"ax\" \n\t" \ | ||
554 | "3: movb $1, %4 \n\t" \ | ||
555 | "jmp 2b \n\t" \ | ||
556 | ".popsection \n\t" \ | ||
557 | _ASM_EXTABLE(1b, 3b) \ | ||
558 | : "=m" ((ctxt)->eflags), "=&r" (_tmp), \ | ||
559 | "+a" (*rax), "+d" (*rdx), "+qm"(_ex) \ | ||
560 | : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val)); \ | ||
561 | } while (0) | ||
562 | |||
563 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ | ||
564 | #define emulate_1op_rax_rdx(ctxt, _op, _ex) \ | ||
565 | do { \ | ||
566 | switch((ctxt)->src.bytes) { \ | ||
567 | case 1: \ | ||
568 | __emulate_1op_rax_rdx(ctxt, _op, "b", _ex); \ | ||
569 | break; \ | ||
570 | case 2: \ | ||
571 | __emulate_1op_rax_rdx(ctxt, _op, "w", _ex); \ | ||
572 | break; \ | ||
573 | case 4: \ | ||
574 | __emulate_1op_rax_rdx(ctxt, _op, "l", _ex); \ | ||
575 | break; \ | ||
576 | case 8: ON64( \ | ||
577 | __emulate_1op_rax_rdx(ctxt, _op, "q", _ex)); \ | ||
578 | break; \ | ||
579 | } \ | ||
580 | } while (0) | ||
581 | |||
582 | static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, | 417 | static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, |
583 | enum x86_intercept intercept, | 418 | enum x86_intercept intercept, |
584 | enum x86_intercept_stage stage) | 419 | enum x86_intercept_stage stage) |
@@ -988,6 +823,11 @@ FASTOP2(xor); | |||
988 | FASTOP2(cmp); | 823 | FASTOP2(cmp); |
989 | FASTOP2(test); | 824 | FASTOP2(test); |
990 | 825 | ||
826 | FASTOP1SRC2(mul, mul_ex); | ||
827 | FASTOP1SRC2(imul, imul_ex); | ||
828 | FASTOP1SRC2EX(div, div_ex); | ||
829 | FASTOP1SRC2EX(idiv, idiv_ex); | ||
830 | |||
991 | FASTOP3WCL(shld); | 831 | FASTOP3WCL(shld); |
992 | FASTOP3WCL(shrd); | 832 | FASTOP3WCL(shrd); |
993 | 833 | ||
@@ -1013,6 +853,8 @@ FASTOP2W(bts); | |||
1013 | FASTOP2W(btr); | 853 | FASTOP2W(btr); |
1014 | FASTOP2W(btc); | 854 | FASTOP2W(btc); |
1015 | 855 | ||
856 | FASTOP2(xadd); | ||
857 | |||
1016 | static u8 test_cc(unsigned int condition, unsigned long flags) | 858 | static u8 test_cc(unsigned int condition, unsigned long flags) |
1017 | { | 859 | { |
1018 | u8 rc; | 860 | u8 rc; |
@@ -1726,45 +1568,42 @@ static void write_register_operand(struct operand *op) | |||
1726 | } | 1568 | } |
1727 | } | 1569 | } |
1728 | 1570 | ||
1729 | static int writeback(struct x86_emulate_ctxt *ctxt) | 1571 | static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) |
1730 | { | 1572 | { |
1731 | int rc; | 1573 | int rc; |
1732 | 1574 | ||
1733 | if (ctxt->d & NoWrite) | 1575 | switch (op->type) { |
1734 | return X86EMUL_CONTINUE; | ||
1735 | |||
1736 | switch (ctxt->dst.type) { | ||
1737 | case OP_REG: | 1576 | case OP_REG: |
1738 | write_register_operand(&ctxt->dst); | 1577 | write_register_operand(op); |
1739 | break; | 1578 | break; |
1740 | case OP_MEM: | 1579 | case OP_MEM: |
1741 | if (ctxt->lock_prefix) | 1580 | if (ctxt->lock_prefix) |
1742 | rc = segmented_cmpxchg(ctxt, | 1581 | rc = segmented_cmpxchg(ctxt, |
1743 | ctxt->dst.addr.mem, | 1582 | op->addr.mem, |
1744 | &ctxt->dst.orig_val, | 1583 | &op->orig_val, |
1745 | &ctxt->dst.val, | 1584 | &op->val, |
1746 | ctxt->dst.bytes); | 1585 | op->bytes); |
1747 | else | 1586 | else |
1748 | rc = segmented_write(ctxt, | 1587 | rc = segmented_write(ctxt, |
1749 | ctxt->dst.addr.mem, | 1588 | op->addr.mem, |
1750 | &ctxt->dst.val, | 1589 | &op->val, |
1751 | ctxt->dst.bytes); | 1590 | op->bytes); |
1752 | if (rc != X86EMUL_CONTINUE) | 1591 | if (rc != X86EMUL_CONTINUE) |
1753 | return rc; | 1592 | return rc; |
1754 | break; | 1593 | break; |
1755 | case OP_MEM_STR: | 1594 | case OP_MEM_STR: |
1756 | rc = segmented_write(ctxt, | 1595 | rc = segmented_write(ctxt, |
1757 | ctxt->dst.addr.mem, | 1596 | op->addr.mem, |
1758 | ctxt->dst.data, | 1597 | op->data, |
1759 | ctxt->dst.bytes * ctxt->dst.count); | 1598 | op->bytes * op->count); |
1760 | if (rc != X86EMUL_CONTINUE) | 1599 | if (rc != X86EMUL_CONTINUE) |
1761 | return rc; | 1600 | return rc; |
1762 | break; | 1601 | break; |
1763 | case OP_XMM: | 1602 | case OP_XMM: |
1764 | write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); | 1603 | write_sse_reg(ctxt, &op->vec_val, op->addr.xmm); |
1765 | break; | 1604 | break; |
1766 | case OP_MM: | 1605 | case OP_MM: |
1767 | write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm); | 1606 | write_mmx_reg(ctxt, &op->mm_val, op->addr.mm); |
1768 | break; | 1607 | break; |
1769 | case OP_NONE: | 1608 | case OP_NONE: |
1770 | /* no writeback */ | 1609 | /* no writeback */ |
@@ -2117,42 +1956,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | |||
2117 | return X86EMUL_CONTINUE; | 1956 | return X86EMUL_CONTINUE; |
2118 | } | 1957 | } |
2119 | 1958 | ||
2120 | static int em_mul_ex(struct x86_emulate_ctxt *ctxt) | ||
2121 | { | ||
2122 | u8 ex = 0; | ||
2123 | |||
2124 | emulate_1op_rax_rdx(ctxt, "mul", ex); | ||
2125 | return X86EMUL_CONTINUE; | ||
2126 | } | ||
2127 | |||
2128 | static int em_imul_ex(struct x86_emulate_ctxt *ctxt) | ||
2129 | { | ||
2130 | u8 ex = 0; | ||
2131 | |||
2132 | emulate_1op_rax_rdx(ctxt, "imul", ex); | ||
2133 | return X86EMUL_CONTINUE; | ||
2134 | } | ||
2135 | |||
2136 | static int em_div_ex(struct x86_emulate_ctxt *ctxt) | ||
2137 | { | ||
2138 | u8 de = 0; | ||
2139 | |||
2140 | emulate_1op_rax_rdx(ctxt, "div", de); | ||
2141 | if (de) | ||
2142 | return emulate_de(ctxt); | ||
2143 | return X86EMUL_CONTINUE; | ||
2144 | } | ||
2145 | |||
2146 | static int em_idiv_ex(struct x86_emulate_ctxt *ctxt) | ||
2147 | { | ||
2148 | u8 de = 0; | ||
2149 | |||
2150 | emulate_1op_rax_rdx(ctxt, "idiv", de); | ||
2151 | if (de) | ||
2152 | return emulate_de(ctxt); | ||
2153 | return X86EMUL_CONTINUE; | ||
2154 | } | ||
2155 | |||
2156 | static int em_grp45(struct x86_emulate_ctxt *ctxt) | 1959 | static int em_grp45(struct x86_emulate_ctxt *ctxt) |
2157 | { | 1960 | { |
2158 | int rc = X86EMUL_CONTINUE; | 1961 | int rc = X86EMUL_CONTINUE; |
@@ -3734,10 +3537,10 @@ static const struct opcode group3[] = { | |||
3734 | F(DstMem | SrcImm | NoWrite, em_test), | 3537 | F(DstMem | SrcImm | NoWrite, em_test), |
3735 | F(DstMem | SrcNone | Lock, em_not), | 3538 | F(DstMem | SrcNone | Lock, em_not), |
3736 | F(DstMem | SrcNone | Lock, em_neg), | 3539 | F(DstMem | SrcNone | Lock, em_neg), |
3737 | I(SrcMem, em_mul_ex), | 3540 | F(DstXacc | Src2Mem, em_mul_ex), |
3738 | I(SrcMem, em_imul_ex), | 3541 | F(DstXacc | Src2Mem, em_imul_ex), |
3739 | I(SrcMem, em_div_ex), | 3542 | F(DstXacc | Src2Mem, em_div_ex), |
3740 | I(SrcMem, em_idiv_ex), | 3543 | F(DstXacc | Src2Mem, em_idiv_ex), |
3741 | }; | 3544 | }; |
3742 | 3545 | ||
3743 | static const struct opcode group4[] = { | 3546 | static const struct opcode group4[] = { |
@@ -4064,7 +3867,7 @@ static const struct opcode twobyte_table[256] = { | |||
4064 | F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), | 3867 | F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), |
4065 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3868 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
4066 | /* 0xC0 - 0xC7 */ | 3869 | /* 0xC0 - 0xC7 */ |
4067 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3870 | F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), |
4068 | N, D(DstMem | SrcReg | ModRM | Mov), | 3871 | N, D(DstMem | SrcReg | ModRM | Mov), |
4069 | N, N, N, GD(0, &group9), | 3872 | N, N, N, GD(0, &group9), |
4070 | /* 0xC8 - 0xCF */ | 3873 | /* 0xC8 - 0xCF */ |
@@ -4172,6 +3975,24 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
4172 | fetch_register_operand(op); | 3975 | fetch_register_operand(op); |
4173 | op->orig_val = op->val; | 3976 | op->orig_val = op->val; |
4174 | break; | 3977 | break; |
3978 | case OpAccLo: | ||
3979 | op->type = OP_REG; | ||
3980 | op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes; | ||
3981 | op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); | ||
3982 | fetch_register_operand(op); | ||
3983 | op->orig_val = op->val; | ||
3984 | break; | ||
3985 | case OpAccHi: | ||
3986 | if (ctxt->d & ByteOp) { | ||
3987 | op->type = OP_NONE; | ||
3988 | break; | ||
3989 | } | ||
3990 | op->type = OP_REG; | ||
3991 | op->bytes = ctxt->op_bytes; | ||
3992 | op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); | ||
3993 | fetch_register_operand(op); | ||
3994 | op->orig_val = op->val; | ||
3995 | break; | ||
4175 | case OpDI: | 3996 | case OpDI: |
4176 | op->type = OP_MEM; | 3997 | op->type = OP_MEM; |
4177 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | 3998 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
@@ -4553,11 +4374,15 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, | |||
4553 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) | 4374 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) |
4554 | { | 4375 | { |
4555 | ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; | 4376 | ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; |
4556 | fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; | 4377 | if (!(ctxt->d & ByteOp)) |
4378 | fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; | ||
4557 | asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" | 4379 | asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" |
4558 | : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) | 4380 | : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), |
4559 | : "c"(ctxt->src2.val), [fastop]"S"(fop)); | 4381 | [fastop]"+S"(fop) |
4382 | : "c"(ctxt->src2.val)); | ||
4560 | ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); | 4383 | ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); |
4384 | if (!fop) /* exception is returned in fop variable */ | ||
4385 | return emulate_de(ctxt); | ||
4561 | return X86EMUL_CONTINUE; | 4386 | return X86EMUL_CONTINUE; |
4562 | } | 4387 | } |
4563 | 4388 | ||
@@ -4773,9 +4598,17 @@ special_insn: | |||
4773 | goto done; | 4598 | goto done; |
4774 | 4599 | ||
4775 | writeback: | 4600 | writeback: |
4776 | rc = writeback(ctxt); | 4601 | if (!(ctxt->d & NoWrite)) { |
4777 | if (rc != X86EMUL_CONTINUE) | 4602 | rc = writeback(ctxt, &ctxt->dst); |
4778 | goto done; | 4603 | if (rc != X86EMUL_CONTINUE) |
4604 | goto done; | ||
4605 | } | ||
4606 | if (ctxt->d & SrcWrite) { | ||
4607 | BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR); | ||
4608 | rc = writeback(ctxt, &ctxt->src); | ||
4609 | if (rc != X86EMUL_CONTINUE) | ||
4610 | goto done; | ||
4611 | } | ||
4779 | 4612 | ||
4780 | /* | 4613 | /* |
4781 | * restore dst type in case the decoding will be reused | 4614 | * restore dst type in case the decoding will be reused |
@@ -4872,12 +4705,6 @@ twobyte_insn: | |||
4872 | ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : | 4705 | ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : |
4873 | (s16) ctxt->src.val; | 4706 | (s16) ctxt->src.val; |
4874 | break; | 4707 | break; |
4875 | case 0xc0 ... 0xc1: /* xadd */ | ||
4876 | fastop(ctxt, em_add); | ||
4877 | /* Write back the register source. */ | ||
4878 | ctxt->src.val = ctxt->dst.orig_val; | ||
4879 | write_register_operand(&ctxt->src); | ||
4880 | break; | ||
4881 | case 0xc3: /* movnti */ | 4708 | case 0xc3: /* movnti */ |
4882 | ctxt->dst.bytes = ctxt->op_bytes; | 4709 | ctxt->dst.bytes = ctxt->op_bytes; |
4883 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : | 4710 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : |
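With the fastop rework above, each FASTOP table emits its byte/word/long/quad stubs at fixed FASTOP_SIZE intervals, and fastop() now skips the size-based offset entirely for ByteOp instructions. The offset arithmetic can be sketched in plain userspace C; the FASTOP_SIZE value is assumed for illustration and __builtin_ctz stands in for the kernel's __ffs.

/* Userspace sketch of the fastop entry-point arithmetic, not kernel code. */
#include <stdio.h>

#define FASTOP_SIZE 8	/* assumed stub stride */

static unsigned long fastop_offset(unsigned int bytes, int byteop)
{
	if (byteop)
		return 0;	/* ByteOp always uses the first (byte) stub */
	return (unsigned long)__builtin_ctz(bytes) * FASTOP_SIZE;
}

int main(void)
{
	unsigned int sizes[] = { 1, 2, 4, 8 };

	for (unsigned int i = 0; i < 4; i++)
		printf("%u-byte operand -> offset %lu\n",
		       sizes[i], fastop_offset(sizes[i], 0));
	return 0;
}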
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0eee2c8b64d1..afc11245827c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -1608,8 +1608,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | |||
1608 | return; | 1608 | return; |
1609 | 1609 | ||
1610 | if (atomic_read(&apic->lapic_timer.pending) > 0) { | 1610 | if (atomic_read(&apic->lapic_timer.pending) > 0) { |
1611 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) | 1611 | kvm_apic_local_deliver(apic, APIC_LVTT); |
1612 | atomic_dec(&apic->lapic_timer.pending); | 1612 | atomic_set(&apic->lapic_timer.pending, 0); |
1613 | } | 1613 | } |
1614 | } | 1614 | } |
1615 | 1615 | ||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 004cc87b781c..0d094da49541 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -197,15 +197,63 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) | |||
197 | } | 197 | } |
198 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); | 198 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); |
199 | 199 | ||
200 | static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) | 200 | /* |
201 | * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, | ||
202 | * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation | ||
203 | * number. | ||
204 | */ | ||
205 | #define MMIO_SPTE_GEN_LOW_SHIFT 3 | ||
206 | #define MMIO_SPTE_GEN_HIGH_SHIFT 52 | ||
207 | |||
208 | #define MMIO_GEN_SHIFT 19 | ||
209 | #define MMIO_GEN_LOW_SHIFT 9 | ||
210 | #define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) | ||
211 | #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) | ||
212 | #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) | ||
213 | |||
214 | static u64 generation_mmio_spte_mask(unsigned int gen) | ||
201 | { | 215 | { |
202 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | 216 | u64 mask; |
217 | |||
218 | WARN_ON(gen > MMIO_MAX_GEN); | ||
219 | |||
220 | mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; | ||
221 | mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; | ||
222 | return mask; | ||
223 | } | ||
224 | |||
225 | static unsigned int get_mmio_spte_generation(u64 spte) | ||
226 | { | ||
227 | unsigned int gen; | ||
228 | |||
229 | spte &= ~shadow_mmio_mask; | ||
230 | |||
231 | gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK; | ||
232 | gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT; | ||
233 | return gen; | ||
234 | } | ||
235 | |||
236 | static unsigned int kvm_current_mmio_generation(struct kvm *kvm) | ||
237 | { | ||
238 | /* | ||
239 | * Init kvm generation close to MMIO_MAX_GEN to easily test the | ||
240 | * code of handling generation number wrap-around. | ||
241 | */ | ||
242 | return (kvm_memslots(kvm)->generation + | ||
243 | MMIO_MAX_GEN - 150) & MMIO_GEN_MASK; | ||
244 | } | ||
245 | |||
246 | static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, | ||
247 | unsigned access) | ||
248 | { | ||
249 | unsigned int gen = kvm_current_mmio_generation(kvm); | ||
250 | u64 mask = generation_mmio_spte_mask(gen); | ||
203 | 251 | ||
204 | access &= ACC_WRITE_MASK | ACC_USER_MASK; | 252 | access &= ACC_WRITE_MASK | ACC_USER_MASK; |
253 | mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT; | ||
205 | 254 | ||
206 | sp->mmio_cached = true; | 255 | trace_mark_mmio_spte(sptep, gfn, access, gen); |
207 | trace_mark_mmio_spte(sptep, gfn, access); | 256 | mmu_spte_set(sptep, mask); |
208 | mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); | ||
209 | } | 257 | } |
210 | 258 | ||
211 | static bool is_mmio_spte(u64 spte) | 259 | static bool is_mmio_spte(u64 spte) |
@@ -215,24 +263,38 @@ static bool is_mmio_spte(u64 spte) | |||
215 | 263 | ||
216 | static gfn_t get_mmio_spte_gfn(u64 spte) | 264 | static gfn_t get_mmio_spte_gfn(u64 spte) |
217 | { | 265 | { |
218 | return (spte & ~shadow_mmio_mask) >> PAGE_SHIFT; | 266 | u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; |
267 | return (spte & ~mask) >> PAGE_SHIFT; | ||
219 | } | 268 | } |
220 | 269 | ||
221 | static unsigned get_mmio_spte_access(u64 spte) | 270 | static unsigned get_mmio_spte_access(u64 spte) |
222 | { | 271 | { |
223 | return (spte & ~shadow_mmio_mask) & ~PAGE_MASK; | 272 | u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask; |
273 | return (spte & ~mask) & ~PAGE_MASK; | ||
224 | } | 274 | } |
225 | 275 | ||
226 | static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access) | 276 | static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, |
277 | pfn_t pfn, unsigned access) | ||
227 | { | 278 | { |
228 | if (unlikely(is_noslot_pfn(pfn))) { | 279 | if (unlikely(is_noslot_pfn(pfn))) { |
229 | mark_mmio_spte(sptep, gfn, access); | 280 | mark_mmio_spte(kvm, sptep, gfn, access); |
230 | return true; | 281 | return true; |
231 | } | 282 | } |
232 | 283 | ||
233 | return false; | 284 | return false; |
234 | } | 285 | } |
235 | 286 | ||
287 | static bool check_mmio_spte(struct kvm *kvm, u64 spte) | ||
288 | { | ||
289 | unsigned int kvm_gen, spte_gen; | ||
290 | |||
291 | kvm_gen = kvm_current_mmio_generation(kvm); | ||
292 | spte_gen = get_mmio_spte_generation(spte); | ||
293 | |||
294 | trace_check_mmio_spte(spte, kvm_gen, spte_gen); | ||
295 | return likely(kvm_gen == spte_gen); | ||
296 | } | ||
297 | |||
236 | static inline u64 rsvd_bits(int s, int e) | 298 | static inline u64 rsvd_bits(int s, int e) |
237 | { | 299 | { |
238 | return ((1ULL << (e - s + 1)) - 1) << s; | 300 | return ((1ULL << (e - s + 1)) - 1) << s; |
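The comment above describes packing a 19-bit MMIO generation into spte bits 3-11 (low 9 bits) and 52-61 (high 10 bits). A small stand-alone sketch of that packing, restating the patch's constants in a user-space harness purely for illustration, is:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MMIO_SPTE_GEN_LOW_SHIFT  3
#define MMIO_SPTE_GEN_HIGH_SHIFT 52
#define MMIO_GEN_SHIFT           19
#define MMIO_GEN_LOW_SHIFT       9
#define MMIO_GEN_LOW_MASK        ((1 << MMIO_GEN_LOW_SHIFT) - 1)
#define MMIO_GEN_MASK            ((1 << MMIO_GEN_SHIFT) - 1)

/* Pack a 19-bit generation into spte bits 3..11 (low 9) and 52..61 (high 10). */
static uint64_t pack_gen(unsigned int gen)
{
    uint64_t mask;

    mask  = (uint64_t)(gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
    mask |= ((uint64_t)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
    return mask;
}

/* Recover the generation from a mask-only spte value (no shadow_mmio_mask set). */
static unsigned int unpack_gen(uint64_t spte)
{
    unsigned int gen;

    gen  = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
    gen |= (unsigned int)(spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT;
    return gen;
}

int main(void)
{
    unsigned int gen;

    /* Every representable generation survives the round trip. */
    for (gen = 0; gen <= MMIO_GEN_MASK; gen++)
        assert(unpack_gen(pack_gen(gen)) == gen);

    printf("all %d generations round-trip\n", MMIO_GEN_MASK + 1);
    return 0;
}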
@@ -404,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) | |||
404 | /* | 466 | /* |
405 | * The idea of using the lightweight way to get the spte on an x86_32 guest comes from | 467 | * The idea of using the lightweight way to get the spte on an x86_32 guest comes from |
406 | * gup_get_pte(arch/x86/mm/gup.c). | 468 | * gup_get_pte(arch/x86/mm/gup.c). |
407 | * The difference is we can not catch the spte tlb flush if we leave | 469 | * |
408 | * guest mode, so we emulate it by increase clear_spte_count when spte | 470 | * An spte tlb flush may be pending, because kvm_set_pte_rmapp |
409 | * is cleared. | 471 | * coalesces them and we are running outside of the MMU lock. Therefore |
472 | * we need to protect against in-progress updates of the spte. | ||
473 | * | ||
474 | * Reading the spte while an update is in progress may get the old value | ||
475 | * for the high part of the spte. The race is fine for a present->non-present | ||
476 | * change (because the high part of the spte is ignored for non-present spte), | ||
477 | * but for a present->present change we must reread the spte. | ||
478 | * | ||
479 | * All such changes are done in two steps (present->non-present and | ||
480 | * non-present->present), hence it is enough to count the number of | ||
481 | * present->non-present updates: if it changed while reading the spte, | ||
482 | * we might have hit the race. This is done using clear_spte_count. | ||
410 | */ | 483 | */ |
411 | static u64 __get_spte_lockless(u64 *sptep) | 484 | static u64 __get_spte_lockless(u64 *sptep) |
412 | { | 485 | { |
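The rewritten comment describes the race __get_spte_lockless() must tolerate on 32-bit hosts: reading a 64-bit spte as two 32-bit halves can observe a torn value unless present->non-present transitions are counted. A minimal sketch of that count-and-retry pattern, using illustrative types that only mimic the kernel's split spte, might look like:

#include <stdint.h>

/* Illustrative stand-in for the real kvm_mmu_page bookkeeping. */
struct fake_sp {
    volatile int clear_spte_count;   /* bumped on every present->non-present clear */
};

union split_spte {
    struct {
        uint32_t spte_low;
        uint32_t spte_high;
    };
    uint64_t spte;
};

/*
 * Read a 64-bit spte in two 32-bit halves.  If a writer cleared the spte
 * (present -> non-present) while we were reading, clear_spte_count will
 * have changed and we retry, so a torn present spte is never returned.
 */
static uint64_t get_spte_lockless(struct fake_sp *sp, union split_spte *sptep)
{
    union split_spte spte;
    int count;

retry:
    count = sp->clear_spte_count;
    __sync_synchronize();            /* pairs with the writer's barriers */

    spte.spte_low = sptep->spte_low;
    __sync_synchronize();

    spte.spte_high = sptep->spte_high;
    __sync_synchronize();

    if (sptep->spte_low != spte.spte_low ||
        sp->clear_spte_count != count)
        goto retry;

    return spte.spte;
}

int main(void)
{
    struct fake_sp sp = { .clear_spte_count = 0 };
    union split_spte s = { .spte = 0xdeadbeefcafef00dULL };

    return get_spte_lockless(&sp, &s) == s.spte ? 0 : 1;
}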
@@ -1511,6 +1584,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
1511 | if (!direct) | 1584 | if (!direct) |
1512 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); | 1585 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
1513 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 1586 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
1587 | |||
1588 | /* | ||
1589 | * The active_mmu_pages list is a FIFO list; do not move the | ||
1590 | * page until it is zapped. kvm_zap_obsolete_pages depends on | ||
1591 | * this feature. See the comments in kvm_zap_obsolete_pages(). | ||
1592 | */ | ||
1514 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 1593 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
1515 | sp->parent_ptes = 0; | 1594 | sp->parent_ptes = 0; |
1516 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1595 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
@@ -1648,6 +1727,16 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1648 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 1727 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
1649 | struct list_head *invalid_list); | 1728 | struct list_head *invalid_list); |
1650 | 1729 | ||
1730 | /* | ||
1731 | * NOTE: we should pay more attention to the zapped-obsolete page | ||
1732 | * (is_obsolete_sp(sp) && sp->role.invalid) when walking the hash list, | ||
1733 | * since it has been deleted from active_mmu_pages but can still be found | ||
1734 | * in the hash list. | ||
1735 | * | ||
1736 | * for_each_gfn_indirect_valid_sp skips that kind of page, and | ||
1737 | * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), skips | ||
1738 | * all the obsolete pages. | ||
1739 | */ | ||
1651 | #define for_each_gfn_sp(_kvm, _sp, _gfn) \ | 1740 | #define for_each_gfn_sp(_kvm, _sp, _gfn) \ |
1652 | hlist_for_each_entry(_sp, \ | 1741 | hlist_for_each_entry(_sp, \ |
1653 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ | 1742 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ |
@@ -1838,6 +1927,11 @@ static void clear_sp_write_flooding_count(u64 *spte) | |||
1838 | __clear_sp_write_flooding_count(sp); | 1927 | __clear_sp_write_flooding_count(sp); |
1839 | } | 1928 | } |
1840 | 1929 | ||
1930 | static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
1931 | { | ||
1932 | return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); | ||
1933 | } | ||
1934 | |||
1841 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1935 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
1842 | gfn_t gfn, | 1936 | gfn_t gfn, |
1843 | gva_t gaddr, | 1937 | gva_t gaddr, |
@@ -1864,6 +1958,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1864 | role.quadrant = quadrant; | 1958 | role.quadrant = quadrant; |
1865 | } | 1959 | } |
1866 | for_each_gfn_sp(vcpu->kvm, sp, gfn) { | 1960 | for_each_gfn_sp(vcpu->kvm, sp, gfn) { |
1961 | if (is_obsolete_sp(vcpu->kvm, sp)) | ||
1962 | continue; | ||
1963 | |||
1867 | if (!need_sync && sp->unsync) | 1964 | if (!need_sync && sp->unsync) |
1868 | need_sync = true; | 1965 | need_sync = true; |
1869 | 1966 | ||
@@ -1900,6 +1997,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1900 | 1997 | ||
1901 | account_shadowed(vcpu->kvm, gfn); | 1998 | account_shadowed(vcpu->kvm, gfn); |
1902 | } | 1999 | } |
2000 | sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; | ||
1903 | init_shadow_page_table(sp); | 2001 | init_shadow_page_table(sp); |
1904 | trace_kvm_mmu_get_page(sp, true); | 2002 | trace_kvm_mmu_get_page(sp, true); |
1905 | return sp; | 2003 | return sp; |
@@ -2070,8 +2168,10 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
2070 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); | 2168 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); |
2071 | kvm_mmu_page_unlink_children(kvm, sp); | 2169 | kvm_mmu_page_unlink_children(kvm, sp); |
2072 | kvm_mmu_unlink_parents(kvm, sp); | 2170 | kvm_mmu_unlink_parents(kvm, sp); |
2171 | |||
2073 | if (!sp->role.invalid && !sp->role.direct) | 2172 | if (!sp->role.invalid && !sp->role.direct) |
2074 | unaccount_shadowed(kvm, sp->gfn); | 2173 | unaccount_shadowed(kvm, sp->gfn); |
2174 | |||
2075 | if (sp->unsync) | 2175 | if (sp->unsync) |
2076 | kvm_unlink_unsync_page(kvm, sp); | 2176 | kvm_unlink_unsync_page(kvm, sp); |
2077 | if (!sp->root_count) { | 2177 | if (!sp->root_count) { |
@@ -2081,7 +2181,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
2081 | kvm_mod_used_mmu_pages(kvm, -1); | 2181 | kvm_mod_used_mmu_pages(kvm, -1); |
2082 | } else { | 2182 | } else { |
2083 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 2183 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
2084 | kvm_reload_remote_mmus(kvm); | 2184 | |
2185 | /* | ||
2186 | * The obsolete pages can not be used on any vcpus. | ||
2187 | * See the comments in kvm_mmu_invalidate_zap_all_pages(). | ||
2188 | */ | ||
2189 | if (!sp->role.invalid && !is_obsolete_sp(kvm, sp)) | ||
2190 | kvm_reload_remote_mmus(kvm); | ||
2085 | } | 2191 | } |
2086 | 2192 | ||
2087 | sp->role.invalid = 1; | 2193 | sp->role.invalid = 1; |
@@ -2331,7 +2437,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2331 | u64 spte; | 2437 | u64 spte; |
2332 | int ret = 0; | 2438 | int ret = 0; |
2333 | 2439 | ||
2334 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) | 2440 | if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access)) |
2335 | return 0; | 2441 | return 0; |
2336 | 2442 | ||
2337 | spte = PT_PRESENT_MASK; | 2443 | spte = PT_PRESENT_MASK; |
@@ -2869,22 +2975,25 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
2869 | 2975 | ||
2870 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2976 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2871 | return; | 2977 | return; |
2872 | spin_lock(&vcpu->kvm->mmu_lock); | 2978 | |
2873 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && | 2979 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && |
2874 | (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || | 2980 | (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || |
2875 | vcpu->arch.mmu.direct_map)) { | 2981 | vcpu->arch.mmu.direct_map)) { |
2876 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2982 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2877 | 2983 | ||
2984 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2878 | sp = page_header(root); | 2985 | sp = page_header(root); |
2879 | --sp->root_count; | 2986 | --sp->root_count; |
2880 | if (!sp->root_count && sp->role.invalid) { | 2987 | if (!sp->root_count && sp->role.invalid) { |
2881 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); | 2988 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); |
2882 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 2989 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
2883 | } | 2990 | } |
2884 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
2885 | spin_unlock(&vcpu->kvm->mmu_lock); | 2991 | spin_unlock(&vcpu->kvm->mmu_lock); |
2992 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
2886 | return; | 2993 | return; |
2887 | } | 2994 | } |
2995 | |||
2996 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2888 | for (i = 0; i < 4; ++i) { | 2997 | for (i = 0; i < 4; ++i) { |
2889 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2998 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2890 | 2999 | ||
@@ -3148,17 +3257,12 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) | |||
3148 | return spte; | 3257 | return spte; |
3149 | } | 3258 | } |
3150 | 3259 | ||
3151 | /* | ||
3152 | * If it is a real mmio page fault, return 1 and emulat the instruction | ||
3153 | * directly, return 0 to let CPU fault again on the address, -1 is | ||
3154 | * returned if bug is detected. | ||
3155 | */ | ||
3156 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) | 3260 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) |
3157 | { | 3261 | { |
3158 | u64 spte; | 3262 | u64 spte; |
3159 | 3263 | ||
3160 | if (quickly_check_mmio_pf(vcpu, addr, direct)) | 3264 | if (quickly_check_mmio_pf(vcpu, addr, direct)) |
3161 | return 1; | 3265 | return RET_MMIO_PF_EMULATE; |
3162 | 3266 | ||
3163 | spte = walk_shadow_page_get_mmio_spte(vcpu, addr); | 3267 | spte = walk_shadow_page_get_mmio_spte(vcpu, addr); |
3164 | 3268 | ||
@@ -3166,12 +3270,15 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) | |||
3166 | gfn_t gfn = get_mmio_spte_gfn(spte); | 3270 | gfn_t gfn = get_mmio_spte_gfn(spte); |
3167 | unsigned access = get_mmio_spte_access(spte); | 3271 | unsigned access = get_mmio_spte_access(spte); |
3168 | 3272 | ||
3273 | if (!check_mmio_spte(vcpu->kvm, spte)) | ||
3274 | return RET_MMIO_PF_INVALID; | ||
3275 | |||
3169 | if (direct) | 3276 | if (direct) |
3170 | addr = 0; | 3277 | addr = 0; |
3171 | 3278 | ||
3172 | trace_handle_mmio_page_fault(addr, gfn, access); | 3279 | trace_handle_mmio_page_fault(addr, gfn, access); |
3173 | vcpu_cache_mmio_info(vcpu, addr, gfn, access); | 3280 | vcpu_cache_mmio_info(vcpu, addr, gfn, access); |
3174 | return 1; | 3281 | return RET_MMIO_PF_EMULATE; |
3175 | } | 3282 | } |
3176 | 3283 | ||
3177 | /* | 3284 | /* |
@@ -3179,13 +3286,13 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) | |||
3179 | * it's a BUG if the gfn is not a mmio page. | 3286 | * it's a BUG if the gfn is not a mmio page. |
3180 | */ | 3287 | */ |
3181 | if (direct && !check_direct_spte_mmio_pf(spte)) | 3288 | if (direct && !check_direct_spte_mmio_pf(spte)) |
3182 | return -1; | 3289 | return RET_MMIO_PF_BUG; |
3183 | 3290 | ||
3184 | /* | 3291 | /* |
3185 | * If the page table is zapped by other cpus, let CPU fault again on | 3292 | * If the page table is zapped by other cpus, let CPU fault again on |
3186 | * the address. | 3293 | * the address. |
3187 | */ | 3294 | */ |
3188 | return 0; | 3295 | return RET_MMIO_PF_RETRY; |
3189 | } | 3296 | } |
3190 | EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); | 3297 | EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); |
3191 | 3298 | ||
@@ -3195,7 +3302,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, | |||
3195 | int ret; | 3302 | int ret; |
3196 | 3303 | ||
3197 | ret = handle_mmio_page_fault_common(vcpu, addr, direct); | 3304 | ret = handle_mmio_page_fault_common(vcpu, addr, direct); |
3198 | WARN_ON(ret < 0); | 3305 | WARN_ON(ret == RET_MMIO_PF_BUG); |
3199 | return ret; | 3306 | return ret; |
3200 | } | 3307 | } |
3201 | 3308 | ||
@@ -3207,8 +3314,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
3207 | 3314 | ||
3208 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); | 3315 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); |
3209 | 3316 | ||
3210 | if (unlikely(error_code & PFERR_RSVD_MASK)) | 3317 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
3211 | return handle_mmio_page_fault(vcpu, gva, error_code, true); | 3318 | r = handle_mmio_page_fault(vcpu, gva, error_code, true); |
3319 | |||
3320 | if (likely(r != RET_MMIO_PF_INVALID)) | ||
3321 | return r; | ||
3322 | } | ||
3212 | 3323 | ||
3213 | r = mmu_topup_memory_caches(vcpu); | 3324 | r = mmu_topup_memory_caches(vcpu); |
3214 | if (r) | 3325 | if (r) |
@@ -3284,8 +3395,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3284 | ASSERT(vcpu); | 3395 | ASSERT(vcpu); |
3285 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3396 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
3286 | 3397 | ||
3287 | if (unlikely(error_code & PFERR_RSVD_MASK)) | 3398 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
3288 | return handle_mmio_page_fault(vcpu, gpa, error_code, true); | 3399 | r = handle_mmio_page_fault(vcpu, gpa, error_code, true); |
3400 | |||
3401 | if (likely(r != RET_MMIO_PF_INVALID)) | ||
3402 | return r; | ||
3403 | } | ||
3289 | 3404 | ||
3290 | r = mmu_topup_memory_caches(vcpu); | 3405 | r = mmu_topup_memory_caches(vcpu); |
3291 | if (r) | 3406 | if (r) |
@@ -3391,8 +3506,8 @@ static inline void protect_clean_gpte(unsigned *access, unsigned gpte) | |||
3391 | *access &= mask; | 3506 | *access &= mask; |
3392 | } | 3507 | } |
3393 | 3508 | ||
3394 | static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | 3509 | static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, |
3395 | int *nr_present) | 3510 | unsigned access, int *nr_present) |
3396 | { | 3511 | { |
3397 | if (unlikely(is_mmio_spte(*sptep))) { | 3512 | if (unlikely(is_mmio_spte(*sptep))) { |
3398 | if (gfn != get_mmio_spte_gfn(*sptep)) { | 3513 | if (gfn != get_mmio_spte_gfn(*sptep)) { |
@@ -3401,7 +3516,7 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | |||
3401 | } | 3516 | } |
3402 | 3517 | ||
3403 | (*nr_present)++; | 3518 | (*nr_present)++; |
3404 | mark_mmio_spte(sptep, gfn, access); | 3519 | mark_mmio_spte(kvm, sptep, gfn, access); |
3405 | return true; | 3520 | return true; |
3406 | } | 3521 | } |
3407 | 3522 | ||
@@ -3764,9 +3879,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
3764 | if (r) | 3879 | if (r) |
3765 | goto out; | 3880 | goto out; |
3766 | r = mmu_alloc_roots(vcpu); | 3881 | r = mmu_alloc_roots(vcpu); |
3767 | spin_lock(&vcpu->kvm->mmu_lock); | 3882 | kvm_mmu_sync_roots(vcpu); |
3768 | mmu_sync_roots(vcpu); | ||
3769 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
3770 | if (r) | 3883 | if (r) |
3771 | goto out; | 3884 | goto out; |
3772 | /* set_cr3() should ensure TLB has been flushed */ | 3885 | /* set_cr3() should ensure TLB has been flushed */ |
@@ -4179,39 +4292,107 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
4179 | spin_unlock(&kvm->mmu_lock); | 4292 | spin_unlock(&kvm->mmu_lock); |
4180 | } | 4293 | } |
4181 | 4294 | ||
4182 | void kvm_mmu_zap_all(struct kvm *kvm) | 4295 | #define BATCH_ZAP_PAGES 10 |
4296 | static void kvm_zap_obsolete_pages(struct kvm *kvm) | ||
4183 | { | 4297 | { |
4184 | struct kvm_mmu_page *sp, *node; | 4298 | struct kvm_mmu_page *sp, *node; |
4185 | LIST_HEAD(invalid_list); | 4299 | int batch = 0; |
4186 | 4300 | ||
4187 | spin_lock(&kvm->mmu_lock); | ||
4188 | restart: | 4301 | restart: |
4189 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 4302 | list_for_each_entry_safe_reverse(sp, node, |
4190 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) | 4303 | &kvm->arch.active_mmu_pages, link) { |
4304 | int ret; | ||
4305 | |||
4306 | /* | ||
4307 | * No obsolete page exists before a newly created page, since | ||
4308 | * active_mmu_pages is a FIFO list. | ||
4309 | */ | ||
4310 | if (!is_obsolete_sp(kvm, sp)) | ||
4311 | break; | ||
4312 | |||
4313 | /* | ||
4314 | * Since we are walking the list in reverse and invalid | ||
4315 | * pages are moved to the head, skipping invalid pages | ||
4316 | * helps us avoid walking the list forever. | ||
4317 | */ | ||
4318 | if (sp->role.invalid) | ||
4319 | continue; | ||
4320 | |||
4321 | /* | ||
4322 | * There is no need to flush the tlb since we only zap sps with an | ||
4323 | * invalid generation number. | ||
4324 | */ | ||
4325 | if (batch >= BATCH_ZAP_PAGES && | ||
4326 | cond_resched_lock(&kvm->mmu_lock)) { | ||
4327 | batch = 0; | ||
4328 | goto restart; | ||
4329 | } | ||
4330 | |||
4331 | ret = kvm_mmu_prepare_zap_page(kvm, sp, | ||
4332 | &kvm->arch.zapped_obsolete_pages); | ||
4333 | batch += ret; | ||
4334 | |||
4335 | if (ret) | ||
4191 | goto restart; | 4336 | goto restart; |
4337 | } | ||
4192 | 4338 | ||
4193 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4339 | /* |
4194 | spin_unlock(&kvm->mmu_lock); | 4340 | * The tlb should be flushed before the page tables are freed, since |
4341 | * lockless walkers may still be using the pages. | ||
4342 | */ | ||
4343 | kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); | ||
4195 | } | 4344 | } |
4196 | 4345 | ||
4197 | void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) | 4346 | /* |
4347 | * Fast invalidate all shadow pages and use lock-break technique | ||
4348 | * to zap obsolete pages. | ||
4349 | * | ||
4350 | * It is required when a memslot is being deleted or the VM is being | ||
4351 | * destroyed; in these cases, we must ensure that the KVM MMU does | ||
4352 | * not use any resource of the slot being deleted, or of any slot, | ||
4353 | * after this function has been called. | ||
4354 | */ | ||
4355 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm) | ||
4198 | { | 4356 | { |
4199 | struct kvm_mmu_page *sp, *node; | ||
4200 | LIST_HEAD(invalid_list); | ||
4201 | |||
4202 | spin_lock(&kvm->mmu_lock); | 4357 | spin_lock(&kvm->mmu_lock); |
4203 | restart: | 4358 | trace_kvm_mmu_invalidate_zap_all_pages(kvm); |
4204 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { | 4359 | kvm->arch.mmu_valid_gen++; |
4205 | if (!sp->mmio_cached) | ||
4206 | continue; | ||
4207 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) | ||
4208 | goto restart; | ||
4209 | } | ||
4210 | 4360 | ||
4211 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4361 | /* |
4362 | * Notify all vcpus to reload their shadow page tables | ||
4363 | * and flush the TLB. Then all vcpus will switch to a new | ||
4364 | * shadow page table with the new mmu_valid_gen. | ||
4365 | * | ||
4366 | * Note: we should do this under the protection of the | ||
4367 | * mmu-lock; otherwise, a vcpu could purge a shadow page | ||
4368 | * but miss the tlb flush. | ||
4369 | */ | ||
4370 | kvm_reload_remote_mmus(kvm); | ||
4371 | |||
4372 | kvm_zap_obsolete_pages(kvm); | ||
4212 | spin_unlock(&kvm->mmu_lock); | 4373 | spin_unlock(&kvm->mmu_lock); |
4213 | } | 4374 | } |
4214 | 4375 | ||
4376 | static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) | ||
4377 | { | ||
4378 | return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); | ||
4379 | } | ||
4380 | |||
4381 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) | ||
4382 | { | ||
4383 | /* | ||
4384 | * The very rare case: if the generation number wraps around, | ||
4385 | * zap all shadow pages. | ||
4386 | * | ||
4387 | * The max value is MMIO_MAX_GEN - 1, since this is not called | ||
4388 | * when a memslot is marked invalid. | ||
4389 | */ | ||
4390 | if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) { | ||
4391 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); | ||
4392 | kvm_mmu_invalidate_zap_all_pages(kvm); | ||
4393 | } | ||
4394 | } | ||
4395 | |||
4215 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 4396 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
4216 | { | 4397 | { |
4217 | struct kvm *kvm; | 4398 | struct kvm *kvm; |
@@ -4240,15 +4421,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
4240 | * want to shrink a VM that only started to populate its MMU | 4421 | * want to shrink a VM that only started to populate its MMU |
4241 | * anyway. | 4422 | * anyway. |
4242 | */ | 4423 | */ |
4243 | if (!kvm->arch.n_used_mmu_pages) | 4424 | if (!kvm->arch.n_used_mmu_pages && |
4425 | !kvm_has_zapped_obsolete_pages(kvm)) | ||
4244 | continue; | 4426 | continue; |
4245 | 4427 | ||
4246 | idx = srcu_read_lock(&kvm->srcu); | 4428 | idx = srcu_read_lock(&kvm->srcu); |
4247 | spin_lock(&kvm->mmu_lock); | 4429 | spin_lock(&kvm->mmu_lock); |
4248 | 4430 | ||
4431 | if (kvm_has_zapped_obsolete_pages(kvm)) { | ||
4432 | kvm_mmu_commit_zap_page(kvm, | ||
4433 | &kvm->arch.zapped_obsolete_pages); | ||
4434 | goto unlock; | ||
4435 | } | ||
4436 | |||
4249 | prepare_zap_oldest_mmu_page(kvm, &invalid_list); | 4437 | prepare_zap_oldest_mmu_page(kvm, &invalid_list); |
4250 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4438 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
4251 | 4439 | ||
4440 | unlock: | ||
4252 | spin_unlock(&kvm->mmu_lock); | 4441 | spin_unlock(&kvm->mmu_lock); |
4253 | srcu_read_unlock(&kvm->srcu, idx); | 4442 | srcu_read_unlock(&kvm->srcu, idx); |
4254 | 4443 | ||
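The mmu.c changes above replace eager "zap everything in one pass" with generation-based obsolescence: bumping kvm->arch.mmu_valid_gen makes every existing shadow page obsolete in O(1), lookups skip obsolete pages, and reclaim happens lazily (in batches, under the lock-break in kvm_zap_obsolete_pages). A tiny stand-alone model of that idea, with hypothetical cache/entry types rather than KVM code, is:

#include <stdbool.h>
#include <stdio.h>

/* Miniature of generation-based invalidation: each object remembers the
 * generation it was created in; bumping the global generation makes every
 * existing object "obsolete" at once, lookups skip obsolete objects, and
 * reclaim can happen lazily later. */
struct cache {
    unsigned long valid_gen;
};

struct entry {
    unsigned long gen;
    int key, value;
    bool in_use;
};

static bool is_obsolete(struct cache *c, struct entry *e)
{
    return e->gen != c->valid_gen;
}

static void insert(struct cache *c, struct entry *e, int key, int value)
{
    e->gen = c->valid_gen;
    e->key = key;
    e->value = value;
    e->in_use = true;
}

static struct entry *lookup(struct cache *c, struct entry *tbl, int n, int key)
{
    for (int i = 0; i < n; i++)
        if (tbl[i].in_use && !is_obsolete(c, &tbl[i]) && tbl[i].key == key)
            return &tbl[i];
    return NULL;    /* obsolete entries are invisible, like obsolete sps */
}

int main(void)
{
    struct cache c = { .valid_gen = 0 };
    struct entry tbl[4] = { 0 };

    insert(&c, &tbl[0], 1, 100);
    printf("before invalidate: %p\n", (void *)lookup(&c, tbl, 4, 1));

    c.valid_gen++;        /* "invalidate all pages" in a single step */
    printf("after  invalidate: %p\n", (void *)lookup(&c, tbl, 4, 1));
    return 0;
}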
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 2adcbc2cac6d..5b59c573aba7 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -52,6 +52,23 @@ | |||
52 | 52 | ||
53 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); | 53 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); |
54 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); | 54 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); |
55 | |||
56 | /* | ||
57 | * Return values of handle_mmio_page_fault_common: | ||
58 | * RET_MMIO_PF_EMULATE: it is a real mmio page fault; emulate the instruction | ||
59 | * directly. | ||
60 | * RET_MMIO_PF_INVALID: an invalid spte was detected; let the real page | ||
61 | * fault path update the mmio spte. | ||
62 | * RET_MMIO_PF_RETRY: let the CPU fault again on the address. | ||
63 | * RET_MMIO_PF_BUG: a bug was detected. | ||
64 | */ | ||
65 | enum { | ||
66 | RET_MMIO_PF_EMULATE = 1, | ||
67 | RET_MMIO_PF_INVALID = 2, | ||
68 | RET_MMIO_PF_RETRY = 0, | ||
69 | RET_MMIO_PF_BUG = -1 | ||
70 | }; | ||
71 | |||
55 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); | 72 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); |
56 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 73 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
57 | 74 | ||
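A caller is expected to act on each of the return codes documented above, as handle_ept_misconfig() does later in this patch. A self-contained, purely illustrative dispatcher, with stand-in action functions that are not KVM helpers, could look like:

#include <stdio.h>

/* The same return codes as the enum above, restated so the sketch compiles
 * on its own. */
enum { PF_EMULATE = 1, PF_INVALID = 2, PF_RETRY = 0, PF_BUG = -1 };

/* Hypothetical stand-ins for the real actions a caller would take. */
static int emulate_insn(void)    { puts("emulate mmio access"); return 1; }
static int slow_page_fault(void) { puts("take the slow page-fault path"); return 1; }

/* Illustrative dispatch: emulate, fall back to the slow path, retry, or
 * flag a bug, mirroring the intent of the documented return codes. */
static int dispatch_mmio_pf(int ret)
{
    if (ret == PF_EMULATE)
        return emulate_insn();
    if (ret == PF_INVALID)
        return slow_page_fault();
    if (ret == PF_RETRY)
        return 1;            /* let the CPU fault again on the address */
    return -1;               /* PF_BUG: should never happen */
}

int main(void)
{
    printf("retry   -> %d\n", dispatch_mmio_pf(PF_RETRY));
    printf("emulate -> %d\n", dispatch_mmio_pf(PF_EMULATE));
    return 0;
}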
@@ -97,4 +114,5 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access, | |||
97 | return (mmu->permissions[pfec >> 1] >> pte_access) & 1; | 114 | return (mmu->permissions[pfec >> 1] >> pte_access) & 1; |
98 | } | 115 | } |
99 | 116 | ||
117 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); | ||
100 | #endif | 118 | #endif |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index b8f6172f4174..9d2e0ffcb190 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -7,16 +7,18 @@ | |||
7 | #undef TRACE_SYSTEM | 7 | #undef TRACE_SYSTEM |
8 | #define TRACE_SYSTEM kvmmmu | 8 | #define TRACE_SYSTEM kvmmmu |
9 | 9 | ||
10 | #define KVM_MMU_PAGE_FIELDS \ | 10 | #define KVM_MMU_PAGE_FIELDS \ |
11 | __field(__u64, gfn) \ | 11 | __field(unsigned long, mmu_valid_gen) \ |
12 | __field(__u32, role) \ | 12 | __field(__u64, gfn) \ |
13 | __field(__u32, root_count) \ | 13 | __field(__u32, role) \ |
14 | __field(__u32, root_count) \ | ||
14 | __field(bool, unsync) | 15 | __field(bool, unsync) |
15 | 16 | ||
16 | #define KVM_MMU_PAGE_ASSIGN(sp) \ | 17 | #define KVM_MMU_PAGE_ASSIGN(sp) \ |
17 | __entry->gfn = sp->gfn; \ | 18 | __entry->mmu_valid_gen = sp->mmu_valid_gen; \ |
18 | __entry->role = sp->role.word; \ | 19 | __entry->gfn = sp->gfn; \ |
19 | __entry->root_count = sp->root_count; \ | 20 | __entry->role = sp->role.word; \ |
21 | __entry->root_count = sp->root_count; \ | ||
20 | __entry->unsync = sp->unsync; | 22 | __entry->unsync = sp->unsync; |
21 | 23 | ||
22 | #define KVM_MMU_PAGE_PRINTK() ({ \ | 24 | #define KVM_MMU_PAGE_PRINTK() ({ \ |
@@ -28,8 +30,8 @@ | |||
28 | \ | 30 | \ |
29 | role.word = __entry->role; \ | 31 | role.word = __entry->role; \ |
30 | \ | 32 | \ |
31 | trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ | 33 | trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s" \ |
32 | " %snxe root %u %s%c", \ | 34 | " %snxe root %u %s%c", __entry->mmu_valid_gen, \ |
33 | __entry->gfn, role.level, \ | 35 | __entry->gfn, role.level, \ |
34 | role.cr4_pae ? " pae" : "", \ | 36 | role.cr4_pae ? " pae" : "", \ |
35 | role.quadrant, \ | 37 | role.quadrant, \ |
@@ -197,23 +199,25 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, | |||
197 | 199 | ||
198 | TRACE_EVENT( | 200 | TRACE_EVENT( |
199 | mark_mmio_spte, | 201 | mark_mmio_spte, |
200 | TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), | 202 | TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen), |
201 | TP_ARGS(sptep, gfn, access), | 203 | TP_ARGS(sptep, gfn, access, gen), |
202 | 204 | ||
203 | TP_STRUCT__entry( | 205 | TP_STRUCT__entry( |
204 | __field(void *, sptep) | 206 | __field(void *, sptep) |
205 | __field(gfn_t, gfn) | 207 | __field(gfn_t, gfn) |
206 | __field(unsigned, access) | 208 | __field(unsigned, access) |
209 | __field(unsigned int, gen) | ||
207 | ), | 210 | ), |
208 | 211 | ||
209 | TP_fast_assign( | 212 | TP_fast_assign( |
210 | __entry->sptep = sptep; | 213 | __entry->sptep = sptep; |
211 | __entry->gfn = gfn; | 214 | __entry->gfn = gfn; |
212 | __entry->access = access; | 215 | __entry->access = access; |
216 | __entry->gen = gen; | ||
213 | ), | 217 | ), |
214 | 218 | ||
215 | TP_printk("sptep:%p gfn %llx access %x", __entry->sptep, __entry->gfn, | 219 | TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep, |
216 | __entry->access) | 220 | __entry->gfn, __entry->access, __entry->gen) |
217 | ); | 221 | ); |
218 | 222 | ||
219 | TRACE_EVENT( | 223 | TRACE_EVENT( |
@@ -274,6 +278,50 @@ TRACE_EVENT( | |||
274 | __spte_satisfied(old_spte), __spte_satisfied(new_spte) | 278 | __spte_satisfied(old_spte), __spte_satisfied(new_spte) |
275 | ) | 279 | ) |
276 | ); | 280 | ); |
281 | |||
282 | TRACE_EVENT( | ||
283 | kvm_mmu_invalidate_zap_all_pages, | ||
284 | TP_PROTO(struct kvm *kvm), | ||
285 | TP_ARGS(kvm), | ||
286 | |||
287 | TP_STRUCT__entry( | ||
288 | __field(unsigned long, mmu_valid_gen) | ||
289 | __field(unsigned int, mmu_used_pages) | ||
290 | ), | ||
291 | |||
292 | TP_fast_assign( | ||
293 | __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; | ||
294 | __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; | ||
295 | ), | ||
296 | |||
297 | TP_printk("kvm-mmu-valid-gen %lx used_pages %x", | ||
298 | __entry->mmu_valid_gen, __entry->mmu_used_pages | ||
299 | ) | ||
300 | ); | ||
301 | |||
302 | |||
303 | TRACE_EVENT( | ||
304 | check_mmio_spte, | ||
305 | TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), | ||
306 | TP_ARGS(spte, kvm_gen, spte_gen), | ||
307 | |||
308 | TP_STRUCT__entry( | ||
309 | __field(unsigned int, kvm_gen) | ||
310 | __field(unsigned int, spte_gen) | ||
311 | __field(u64, spte) | ||
312 | ), | ||
313 | |||
314 | TP_fast_assign( | ||
315 | __entry->kvm_gen = kvm_gen; | ||
316 | __entry->spte_gen = spte_gen; | ||
317 | __entry->spte = spte; | ||
318 | ), | ||
319 | |||
320 | TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte, | ||
321 | __entry->kvm_gen, __entry->spte_gen, | ||
322 | __entry->kvm_gen == __entry->spte_gen | ||
323 | ) | ||
324 | ); | ||
277 | #endif /* _TRACE_KVMMMU_H */ | 325 | #endif /* _TRACE_KVMMMU_H */ |
278 | 326 | ||
279 | #undef TRACE_INCLUDE_PATH | 327 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index da20860b457a..7769699d48a8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -552,9 +552,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
552 | 552 | ||
553 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 553 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
554 | 554 | ||
555 | if (unlikely(error_code & PFERR_RSVD_MASK)) | 555 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
556 | return handle_mmio_page_fault(vcpu, addr, error_code, | 556 | r = handle_mmio_page_fault(vcpu, addr, error_code, |
557 | mmu_is_nested(vcpu)); | 557 | mmu_is_nested(vcpu)); |
558 | if (likely(r != RET_MMIO_PF_INVALID)) | ||
559 | return r; | ||
560 | }; | ||
558 | 561 | ||
559 | r = mmu_topup_memory_caches(vcpu); | 562 | r = mmu_topup_memory_caches(vcpu); |
560 | if (r) | 563 | if (r) |
@@ -792,7 +795,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
792 | pte_access &= gpte_access(vcpu, gpte); | 795 | pte_access &= gpte_access(vcpu, gpte); |
793 | protect_clean_gpte(&pte_access, gpte); | 796 | protect_clean_gpte(&pte_access, gpte); |
794 | 797 | ||
795 | if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) | 798 | if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, |
799 | &nr_present)) | ||
796 | continue; | 800 | continue; |
797 | 801 | ||
798 | if (gfn != sp->gfns[i]) { | 802 | if (gfn != sp->gfns[i]) { |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a14a6eaf871d..c0bc80391e40 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1026,7 +1026,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
1026 | g_tsc_offset = svm->vmcb->control.tsc_offset - | 1026 | g_tsc_offset = svm->vmcb->control.tsc_offset - |
1027 | svm->nested.hsave->control.tsc_offset; | 1027 | svm->nested.hsave->control.tsc_offset; |
1028 | svm->nested.hsave->control.tsc_offset = offset; | 1028 | svm->nested.hsave->control.tsc_offset = offset; |
1029 | } | 1029 | } else |
1030 | trace_kvm_write_tsc_offset(vcpu->vcpu_id, | ||
1031 | svm->vmcb->control.tsc_offset, | ||
1032 | offset); | ||
1030 | 1033 | ||
1031 | svm->vmcb->control.tsc_offset = offset + g_tsc_offset; | 1034 | svm->vmcb->control.tsc_offset = offset + g_tsc_offset; |
1032 | 1035 | ||
@@ -1044,6 +1047,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho | |||
1044 | svm->vmcb->control.tsc_offset += adjustment; | 1047 | svm->vmcb->control.tsc_offset += adjustment; |
1045 | if (is_guest_mode(vcpu)) | 1048 | if (is_guest_mode(vcpu)) |
1046 | svm->nested.hsave->control.tsc_offset += adjustment; | 1049 | svm->nested.hsave->control.tsc_offset += adjustment; |
1050 | else | ||
1051 | trace_kvm_write_tsc_offset(vcpu->vcpu_id, | ||
1052 | svm->vmcb->control.tsc_offset - adjustment, | ||
1053 | svm->vmcb->control.tsc_offset); | ||
1054 | |||
1047 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | 1055 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); |
1048 | } | 1056 | } |
1049 | 1057 | ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index fe5e00ed7036..545245d7cc63 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -756,6 +756,27 @@ TRACE_EVENT( | |||
756 | __entry->gpa_match ? "GPA" : "GVA") | 756 | __entry->gpa_match ? "GPA" : "GVA") |
757 | ); | 757 | ); |
758 | 758 | ||
759 | TRACE_EVENT(kvm_write_tsc_offset, | ||
760 | TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset, | ||
761 | __u64 next_tsc_offset), | ||
762 | TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset), | ||
763 | |||
764 | TP_STRUCT__entry( | ||
765 | __field( unsigned int, vcpu_id ) | ||
766 | __field( __u64, previous_tsc_offset ) | ||
767 | __field( __u64, next_tsc_offset ) | ||
768 | ), | ||
769 | |||
770 | TP_fast_assign( | ||
771 | __entry->vcpu_id = vcpu_id; | ||
772 | __entry->previous_tsc_offset = previous_tsc_offset; | ||
773 | __entry->next_tsc_offset = next_tsc_offset; | ||
774 | ), | ||
775 | |||
776 | TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id, | ||
777 | __entry->previous_tsc_offset, __entry->next_tsc_offset) | ||
778 | ); | ||
779 | |||
759 | #ifdef CONFIG_X86_64 | 780 | #ifdef CONFIG_X86_64 |
760 | 781 | ||
761 | #define host_clocks \ | 782 | #define host_clocks \ |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b30f5a54a2ab..a7e18551c968 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -2096,6 +2096,8 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
2096 | (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? | 2096 | (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? |
2097 | vmcs12->tsc_offset : 0)); | 2097 | vmcs12->tsc_offset : 0)); |
2098 | } else { | 2098 | } else { |
2099 | trace_kvm_write_tsc_offset(vcpu->vcpu_id, | ||
2100 | vmcs_read64(TSC_OFFSET), offset); | ||
2099 | vmcs_write64(TSC_OFFSET, offset); | 2101 | vmcs_write64(TSC_OFFSET, offset); |
2100 | } | 2102 | } |
2101 | } | 2103 | } |
@@ -2103,11 +2105,14 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
2103 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) | 2105 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) |
2104 | { | 2106 | { |
2105 | u64 offset = vmcs_read64(TSC_OFFSET); | 2107 | u64 offset = vmcs_read64(TSC_OFFSET); |
2108 | |||
2106 | vmcs_write64(TSC_OFFSET, offset + adjustment); | 2109 | vmcs_write64(TSC_OFFSET, offset + adjustment); |
2107 | if (is_guest_mode(vcpu)) { | 2110 | if (is_guest_mode(vcpu)) { |
2108 | /* Even when running L2, the adjustment needs to apply to L1 */ | 2111 | /* Even when running L2, the adjustment needs to apply to L1 */ |
2109 | to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; | 2112 | to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; |
2110 | } | 2113 | } else |
2114 | trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset, | ||
2115 | offset + adjustment); | ||
2111 | } | 2116 | } |
2112 | 2117 | ||
2113 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | 2118 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) |
@@ -4176,10 +4181,10 @@ static void ept_set_mmio_spte_mask(void) | |||
4176 | /* | 4181 | /* |
4177 | * EPT Misconfigurations can be generated if the value of bits 2:0 | 4182 | * EPT Misconfigurations can be generated if the value of bits 2:0 |
4178 | * of an EPT paging-structure entry is 110b (write/execute). | 4183 | * of an EPT paging-structure entry is 110b (write/execute). |
4179 | * Also, magic bits (0xffull << 49) is set to quickly identify mmio | 4184 | * Also, magic bits (0x3ull << 62) are set to quickly identify mmio |
4180 | * spte. | 4185 | * spte. |
4181 | */ | 4186 | */ |
4182 | kvm_mmu_set_mmio_spte_mask(0xffull << 49 | 0x6ull); | 4187 | kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull); |
4183 | } | 4188 | } |
4184 | 4189 | ||
4185 | /* | 4190 | /* |
@@ -5366,10 +5371,14 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
5366 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 5371 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
5367 | 5372 | ||
5368 | ret = handle_mmio_page_fault_common(vcpu, gpa, true); | 5373 | ret = handle_mmio_page_fault_common(vcpu, gpa, true); |
5369 | if (likely(ret == 1)) | 5374 | if (likely(ret == RET_MMIO_PF_EMULATE)) |
5370 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == | 5375 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == |
5371 | EMULATE_DONE; | 5376 | EMULATE_DONE; |
5372 | if (unlikely(!ret)) | 5377 | |
5378 | if (unlikely(ret == RET_MMIO_PF_INVALID)) | ||
5379 | return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0); | ||
5380 | |||
5381 | if (unlikely(ret == RET_MMIO_PF_RETRY)) | ||
5373 | return 1; | 5382 | return 1; |
5374 | 5383 | ||
5375 | /* It is the real ept misconfig */ | 5384 | /* It is the real ept misconfig */ |
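The ept_set_mmio_spte_mask() change narrows the EPT "magic" mmio marker from bits 49-56 to bits 62-63, which appears intended to keep bits 52-61 free for the MMIO spte generation added earlier in this series. A quick stand-alone check of that non-overlap, with the constants restated only for illustration, is:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* New EPT mmio marker: bits 62-63 plus the 110b misconfig pattern. */
    uint64_t mmio_mask = (0x3ull << 62) | 0x6ull;

    /* Generation bits used by the MMU: 3-11 (low) and 52-61 (high). */
    uint64_t gen_bits = (0x1ffull << 3) | (0x3ffull << 52);

    /* The marker must not overlap the generation bits, otherwise a stored
     * generation could be mistaken for (or corrupt) the mmio marker. */
    assert((mmio_mask & gen_bits) == 0);

    /* The old marker (bits 49-56) would have collided with bits 52-56. */
    uint64_t old_mask = (0xffull << 49) | 0x6ull;
    printf("old mask overlaps gen bits: %s\n",
           (old_mask & gen_bits) ? "yes" : "no");
    return 0;
}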
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 292e6ca89f42..d21bce505315 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -1193,20 +1193,37 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1193 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1193 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1194 | 1194 | ||
1195 | if (vcpu->arch.virtual_tsc_khz) { | 1195 | if (vcpu->arch.virtual_tsc_khz) { |
1196 | int faulted = 0; | ||
1197 | |||
1196 | /* n.b - signed multiplication and division required */ | 1198 | /* n.b - signed multiplication and division required */ |
1197 | usdiff = data - kvm->arch.last_tsc_write; | 1199 | usdiff = data - kvm->arch.last_tsc_write; |
1198 | #ifdef CONFIG_X86_64 | 1200 | #ifdef CONFIG_X86_64 |
1199 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; | 1201 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; |
1200 | #else | 1202 | #else |
1201 | /* do_div() only does unsigned */ | 1203 | /* do_div() only does unsigned */ |
1202 | asm("idivl %2; xor %%edx, %%edx" | 1204 | asm("1: idivl %[divisor]\n" |
1203 | : "=A"(usdiff) | 1205 | "2: xor %%edx, %%edx\n" |
1204 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); | 1206 | " movl $0, %[faulted]\n" |
1207 | "3:\n" | ||
1208 | ".section .fixup,\"ax\"\n" | ||
1209 | "4: movl $1, %[faulted]\n" | ||
1210 | " jmp 3b\n" | ||
1211 | ".previous\n" | ||
1212 | |||
1213 | _ASM_EXTABLE(1b, 4b) | ||
1214 | |||
1215 | : "=A"(usdiff), [faulted] "=r" (faulted) | ||
1216 | : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz)); | ||
1217 | |||
1205 | #endif | 1218 | #endif |
1206 | do_div(elapsed, 1000); | 1219 | do_div(elapsed, 1000); |
1207 | usdiff -= elapsed; | 1220 | usdiff -= elapsed; |
1208 | if (usdiff < 0) | 1221 | if (usdiff < 0) |
1209 | usdiff = -usdiff; | 1222 | usdiff = -usdiff; |
1223 | |||
1224 | /* idivl overflow => difference is larger than USEC_PER_SEC */ | ||
1225 | if (faulted) | ||
1226 | usdiff = USEC_PER_SEC; | ||
1210 | } else | 1227 | } else |
1211 | usdiff = USEC_PER_SEC; /* disable TSC match window below */ | 1228 | usdiff = USEC_PER_SEC; /* disable TSC match window below */ |
1212 | 1229 | ||
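The new asm fixup above catches the #DE that idivl raises when the scaled TSC delta no longer fits in 32 bits, after which the code clamps usdiff to USEC_PER_SEC. A portable sketch of the same guard in plain C, illustrative only and not the kernel's exact computation, is:

#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC 1000000L

/* Clamp instead of faulting: if (delta_ticks * 1000) / khz cannot be
 * represented in 32 bits -- the case where idivl would raise #DE -- treat
 * the difference as "at least one second", which is what the fixup does. */
static long tsc_delta_usec(int64_t delta_ticks, uint32_t khz)
{
    int64_t usdiff = (delta_ticks * 1000) / khz;

    if (usdiff > INT32_MAX || usdiff < INT32_MIN)
        return USEC_PER_SEC;        /* would have overflowed idivl */

    return usdiff < 0 ? -usdiff : usdiff;
}

int main(void)
{
    /* Small delta at a 2.5 GHz virtual TSC: well inside 32 bits. */
    printf("%ld\n", tsc_delta_usec(2500000, 2500000));
    /* Huge delta: the kernel's asm would fault; here we clamp instead. */
    printf("%ld\n", tsc_delta_usec(INT64_MAX / 2000, 1));
    return 0;
}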
@@ -1587,6 +1604,30 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1587 | return 0; | 1604 | return 0; |
1588 | } | 1605 | } |
1589 | 1606 | ||
1607 | /* | ||
1608 | * kvmclock updates which are isolated to a given vcpu, such as | ||
1609 | * vcpu->cpu migration, should not allow system_timestamp from | ||
1610 | * the rest of the vcpus to remain static. Otherwise ntp frequency | ||
1611 | * correction applies to one vcpu's system_timestamp but not | ||
1612 | * the others. | ||
1613 | * | ||
1614 | * So in those cases, request a kvmclock update for all vcpus. | ||
1615 | * The worst case for a remote vcpu to update its kvmclock | ||
1616 | * is then bounded by maximum nohz sleep latency. | ||
1617 | */ | ||
1618 | |||
1619 | static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) | ||
1620 | { | ||
1621 | int i; | ||
1622 | struct kvm *kvm = v->kvm; | ||
1623 | struct kvm_vcpu *vcpu; | ||
1624 | |||
1625 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
1626 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
1627 | kvm_vcpu_kick(vcpu); | ||
1628 | } | ||
1629 | } | ||
1630 | |||
1590 | static bool msr_mtrr_valid(unsigned msr) | 1631 | static bool msr_mtrr_valid(unsigned msr) |
1591 | { | 1632 | { |
1592 | switch (msr) { | 1633 | switch (msr) { |
@@ -1984,7 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1984 | kvmclock_reset(vcpu); | 2025 | kvmclock_reset(vcpu); |
1985 | 2026 | ||
1986 | vcpu->arch.time = data; | 2027 | vcpu->arch.time = data; |
1987 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 2028 | kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); |
1988 | 2029 | ||
1989 | /* we verify if the enable bit is set... */ | 2030 | /* we verify if the enable bit is set... */ |
1990 | if (!(data & 1)) | 2031 | if (!(data & 1)) |
@@ -2701,7 +2742,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2701 | * kvmclock on vcpu->cpu migration | 2742 | * kvmclock on vcpu->cpu migration |
2702 | */ | 2743 | */ |
2703 | if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) | 2744 | if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) |
2704 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 2745 | kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); |
2705 | if (vcpu->cpu != cpu) | 2746 | if (vcpu->cpu != cpu) |
2706 | kvm_migrate_timers(vcpu); | 2747 | kvm_migrate_timers(vcpu); |
2707 | vcpu->cpu = cpu; | 2748 | vcpu->cpu = cpu; |
@@ -5238,7 +5279,13 @@ static void kvm_set_mmio_spte_mask(void) | |||
5238 | * Set the reserved bits and the present bit of a paging-structure | 5279 | * Set the reserved bits and the present bit of a paging-structure |
5239 | * entry to generate page fault with PFER.RSV = 1. | 5280 | * entry to generate page fault with PFER.RSV = 1. |
5240 | */ | 5281 | */ |
5241 | mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr; | 5282 | /* Mask the reserved physical address bits. */ |
5283 | mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr; | ||
5284 | |||
5285 | /* Bit 62 is always reserved for 32bit host. */ | ||
5286 | mask |= 0x3ull << 62; | ||
5287 | |||
5288 | /* Set the present bit. */ | ||
5242 | mask |= 1ull; | 5289 | mask |= 1ull; |
5243 | 5290 | ||
5244 | #ifdef CONFIG_X86_64 | 5291 | #ifdef CONFIG_X86_64 |
@@ -5498,13 +5545,6 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) | |||
5498 | char instruction[3]; | 5545 | char instruction[3]; |
5499 | unsigned long rip = kvm_rip_read(vcpu); | 5546 | unsigned long rip = kvm_rip_read(vcpu); |
5500 | 5547 | ||
5501 | /* | ||
5502 | * Blow out the MMU to ensure that no other VCPU has an active mapping | ||
5503 | * to ensure that the updated hypercall appears atomically across all | ||
5504 | * VCPUs. | ||
5505 | */ | ||
5506 | kvm_mmu_zap_all(vcpu->kvm); | ||
5507 | |||
5508 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 5548 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
5509 | 5549 | ||
5510 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); | 5550 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); |
@@ -5702,6 +5742,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5702 | __kvm_migrate_timers(vcpu); | 5742 | __kvm_migrate_timers(vcpu); |
5703 | if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu)) | 5743 | if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu)) |
5704 | kvm_gen_update_masterclock(vcpu->kvm); | 5744 | kvm_gen_update_masterclock(vcpu->kvm); |
5745 | if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu)) | ||
5746 | kvm_gen_kvmclock_update(vcpu); | ||
5705 | if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { | 5747 | if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { |
5706 | r = kvm_guest_time_update(vcpu); | 5748 | r = kvm_guest_time_update(vcpu); |
5707 | if (unlikely(r)) | 5749 | if (unlikely(r)) |
@@ -6812,6 +6854,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
6812 | return -EINVAL; | 6854 | return -EINVAL; |
6813 | 6855 | ||
6814 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 6856 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
6857 | INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); | ||
6815 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 6858 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
6816 | 6859 | ||
6817 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 6860 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
@@ -7040,22 +7083,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
7040 | * If memory slot is created, or moved, we need to clear all | 7083 | * If memory slot is created, or moved, we need to clear all |
7041 | * mmio sptes. | 7084 | * mmio sptes. |
7042 | */ | 7085 | */ |
7043 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { | 7086 | kvm_mmu_invalidate_mmio_sptes(kvm); |
7044 | kvm_mmu_zap_mmio_sptes(kvm); | ||
7045 | kvm_reload_remote_mmus(kvm); | ||
7046 | } | ||
7047 | } | 7087 | } |
7048 | 7088 | ||
7049 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 7089 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
7050 | { | 7090 | { |
7051 | kvm_mmu_zap_all(kvm); | 7091 | kvm_mmu_invalidate_zap_all_pages(kvm); |
7052 | kvm_reload_remote_mmus(kvm); | ||
7053 | } | 7092 | } |
7054 | 7093 | ||
7055 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 7094 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
7056 | struct kvm_memory_slot *slot) | 7095 | struct kvm_memory_slot *slot) |
7057 | { | 7096 | { |
7058 | kvm_arch_flush_shadow_all(kvm); | 7097 | kvm_mmu_invalidate_zap_all_pages(kvm); |
7059 | } | 7098 | } |
7060 | 7099 | ||
7061 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 7100 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
@@ -7263,3 +7302,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | |||
7263 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | 7302 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); |
7264 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | 7303 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); |
7265 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | 7304 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); |
7305 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); | ||
diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/include/kvm/arm_arch_timer.h index 68cb9e1dfb81..6d9aeddc09bf 100644 --- a/arch/arm/include/asm/kvm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h | |||
@@ -61,6 +61,8 @@ struct arch_timer_cpu { | |||
61 | #ifdef CONFIG_KVM_ARM_TIMER | 61 | #ifdef CONFIG_KVM_ARM_TIMER |
62 | int kvm_timer_hyp_init(void); | 62 | int kvm_timer_hyp_init(void); |
63 | int kvm_timer_init(struct kvm *kvm); | 63 | int kvm_timer_init(struct kvm *kvm); |
64 | void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | ||
65 | const struct kvm_irq_level *irq); | ||
64 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); | 66 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); |
65 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); | 67 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); |
66 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); | 68 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); |
@@ -76,6 +78,8 @@ static inline int kvm_timer_init(struct kvm *kvm) | |||
76 | return 0; | 78 | return 0; |
77 | } | 79 | } |
78 | 80 | ||
81 | static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | ||
82 | const struct kvm_irq_level *irq) {} | ||
79 | static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} | 83 | static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} |
80 | static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} | 84 | static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} |
81 | static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} | 85 | static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} |
diff --git a/arch/arm/include/asm/kvm_vgic.h b/include/kvm/arm_vgic.h index 343744e4809c..343744e4809c 100644 --- a/arch/arm/include/asm/kvm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8db53cfaccdb..a63d83ebd151 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -125,6 +125,7 @@ static inline bool is_error_page(struct page *page) | |||
125 | #define KVM_REQ_MCLOCK_INPROGRESS 19 | 125 | #define KVM_REQ_MCLOCK_INPROGRESS 19 |
126 | #define KVM_REQ_EPR_EXIT 20 | 126 | #define KVM_REQ_EPR_EXIT 20 |
127 | #define KVM_REQ_SCAN_IOAPIC 21 | 127 | #define KVM_REQ_SCAN_IOAPIC 21 |
128 | #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 | ||
128 | 129 | ||
129 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 130 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
130 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 | 131 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 |
@@ -145,7 +146,8 @@ struct kvm_io_range { | |||
145 | #define NR_IOBUS_DEVS 1000 | 146 | #define NR_IOBUS_DEVS 1000 |
146 | 147 | ||
147 | struct kvm_io_bus { | 148 | struct kvm_io_bus { |
148 | int dev_count; | 149 | int dev_count; |
150 | int ioeventfd_count; | ||
149 | struct kvm_io_range range[]; | 151 | struct kvm_io_range range[]; |
150 | }; | 152 | }; |
151 | 153 | ||
diff --git a/arch/arm/kvm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 49a7516d81c7..c2e1ef4604e8 100644 --- a/arch/arm/kvm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
@@ -25,14 +25,12 @@ | |||
25 | #include <clocksource/arm_arch_timer.h> | 25 | #include <clocksource/arm_arch_timer.h> |
26 | #include <asm/arch_timer.h> | 26 | #include <asm/arch_timer.h> |
27 | 27 | ||
28 | #include <asm/kvm_vgic.h> | 28 | #include <kvm/arm_vgic.h> |
29 | #include <asm/kvm_arch_timer.h> | 29 | #include <kvm/arm_arch_timer.h> |
30 | 30 | ||
31 | static struct timecounter *timecounter; | 31 | static struct timecounter *timecounter; |
32 | static struct workqueue_struct *wqueue; | 32 | static struct workqueue_struct *wqueue; |
33 | static struct kvm_irq_level timer_irq = { | 33 | static unsigned int host_vtimer_irq; |
34 | .level = 1, | ||
35 | }; | ||
36 | 34 | ||
37 | static cycle_t kvm_phys_timer_read(void) | 35 | static cycle_t kvm_phys_timer_read(void) |
38 | { | 36 | { |
@@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) | |||
67 | 65 | ||
68 | timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; | 66 | timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; |
69 | kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, | 67 | kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, |
70 | vcpu->arch.timer_cpu.irq->irq, | 68 | timer->irq->irq, |
71 | vcpu->arch.timer_cpu.irq->level); | 69 | timer->irq->level); |
72 | } | 70 | } |
73 | 71 | ||
74 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | 72 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) |
@@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | |||
156 | timer_arm(timer, ns); | 154 | timer_arm(timer, ns); |
157 | } | 155 | } |
158 | 156 | ||
157 | void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | ||
158 | const struct kvm_irq_level *irq) | ||
159 | { | ||
160 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
161 | |||
162 | /* | ||
163 | * The vcpu timer irq number cannot be determined in | ||
164 | * kvm_timer_vcpu_init() because it is called well before | ||
165 | * kvm_vcpu_set_target(). To handle this, we determine the | ||
166 | * vcpu timer irq number when the vcpu is reset. | ||
167 | */ | ||
168 | timer->irq = irq; | ||
169 | } | ||
170 | |||
159 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) | 171 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) |
160 | { | 172 | { |
161 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 173 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; |
@@ -163,12 +175,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) | |||
163 | INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); | 175 | INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); |
164 | hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 176 | hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
165 | timer->timer.function = kvm_timer_expire; | 177 | timer->timer.function = kvm_timer_expire; |
166 | timer->irq = &timer_irq; | ||
167 | } | 178 | } |
168 | 179 | ||
169 | static void kvm_timer_init_interrupt(void *info) | 180 | static void kvm_timer_init_interrupt(void *info) |
170 | { | 181 | { |
171 | enable_percpu_irq(timer_irq.irq, 0); | 182 | enable_percpu_irq(host_vtimer_irq, 0); |
172 | } | 183 | } |
173 | 184 | ||
174 | 185 | ||
@@ -182,7 +193,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self, | |||
182 | break; | 193 | break; |
183 | case CPU_DYING: | 194 | case CPU_DYING: |
184 | case CPU_DYING_FROZEN: | 195 | case CPU_DYING_FROZEN: |
185 | disable_percpu_irq(timer_irq.irq); | 196 | disable_percpu_irq(host_vtimer_irq); |
186 | break; | 197 | break; |
187 | } | 198 | } |
188 | 199 | ||
@@ -230,7 +241,7 @@ int kvm_timer_hyp_init(void) | |||
230 | goto out; | 241 | goto out; |
231 | } | 242 | } |
232 | 243 | ||
233 | timer_irq.irq = ppi; | 244 | host_vtimer_irq = ppi; |
234 | 245 | ||
235 | err = register_cpu_notifier(&kvm_timer_cpu_nb); | 246 | err = register_cpu_notifier(&kvm_timer_cpu_nb); |
236 | if (err) { | 247 | if (err) { |
diff --git a/arch/arm/kvm/vgic.c b/virt/kvm/arm/vgic.c index 17c5ac7d10ed..17c5ac7d10ed 100644 --- a/arch/arm/kvm/vgic.c +++ b/virt/kvm/arm/vgic.c | |||
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 64ee720b75c7..1550637d1b10 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -753,6 +753,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
753 | if (ret < 0) | 753 | if (ret < 0) |
754 | goto unlock_fail; | 754 | goto unlock_fail; |
755 | 755 | ||
756 | kvm->buses[bus_idx]->ioeventfd_count++; | ||
756 | list_add_tail(&p->list, &kvm->ioeventfds); | 757 | list_add_tail(&p->list, &kvm->ioeventfds); |
757 | 758 | ||
758 | mutex_unlock(&kvm->slots_lock); | 759 | mutex_unlock(&kvm->slots_lock); |
@@ -798,6 +799,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
798 | continue; | 799 | continue; |
799 | 800 | ||
800 | kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); | 801 | kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); |
802 | kvm->buses[bus_idx]->ioeventfd_count--; | ||
801 | ioeventfd_release(p); | 803 | ioeventfd_release(p); |
802 | ret = 0; | 804 | ret = 0; |
803 | break; | 805 | break; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 302681c4aa44..1580dd4ace4e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -2926,7 +2926,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
2926 | struct kvm_io_bus *new_bus, *bus; | 2926 | struct kvm_io_bus *new_bus, *bus; |
2927 | 2927 | ||
2928 | bus = kvm->buses[bus_idx]; | 2928 | bus = kvm->buses[bus_idx]; |
2929 | if (bus->dev_count > NR_IOBUS_DEVS - 1) | 2929 | /* exclude ioeventfd which is limited by maximum fd */ |
2930 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | ||
2930 | return -ENOSPC; | 2931 | return -ENOSPC; |
2931 | 2932 | ||
2932 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * | 2933 | new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * |
@@ -3181,6 +3182,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
3181 | 3182 | ||
3182 | out_undebugfs: | 3183 | out_undebugfs: |
3183 | unregister_syscore_ops(&kvm_syscore_ops); | 3184 | unregister_syscore_ops(&kvm_syscore_ops); |
3185 | misc_deregister(&kvm_dev); | ||
3184 | out_unreg: | 3186 | out_unreg: |
3185 | kvm_async_pf_deinit(); | 3187 | kvm_async_pf_deinit(); |
3186 | out_free: | 3188 | out_free: |