author     Linus Torvalds <torvalds@linux-foundation.org>  2019-03-15 18:00:28 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-03-15 18:00:28 -0400
commit     636deed6c0bc137a7c4f4a97ae1fcf0ad75323da
tree       7bd27189b8e30e3c1466f7730831a08db65f8646
parent     aa2e3ac64ace127f403be85aa4d6015b859385f2
parent     4a605bc08e98381d8df61c30a4acb2eac15eb7da
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"ARM:
- some cleanups
- direct physical timer assignment
- cache sanitization for 32-bit guests
s390:
- interrupt cleanup
- introduction of the Guest Information Block
- preparation for processor subfunctions in cpu models
PPC:
- bug fixes and improvements, especially related to machine checks
and protection keys
x86:
- many, many cleanups, including removing a bunch of MMU code for
unnecessary optimizations
- AVIC fixes
Generic:
- memcg accounting"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (147 commits)
kvm: vmx: fix formatting of a comment
KVM: doc: Document the life cycle of a VM and its resources
MAINTAINERS: Add KVM selftests to existing KVM entry
Revert "KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range()"
KVM: PPC: Book3S: Add count cache flush parameters to kvmppc_get_cpu_char()
KVM: PPC: Fix compilation when KVM is not enabled
KVM: Minor cleanups for kvm_main.c
KVM: s390: add debug logging for cpu model subfunctions
KVM: s390: implement subfunction processor calls
arm64: KVM: Fix architecturally invalid reset value for FPEXC32_EL2
KVM: arm/arm64: Remove unused timer variable
KVM: PPC: Book3S: Improve KVM reference counting
KVM: PPC: Book3S HV: Fix build failure without IOMMU support
Revert "KVM: Eliminate extra function calls in kvm_get_dirty_log_protect()"
x86: kvmguest: use TSC clocksource if invariant TSC is exposed
KVM: Never start grow vCPU halt_poll_ns from value below halt_poll_ns_grow_start
KVM: Expose the initial start value in grow_halt_poll_ns() as a module parameter
KVM: grow_halt_poll_ns() should never shrink vCPU halt_poll_ns
KVM: x86/mmu: Consolidate kvm_mmu_zap_all() and kvm_mmu_zap_mmio_sptes()
KVM: x86/mmu: WARN if zapping a MMIO spte results in zapping children
...
93 files changed, 2623 insertions, 1199 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 356156f5c52d..7de9eee73fcd 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -45,6 +45,23 @@ the API. The only supported use is one virtual machine per process,
 and one vcpu per thread.
 
 
+It is important to note that although VM ioctls may only be issued from
+the process that created the VM, a VM's lifecycle is associated with its
+file descriptor, not its creator (process). In other words, the VM and
+its resources, *including the associated address space*, are not freed
+until the last reference to the VM's file descriptor has been released.
+For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will
+not be freed until both the parent (original) process and its child have
+put their references to the VM's file descriptor.
+
+Because a VM's resources are not freed until the last reference to its
+file descriptor is released, creating additional references to a VM
+via fork(), dup(), etc... without careful consideration is strongly
+discouraged and may have unwanted side effects, e.g. memory allocated
+by and on behalf of the VM's process may not be freed/unaccounted when
+the VM is shut down.
+
+
 3. Extensions
 -------------
 
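
As an aside, here is a minimal user-space sketch of the lifecycle rule the
new paragraphs document (illustration only, not part of the patch; error
handling omitted): the child inherits the VM file descriptor across fork(),
so the kernel only tears the VM down once both processes have put their
reference.

#include <fcntl.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);	/* the VM's file descriptor */

	if (fork() == 0) {
		/* child: the inherited fd keeps the VM (and its memory) alive */
		close(vm);			/* child puts its reference */
		_exit(0);
	}
	close(vm);				/* parent puts its reference */
	wait(NULL);				/* only now is the VM freed */
	return 0;
}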
diff --git a/Documentation/virtual/kvm/halt-polling.txt b/Documentation/virtual/kvm/halt-polling.txt
index 4a8418318769..4f791b128dd2 100644
--- a/Documentation/virtual/kvm/halt-polling.txt
+++ b/Documentation/virtual/kvm/halt-polling.txt
@@ -53,7 +53,8 @@ the global max polling interval then the polling interval can be increased in
 the hope that next time during the longer polling interval the wake up source
 will be received while the host is polling and the latency benefits will be
 received. The polling interval is grown in the function grow_halt_poll_ns() and
-is multiplied by the module parameter halt_poll_ns_grow.
+is multiplied by the module parameters halt_poll_ns_grow and
+halt_poll_ns_grow_start.
 
 In the event that the total block time was greater than the global max polling
 interval then the host will never poll for long enough (limited by the global
@@ -80,22 +81,30 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module
 parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
 powerpc kvm-hv case.
 
 Module Parameter          | Description                      | Default Value
 --------------------------------------------------------------------------------
-halt_poll_ns              | The global max polling interval  | KVM_HALT_POLL_NS_DEFAULT
-                          | which defines the ceiling value  |
-                          | of the polling interval for      | (per arch value)
-                          | each vcpu.                       |
---------------------------------------------------------------------------------
-halt_poll_ns_grow         | The value by which the halt      | 2
-                          | polling interval is multiplied   |
-                          | in the grow_halt_poll_ns()       |
-                          | function.                        |
---------------------------------------------------------------------------------
-halt_poll_ns_shrink       | The value by which the halt      | 0
-                          | polling interval is divided in   |
-                          | the shrink_halt_poll_ns()        |
-                          | function.                        |
+halt_poll_ns              | The global max polling           | KVM_HALT_POLL_NS_DEFAULT
+                          | interval which defines           |
+                          | the ceiling value of the         |
+                          | polling interval for             | (per arch value)
+                          | each vcpu.                       |
+--------------------------------------------------------------------------------
+halt_poll_ns_grow         | The value by which the           | 2
+                          | halt polling interval is         |
+                          | multiplied in the                |
+                          | grow_halt_poll_ns()              |
+                          | function.                        |
+--------------------------------------------------------------------------------
+halt_poll_ns_grow_start   | The initial value to grow        | 10000
+                          | to from zero in the              |
+                          | grow_halt_poll_ns()              |
+                          | function.                        |
+--------------------------------------------------------------------------------
+halt_poll_ns_shrink       | The value by which the           | 0
+                          | halt polling interval is         |
+                          | divided in the                   |
+                          | shrink_halt_poll_ns()            |
+                          | function.                        |
 --------------------------------------------------------------------------------
 
 These module parameters can be set from the debugfs files in:
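
For illustration, a simplified model of the grow path these three parameters
control (a sketch of the behaviour described above, not the kernel code
verbatim; the variable names mirror the module parameters and the defaults
shown in the table):

static unsigned int halt_poll_ns = 200000;		/* global ceiling */
static unsigned int halt_poll_ns_grow = 2;		/* multiplier */
static unsigned int halt_poll_ns_grow_start = 10000;	/* floor when growing from zero */

static unsigned int grow_halt_poll_ns(unsigned int old)
{
	unsigned int val = old * halt_poll_ns_grow;

	/* never start growing from below the configured start value */
	if (val < halt_poll_ns_grow_start)
		val = halt_poll_ns_grow_start;
	/* never exceed the global max polling interval */
	if (val > halt_poll_ns)
		val = halt_poll_ns;
	return val;
}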
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index e507a9e0421e..f365102c80f5 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -224,10 +224,6 @@ Shadow pages contain the following information:
     A bitmap indicating which sptes in spt point (directly or indirectly) at
     pages that may be unsynchronized.  Used to quickly locate all unsychronized
     pages reachable from a given page.
-  mmu_valid_gen:
-    Generation number of the page.  It is compared with kvm->arch.mmu_valid_gen
-    during hash table lookup, and used to skip invalidated shadow pages (see
-    "Zapping all pages" below.)
   clear_spte_count:
     Only present on 32-bit hosts, where a 64-bit spte cannot be written
     atomically.  The reader uses this while running out of the MMU lock
@@ -402,27 +398,6 @@ causes its disallow_lpage to be incremented, thus preventing instantiation of
 a large spte.  The frames at the end of an unaligned memory slot have
 artificially inflated ->disallow_lpages so they can never be instantiated.
 
-Zapping all pages (page generation count)
-=========================================
-
-For the large memory guests, walking and zapping all pages is really slow
-(because there are a lot of pages), and also blocks memory accesses of
-all VCPUs because it needs to hold the MMU lock.
-
-To make it be more scalable, kvm maintains a global generation number
-which is stored in kvm->arch.mmu_valid_gen.  Every shadow page stores
-the current global generation-number into sp->mmu_valid_gen when it
-is created.  Pages with a mismatching generation number are "obsolete".
-
-When KVM need zap all shadow pages sptes, it just simply increases the global
-generation-number then reload root shadow pages on all vcpus.  As the VCPUs
-create new shadow page tables, the old pages are not used because of the
-mismatching generation number.
-
-KVM then walks through all pages and zaps obsolete pages.  While the zap
-operation needs to take the MMU lock, the lock can be released periodically
-so that the VCPUs can make progress.
-
 Fast invalidation of MMIO sptes
 ===============================
 
@@ -435,8 +410,7 @@ shadow pages, and is made more scalable with a similar technique.
 MMIO sptes have a few spare bits, which are used to store a
 generation number.  The global generation number is stored in
 kvm_memslots(kvm)->generation, and increased whenever guest memory info
-changes.  This generation number is distinct from the one described in
-the previous section.
+changes.
 
 When KVM finds an MMIO spte, it checks the generation number of the spte.
 If the generation number of the spte does not equal the global generation
@@ -452,13 +426,16 @@ stored into the MMIO spte.  Thus, the MMIO spte might be created based on
 out-of-date information, but with an up-to-date generation number.
 
 To avoid this, the generation number is incremented again after synchronize_srcu
-returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+returns; thus, bit 63 of kvm_memslots(kvm)->generation is set to 1 only during a
 memslot update, while some SRCU readers might be using the old copy.  We do not
 want to use an MMIO sptes created with an odd generation number, and we can do
-this without losing a bit in the MMIO spte.  The low bit of the generation
-is not stored in MMIO spte, and presumed zero when it is extracted out of the
-spte.  If KVM is unlucky and creates an MMIO spte while the low bit is 1,
-the next access to the spte will always be a cache miss.
+this without losing a bit in the MMIO spte.  The "update in-progress" bit of the
+generation is not stored in MMIO spte, and so is implicitly zero when the
+generation is extracted out of the spte.  If KVM is unlucky and creates an MMIO
+spte while an update is in-progress, the next access to the spte will always be
+a cache miss.  For example, a subsequent access during the update window will
+miss due to the in-progress flag diverging, while an access after the update
+window closes will have a higher generation number (as compared to the spte).
 
 
 Further reading
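
A simplified sketch of the staleness check described above (not the kernel's
exact code; the bit position follows the text's "update in-progress" flag in
bit 63, and memslots_generation stands in for kvm_memslots(kvm)->generation):

#include <stdbool.h>
#include <stdint.h>

#define GEN_UPDATE_IN_PROGRESS	(UINT64_C(1) << 63)

static uint64_t memslots_generation;

static bool mmio_spte_is_stale(uint64_t spte_gen)
{
	/* the in-progress bit is never stored in the spte, so mask it out */
	uint64_t gen = memslots_generation & ~GEN_UPDATE_IN_PROGRESS;

	/*
	 * A spte created during a memslot update can never match again:
	 * the generation is bumped once more when the update completes.
	 */
	return spte_gen != gen;
}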
diff --git a/MAINTAINERS b/MAINTAINERS
index c009ad17ae64..e17ebf70b548 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8461,6 +8461,7 @@ F: include/linux/kvm*
 F: include/kvm/iodev.h
 F: virt/kvm/*
 F: tools/kvm/
+F: tools/testing/selftests/kvm/
 
 KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd)
 M: Joerg Roedel <joro@8bytes.org>
@@ -8470,29 +8471,25 @@ S: Maintained
 F: arch/x86/include/asm/svm.h
 F: arch/x86/kvm/svm.c
 
-KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm)
+KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64)
 M: Christoffer Dall <christoffer.dall@arm.com>
 M: Marc Zyngier <marc.zyngier@arm.com>
+R: James Morse <james.morse@arm.com>
+R: Julien Thierry <julien.thierry@arm.com>
+R: Suzuki K Poulose <suzuki.poulose@arm.com>
 L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L: kvmarm@lists.cs.columbia.edu
 W: http://systems.cs.columbia.edu/projects/kvm-arm
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
-S: Supported
+S: Maintained
 F: arch/arm/include/uapi/asm/kvm*
 F: arch/arm/include/asm/kvm*
 F: arch/arm/kvm/
-F: virt/kvm/arm/
-F: include/kvm/arm_*
-
-KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
-M: Christoffer Dall <christoffer.dall@arm.com>
-M: Marc Zyngier <marc.zyngier@arm.com>
-L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L: kvmarm@lists.cs.columbia.edu
-S: Maintained
 F: arch/arm64/include/uapi/asm/kvm*
 F: arch/arm64/include/asm/kvm*
 F: arch/arm64/kvm/
+F: virt/kvm/arm/
+F: include/kvm/arm_*
 
 KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
 M: James Hogan <jhogan@kernel.org>
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index f6f485f4744e..d15b8c99f1b3 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -55,7 +55,7 @@
 #define ICH_VTR			__ACCESS_CP15(c12, 4, c11, 1)
 #define ICH_MISR		__ACCESS_CP15(c12, 4, c11, 2)
 #define ICH_EISR		__ACCESS_CP15(c12, 4, c11, 3)
-#define ICH_ELSR		__ACCESS_CP15(c12, 4, c11, 5)
+#define ICH_ELRSR		__ACCESS_CP15(c12, 4, c11, 5)
 #define ICH_VMCR		__ACCESS_CP15(c12, 4, c11, 7)
 
 #define __LR0(x)		__ACCESS_CP15(c12, 4, c12, x)
@@ -152,7 +152,7 @@ CPUIF_MAP(ICH_HCR, ICH_HCR_EL2)
 CPUIF_MAP(ICH_VTR, ICH_VTR_EL2)
 CPUIF_MAP(ICH_MISR, ICH_MISR_EL2)
 CPUIF_MAP(ICH_EISR, ICH_EISR_EL2)
-CPUIF_MAP(ICH_ELSR, ICH_ELSR_EL2)
+CPUIF_MAP(ICH_ELRSR, ICH_ELRSR_EL2)
 CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2)
 CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2)
 CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 77121b713bef..8927cae7c966 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -265,6 +265,14 @@ static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
 	}
 }
 
+static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_trap_is_iabt(vcpu))
+		return false;
+
+	return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 50e89869178a..770d73257ad9 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -26,6 +26,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
+#include <asm/smp_plat.h>
 #include <kvm/arm_arch_timer.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -57,10 +58,13 @@ int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
-struct kvm_arch {
-	/* VTTBR value associated with below pgd and vmid */
-	u64 vttbr;
+struct kvm_vmid {
+	/* The VMID generation used for the virt. memory system */
+	u64 vmid_gen;
+	u32 vmid;
+};
 
+struct kvm_arch {
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
 
@@ -70,11 +74,11 @@ struct kvm_arch {
 	 */
 
 	/* The VMID generation used for the virt. memory system */
-	u64 vmid_gen;
-	u32 vmid;
+	struct kvm_vmid vmid;
 
 	/* Stage-2 page table */
 	pgd_t *pgd;
+	phys_addr_t pgd_phys;
 
 	/* Interrupt controller */
 	struct vgic_dist vgic;
@@ -148,6 +152,13 @@ struct kvm_cpu_context {
 
 typedef struct kvm_cpu_context kvm_cpu_context_t;
 
+static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
+					     int cpu)
+{
+	/* The host's MPIDR is immutable, so let's set it up at boot time */
+	cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu);
+}
+
 struct vcpu_reset_state {
 	unsigned long pc;
 	unsigned long r0;
@@ -224,7 +235,35 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
-unsigned long kvm_call_hyp(void *hypfn, ...);
+
+unsigned long __kvm_call_hyp(void *hypfn, ...);
+
+/*
+ * The has_vhe() part doesn't get emitted, but is used for type-checking.
+ */
+#define kvm_call_hyp(f, ...)						\
+	do {								\
+		if (has_vhe()) {					\
+			f(__VA_ARGS__);					\
+		} else {						\
+			__kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \
+		}							\
+	} while(0)
+
+#define kvm_call_hyp_ret(f, ...)					\
+	({								\
+		typeof(f(__VA_ARGS__)) ret;				\
+									\
+		if (has_vhe()) {					\
+			ret = f(__VA_ARGS__);				\
+		} else {						\
+			ret = __kvm_call_hyp(kvm_ksym_ref(f),		\
+					     ##__VA_ARGS__);		\
+		}							\
+									\
+		ret;							\
+	})
+
 void force_vm_exit(const cpumask_t *mask);
 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
 			      struct kvm_vcpu_events *events);
@@ -275,7 +314,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	 * compliant with the PCS!).
 	 */
 
-	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
+	__kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }
 
 static inline void __cpu_init_stage2(void)
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index e93a0cac9add..87bcd18df8d5 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -40,6 +40,7 @@
 #define TTBR1		__ACCESS_CP15_64(1, c2)
 #define VTTBR		__ACCESS_CP15_64(6, c2)
 #define PAR		__ACCESS_CP15_64(0, c7)
+#define CNTP_CVAL	__ACCESS_CP15_64(2, c14)
 #define CNTV_CVAL	__ACCESS_CP15_64(3, c14)
 #define CNTVOFF		__ACCESS_CP15_64(4, c14)
 
@@ -85,6 +86,7 @@
 #define TID_PRIV	__ACCESS_CP15(c13, 0, c0, 4)
 #define HTPIDR		__ACCESS_CP15(c13, 4, c0, 2)
 #define CNTKCTL		__ACCESS_CP15(c14, 0, c1, 0)
+#define CNTP_CTL	__ACCESS_CP15(c14, 0, c2, 1)
 #define CNTV_CTL	__ACCESS_CP15(c14, 0, c3, 1)
 #define CNTHCTL		__ACCESS_CP15(c14, 4, c1, 0)
 
@@ -94,6 +96,8 @@
 #define read_sysreg_el0(r)		read_sysreg(r##_el0)
 #define write_sysreg_el0(v, r)		write_sysreg(v, r##_el0)
 
+#define cntp_ctl_el0			CNTP_CTL
+#define cntp_cval_el0			CNTP_CVAL
 #define cntv_ctl_el0			CNTV_CTL
 #define cntv_cval_el0			CNTV_CVAL
 #define cntvoff_el2			CNTVOFF
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 3a875fc1b63c..2de96a180166 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -421,9 +421,14 @@ static inline int hyp_map_aux_data(void)
 
 static inline void kvm_set_ipa_limit(void) {}
 
-static inline bool kvm_cpu_has_cnp(void)
+static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
 {
-	return false;
+	struct kvm_vmid *vmid = &kvm->arch.vmid;
+	u64 vmid_field, baddr;
+
+	baddr = kvm->arch.pgd_phys;
+	vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
+	return kvm_phys_to_vttbr(baddr) | vmid_field;
 }
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 48de846f2246..531e59f5be9c 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -8,9 +8,8 @@ ifeq ($(plus_virt),+virt)
 plus_virt_def := -DREQUIRES_VIRT=1
 endif
 
-ccflags-y += -Iarch/arm/kvm -Ivirt/kvm/arm/vgic
-CFLAGS_arm.o := -I. $(plus_virt_def)
-CFLAGS_mmu.o := -I.
+ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic
+CFLAGS_arm.o := $(plus_virt_def)
 
 AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index e8bd288fd5be..14915c78bd99 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -293,15 +293,16 @@ static bool access_cntp_tval(struct kvm_vcpu *vcpu,
 			     const struct coproc_params *p,
 			     const struct coproc_reg *r)
 {
-	u64 now = kvm_phys_timer_read();
-	u64 val;
+	u32 val;
 
 	if (p->is_write) {
 		val = *vcpu_reg(vcpu, p->Rt1);
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now);
+		kvm_arm_timer_write_sysreg(vcpu,
+					   TIMER_PTIMER, TIMER_REG_TVAL, val);
 	} else {
-		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
-		*vcpu_reg(vcpu, p->Rt1) = val - now;
+		val = kvm_arm_timer_read_sysreg(vcpu,
+						TIMER_PTIMER, TIMER_REG_TVAL);
+		*vcpu_reg(vcpu, p->Rt1) = val;
 	}
 
 	return true;
@@ -315,9 +316,11 @@ static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
 
 	if (p->is_write) {
 		val = *vcpu_reg(vcpu, p->Rt1);
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val);
+		kvm_arm_timer_write_sysreg(vcpu,
+					   TIMER_PTIMER, TIMER_REG_CTL, val);
 	} else {
-		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
+		val = kvm_arm_timer_read_sysreg(vcpu,
+						TIMER_PTIMER, TIMER_REG_CTL);
 		*vcpu_reg(vcpu, p->Rt1) = val;
 	}
 
@@ -333,9 +336,11 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu,
 	if (p->is_write) {
 		val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
 		val |= *vcpu_reg(vcpu, p->Rt1);
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val);
+		kvm_arm_timer_write_sysreg(vcpu,
+					   TIMER_PTIMER, TIMER_REG_CVAL, val);
 	} else {
-		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+		val = kvm_arm_timer_read_sysreg(vcpu,
+						TIMER_PTIMER, TIMER_REG_CVAL);
 		*vcpu_reg(vcpu, p->Rt1) = val;
 		*vcpu_reg(vcpu, p->Rt2) = val >> 32;
 	}
diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c
index c4782812714c..8bf895ec6e04 100644
--- a/arch/arm/kvm/hyp/cp15-sr.c
+++ b/arch/arm/kvm/hyp/cp15-sr.c
@@ -27,7 +27,6 @@ static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx)
 
 void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
 {
-	ctxt->cp15[c0_MPIDR]		= read_sysreg(VMPIDR);
 	ctxt->cp15[c0_CSSELR]		= read_sysreg(CSSELR);
 	ctxt->cp15[c1_SCTLR]		= read_sysreg(SCTLR);
 	ctxt->cp15[c1_CPACR]		= read_sysreg(CPACR);
diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S
index aa3f9a9837ac..6ed3cf23fe89 100644
--- a/arch/arm/kvm/hyp/hyp-entry.S
+++ b/arch/arm/kvm/hyp/hyp-entry.S
@@ -176,7 +176,7 @@ THUMB(	orr	lr, lr, #PSR_T_BIT	)
 	msr	spsr_cxsf, lr
 	ldr	lr, =panic
 	msr	ELR_hyp, lr
-	ldr	lr, =kvm_call_hyp
+	ldr	lr, =__kvm_call_hyp
 	clrex
 	eret
 ENDPROC(__hyp_do_panic)
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index acf1c37fa49c..3b058a5d7c5f 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -77,7 +77,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	write_sysreg(kvm_get_vttbr(kvm), VTTBR);
 	write_sysreg(vcpu->arch.midr, VPIDR);
 }
 
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index c0edd450e104..8e4afba73635 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -41,7 +41,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 
 	/* Switch to requested VMID */
 	kvm = kern_hyp_va(kvm);
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	write_sysreg(kvm_get_vttbr(kvm), VTTBR);
 	isb();
 
 	write_sysreg(0, TLBIALLIS);
@@ -61,7 +61,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
 
 	/* Switch to requested VMID */
-	write_sysreg(kvm->arch.vttbr, VTTBR);
+	write_sysreg(kvm_get_vttbr(kvm), VTTBR);
 	isb();
 
 	write_sysreg(0, TLBIALL);
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 80a1d6cd261c..a08e6419ebe9 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -42,7 +42,7 @@
  * r12:       caller save
  * rest:      callee save
  */
-ENTRY(kvm_call_hyp)
+ENTRY(__kvm_call_hyp)
 	hvc	#0
 	bx	lr
-ENDPROC(kvm_call_hyp)
+ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 506386a3edde..d3842791e1c4 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -77,6 +77,10 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 	 */
 	if (!vcpu_el1_is_32bit(vcpu))
 		vcpu->arch.hcr_el2 |= HCR_TID3;
+
+	if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
+	    vcpu_el1_is_32bit(vcpu))
+		vcpu->arch.hcr_el2 |= HCR_TID2;
 }
 
 static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
@@ -331,6 +335,14 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 	return ESR_ELx_SYS64_ISS_RT(esr);
 }
 
+static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_trap_is_iabt(vcpu))
+		return false;
+
+	return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
 	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 222af1d2c3e4..a01fe087e022 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -31,6 +31,7 @@
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
+#include <asm/smp_plat.h>
 #include <asm/thread_info.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -58,16 +59,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
-struct kvm_arch {
+struct kvm_vmid {
 	/* The VMID generation used for the virt. memory system */
 	u64 vmid_gen;
 	u32 vmid;
+};
+
+struct kvm_arch {
+	struct kvm_vmid vmid;
 
 	/* stage2 entry level table */
 	pgd_t *pgd;
+	phys_addr_t pgd_phys;
 
-	/* VTTBR value associated with above pgd and vmid */
-	u64 vttbr;
 	/* VTCR_EL2 value for this VM */
 	u64 vtcr;
 
@@ -382,7 +386,36 @@ void kvm_arm_halt_guest(struct kvm *kvm);
 void kvm_arm_resume_guest(struct kvm *kvm);
 
 u64 __kvm_call_hyp(void *hypfn, ...);
-#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
+
+/*
+ * The couple of isb() below are there to guarantee the same behaviour
+ * on VHE as on !VHE, where the eret to EL1 acts as a context
+ * synchronization event.
+ */
+#define kvm_call_hyp(f, ...)						\
+	do {								\
+		if (has_vhe()) {					\
+			f(__VA_ARGS__);					\
+			isb();						\
+		} else {						\
+			__kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \
+		}							\
+	} while(0)
+
+#define kvm_call_hyp_ret(f, ...)					\
+	({								\
+		typeof(f(__VA_ARGS__)) ret;				\
+									\
+		if (has_vhe()) {					\
+			ret = f(__VA_ARGS__);				\
+			isb();						\
+		} else {						\
+			ret = __kvm_call_hyp(kvm_ksym_ref(f),		\
+					     ##__VA_ARGS__);		\
+		}							\
+									\
+		ret;							\
+	})
 
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
@@ -401,6 +434,13 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
 DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
 
+static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
+					     int cpu)
+{
+	/* The host's MPIDR is immutable, so let's set it up at boot time */
+	cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu);
+}
+
 void __kvm_enable_ssbs(void);
 
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
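
For context, two hypothetical kernel-side callers showing how the split is
meant to be used (a sketch, not code from this patch): calls that return a
value go through kvm_call_hyp_ret(), void calls through kvm_call_hyp(); on
VHE the function runs in place (followed by an isb()), on !VHE it traps to
EL2 via __kvm_call_hyp().

static u64 get_mdcr(void)
{
	return kvm_call_hyp_ret(__kvm_get_mdcr_el2);
}

static void flush_vm(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}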
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index a80a7ef57325..4da765f2cca5 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -21,6 +21,7 @@
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 #include <asm/alternative.h>
+#include <asm/kvm_mmu.h>
 #include <asm/sysreg.h>
 
 #define __hyp_text __section(.hyp.text) notrace
@@ -163,7 +164,7 @@ void __noreturn __hyp_do_panic(unsigned long, ...);
 static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
 {
 	write_sysreg(kvm->arch.vtcr, vtcr_el2);
-	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);
 
 	/*
 	 * ARM erratum 1165522 requires the actual execution of the above
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 8af4b1befa42..b0742a16c6c9 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -138,7 +138,8 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
 })
 
 /*
- * We currently only support a 40bit IPA.
+ * We currently support using a VM-specified IPA size. For backward
+ * compatibility, the default IPA size is fixed to 40bits.
  */
 #define KVM_PHYS_SHIFT	(40)
 
@@ -591,9 +592,15 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
 	return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
 }
 
-static inline bool kvm_cpu_has_cnp(void)
+static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
 {
-	return system_supports_cnp();
+	struct kvm_vmid *vmid = &kvm->arch.vmid;
+	u64 vmid_field, baddr;
+	u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
+
+	baddr = kvm->arch.pgd_phys;
+	vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
+	return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 72dc4c011014..5b267dec6194 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -361,6 +361,7 @@
 
 #define SYS_CNTKCTL_EL1			sys_reg(3, 0, 14, 1, 0)
 
+#define SYS_CCSIDR_EL1			sys_reg(3, 1, 0, 0, 0)
 #define SYS_CLIDR_EL1			sys_reg(3, 1, 0, 0, 1)
 #define SYS_AIDR_EL1			sys_reg(3, 1, 0, 0, 7)
 
@@ -392,6 +393,10 @@
 #define SYS_CNTP_CTL_EL0		sys_reg(3, 3, 14, 2, 1)
 #define SYS_CNTP_CVAL_EL0		sys_reg(3, 3, 14, 2, 2)
 
+#define SYS_AARCH32_CNTP_TVAL		sys_reg(0, 0, 14, 2, 0)
+#define SYS_AARCH32_CNTP_CTL		sys_reg(0, 0, 14, 2, 1)
+#define SYS_AARCH32_CNTP_CVAL		sys_reg(0, 2, 0, 14, 0)
+
 #define __PMEV_op2(n)			((n) & 0x7)
 #define __CNTR_CRm(n)			(0x8 | (((n) >> 3) & 0x3))
 #define SYS_PMEVCNTRn_EL0(n)		sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
@@ -426,7 +431,7 @@
 #define SYS_ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
 #define SYS_ICH_MISR_EL2		sys_reg(3, 4, 12, 11, 2)
 #define SYS_ICH_EISR_EL2		sys_reg(3, 4, 12, 11, 3)
-#define SYS_ICH_ELSR_EL2		sys_reg(3, 4, 12, 11, 5)
+#define SYS_ICH_ELRSR_EL2		sys_reg(3, 4, 12, 11, 5)
 #define SYS_ICH_VMCR_EL2		sys_reg(3, 4, 12, 11, 7)
 
 #define __SYS__LR0_EL2(x)		sys_reg(3, 4, 12, 12, x)
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 0f2a135ba15b..690e033a91c0 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -3,9 +3,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic
-CFLAGS_arm.o := -I.
-CFLAGS_mmu.o := -I.
+ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic
 
 KVM=../../../virt/kvm
 
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index f39801e4136c..fd917d6d12af 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -76,7 +76,7 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
 
 void kvm_arm_init_debug(void)
 {
-	__this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2));
+	__this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2));
 }
 
 /**
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 952f6cb9cf72..2845aa680841 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -40,9 +40,6 @@
  * arch/arm64/kernel/hyp_stub.S.
  */
 ENTRY(__kvm_call_hyp)
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	hvc	#0
 	ret
-alternative_else_nop_endif
-	b	__vhe_hyp_call
 ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 73c1b483ec39..2b1e686772bf 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -43,18 +43,6 @@
 	ldr	lr, [sp], #16
 .endm
 
-ENTRY(__vhe_hyp_call)
-	do_el2_call
-	/*
-	 * We used to rely on having an exception return to get
-	 * an implicit isb. In the E2H case, we don't have it anymore.
-	 * rather than changing all the leaf functions, just do it here
-	 * before returning to the rest of the kernel.
-	 */
-	isb
-	ret
-ENDPROC(__vhe_hyp_call)
-
 el1_sync:				// Guest trapped into EL2
 
 	mrs	x0, esr_el2
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index b426e2cf973c..c52a8451637c 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -53,7 +53,6 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
 
 static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
-	ctxt->sys_regs[MPIDR_EL1]	= read_sysreg(vmpidr_el2);
 	ctxt->sys_regs[CSSELR_EL1]	= read_sysreg(csselr_el1);
 	ctxt->sys_regs[SCTLR_EL1]	= read_sysreg_el1(sctlr);
 	ctxt->sys_regs[ACTLR_EL1]	= read_sysreg(actlr_el1);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c936aa40c3f4..539feecda5b8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c | |||
@@ -982,6 +982,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
982 | return true; | 982 | return true; |
983 | } | 983 | } |
984 | 984 | ||
985 | #define reg_to_encoding(x) \ | ||
986 | sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ | ||
987 | (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2); | ||
988 | |||
985 | /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ | 989 | /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ |
986 | #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ | 990 | #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ |
987 | { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ | 991 | { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ |
@@ -1003,44 +1007,38 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
1003 | { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ | 1007 | { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ |
1004 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } | 1008 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } |
1005 | 1009 | ||
1006 | static bool access_cntp_tval(struct kvm_vcpu *vcpu, | 1010 | static bool access_arch_timer(struct kvm_vcpu *vcpu, |
1007 | struct sys_reg_params *p, | 1011 | struct sys_reg_params *p, |
1008 | const struct sys_reg_desc *r) | 1012 | const struct sys_reg_desc *r) |
1009 | { | 1013 | { |
1010 | u64 now = kvm_phys_timer_read(); | 1014 | enum kvm_arch_timers tmr; |
1011 | u64 cval; | 1015 | enum kvm_arch_timer_regs treg; |
1016 | u64 reg = reg_to_encoding(r); | ||
1012 | 1017 | ||
1013 | if (p->is_write) { | 1018 | switch (reg) { |
1014 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, | 1019 | case SYS_CNTP_TVAL_EL0: |
1015 | p->regval + now); | 1020 | case SYS_AARCH32_CNTP_TVAL: |
1016 | } else { | 1021 | tmr = TIMER_PTIMER; |
1017 | cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); | 1022 | treg = TIMER_REG_TVAL; |
1018 | p->regval = cval - now; | 1023 | break; |
1024 | case SYS_CNTP_CTL_EL0: | ||
1025 | case SYS_AARCH32_CNTP_CTL: | ||
1026 | tmr = TIMER_PTIMER; | ||
1027 | treg = TIMER_REG_CTL; | ||
1028 | break; | ||
1029 | case SYS_CNTP_CVAL_EL0: | ||
1030 | case SYS_AARCH32_CNTP_CVAL: | ||
1031 | tmr = TIMER_PTIMER; | ||
1032 | treg = TIMER_REG_CVAL; | ||
1033 | break; | ||
1034 | default: | ||
1035 | BUG(); | ||
1019 | } | 1036 | } |
1020 | 1037 | ||
1021 | return true; | ||
1022 | } | ||
1023 | |||
1024 | static bool access_cntp_ctl(struct kvm_vcpu *vcpu, | ||
1025 | struct sys_reg_params *p, | ||
1026 | const struct sys_reg_desc *r) | ||
1027 | { | ||
1028 | if (p->is_write) | ||
1029 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval); | ||
1030 | else | ||
1031 | p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); | ||
1032 | |||
1033 | return true; | ||
1034 | } | ||
1035 | |||
1036 | static bool access_cntp_cval(struct kvm_vcpu *vcpu, | ||
1037 | struct sys_reg_params *p, | ||
1038 | const struct sys_reg_desc *r) | ||
1039 | { | ||
1040 | if (p->is_write) | 1038 | if (p->is_write) |
1041 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval); | 1039 | kvm_arm_timer_write_sysreg(vcpu, tmr, treg, p->regval); |
1042 | else | 1040 | else |
1043 | p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); | 1041 | p->regval = kvm_arm_timer_read_sysreg(vcpu, tmr, treg); |
1044 | 1042 | ||
1045 | return true; | 1043 | return true; |
1046 | } | 1044 | } |
@@ -1160,6 +1158,64 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, | |||
1160 | return __set_id_reg(rd, uaddr, true); | 1158 | return __set_id_reg(rd, uaddr, true); |
1161 | } | 1159 | } |
1162 | 1160 | ||
1161 | static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
1162 | const struct sys_reg_desc *r) | ||
1163 | { | ||
1164 | if (p->is_write) | ||
1165 | return write_to_read_only(vcpu, p, r); | ||
1166 | |||
1167 | p->regval = read_sanitised_ftr_reg(SYS_CTR_EL0); | ||
1168 | return true; | ||
1169 | } | ||
1170 | |||
1171 | static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
1172 | const struct sys_reg_desc *r) | ||
1173 | { | ||
1174 | if (p->is_write) | ||
1175 | return write_to_read_only(vcpu, p, r); | ||
1176 | |||
1177 | p->regval = read_sysreg(clidr_el1); | ||
1178 | return true; | ||
1179 | } | ||
1180 | |||
1181 | static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
1182 | const struct sys_reg_desc *r) | ||
1183 | { | ||
1184 | if (p->is_write) | ||
1185 | vcpu_write_sys_reg(vcpu, p->regval, r->reg); | ||
1186 | else | ||
1187 | p->regval = vcpu_read_sys_reg(vcpu, r->reg); | ||
1188 | return true; | ||
1189 | } | ||
1190 | |||
1191 | static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
1192 | const struct sys_reg_desc *r) | ||
1193 | { | ||
1194 | u32 csselr; | ||
1195 | |||
1196 | if (p->is_write) | ||
1197 | return write_to_read_only(vcpu, p, r); | ||
1198 | |||
1199 | csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1); | ||
1200 | p->regval = get_ccsidr(csselr); | ||
1201 | |||
1202 | /* | ||
1203 | * Guests should not be doing cache operations by set/way at all, and | ||
1204 | * for this reason, we trap them and attempt to infer the intent, so | ||
1205 | * that we can flush the entire guest's address space at the appropriate | ||
1206 | * time. | ||
1207 | * To prevent this trapping from causing performance problems, let's | ||
1208 | * expose the geometry of all data and unified caches (which are | ||
1209 | * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way. | ||
1210 | * [If guests should attempt to infer aliasing properties from the | ||
1211 | * geometry (which is not permitted by the architecture), they would | ||
1212 | * only do so for virtually indexed caches.] | ||
1213 | */ | ||
1214 | if (!(csselr & 1)) // data or unified cache | ||
1215 | p->regval &= ~GENMASK(27, 3); | ||
1216 | return true; | ||
1217 | } | ||
1218 | |||
1163 | /* sys_reg_desc initialiser for known cpufeature ID registers */ | 1219 | /* sys_reg_desc initialiser for known cpufeature ID registers */ |
1164 | #define ID_SANITISED(name) { \ | 1220 | #define ID_SANITISED(name) { \ |
1165 | SYS_DESC(SYS_##name), \ | 1221 | SYS_DESC(SYS_##name), \ |
@@ -1377,7 +1433,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
1377 | 1433 | ||
1378 | { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, | 1434 | { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, |
1379 | 1435 | ||
1380 | { SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 }, | 1436 | { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, |
1437 | { SYS_DESC(SYS_CLIDR_EL1), access_clidr }, | ||
1438 | { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, | ||
1439 | { SYS_DESC(SYS_CTR_EL0), access_ctr }, | ||
1381 | 1440 | ||
1382 | { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, | 1441 | { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, |
1383 | { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, | 1442 | { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, |
@@ -1400,9 +1459,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
1400 | { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, | 1459 | { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, |
1401 | { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, | 1460 | { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, |
1402 | 1461 | ||
1403 | { SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval }, | 1462 | { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, |
1404 | { SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl }, | 1463 | { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, |
1405 | { SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval }, | 1464 | { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, |
1406 | 1465 | ||
1407 | /* PMEVCNTRn_EL0 */ | 1466 | /* PMEVCNTRn_EL0 */ |
1408 | PMU_PMEVCNTR_EL0(0), | 1467 | PMU_PMEVCNTR_EL0(0), |
@@ -1476,7 +1535,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
1476 | 1535 | ||
1477 | { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, | 1536 | { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, |
1478 | { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, | 1537 | { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, |
1479 | { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x70 }, | 1538 | { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, |
1480 | }; | 1539 | }; |
1481 | 1540 | ||
1482 | static bool trap_dbgidr(struct kvm_vcpu *vcpu, | 1541 | static bool trap_dbgidr(struct kvm_vcpu *vcpu, |
@@ -1677,6 +1736,7 @@ static const struct sys_reg_desc cp14_64_regs[] = { | |||
1677 | * register). | 1736 | * register). |
1678 | */ | 1737 | */ |
1679 | static const struct sys_reg_desc cp15_regs[] = { | 1738 | static const struct sys_reg_desc cp15_regs[] = { |
1739 | { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, | ||
1680 | { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, | 1740 | { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, |
1681 | { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, | 1741 | { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, |
1682 | { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, | 1742 | { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, |
@@ -1723,10 +1783,9 @@ static const struct sys_reg_desc cp15_regs[] = { | |||
1723 | 1783 | ||
1724 | { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, | 1784 | { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, |
1725 | 1785 | ||
1726 | /* CNTP_TVAL */ | 1786 | /* Arch Timers */
1727 | { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval }, | 1787 | { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer }, |
1728 | /* CNTP_CTL */ | 1788 | { SYS_DESC(SYS_AARCH32_CNTP_CTL), access_arch_timer }, |
1729 | { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl }, | ||
1730 | 1789 | ||
1731 | /* PMEVCNTRn */ | 1790 | /* PMEVCNTRn */ |
1732 | PMU_PMEVCNTR(0), | 1791 | PMU_PMEVCNTR(0), |
@@ -1794,6 +1853,10 @@ static const struct sys_reg_desc cp15_regs[] = { | |||
1794 | PMU_PMEVTYPER(30), | 1853 | PMU_PMEVTYPER(30), |
1795 | /* PMCCFILTR */ | 1854 | /* PMCCFILTR */ |
1796 | { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper }, | 1855 | { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper }, |
1856 | |||
1857 | { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, | ||
1858 | { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, | ||
1859 | { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR }, | ||
1797 | }; | 1860 | }; |
1798 | 1861 | ||
1799 | static const struct sys_reg_desc cp15_64_regs[] = { | 1862 | static const struct sys_reg_desc cp15_64_regs[] = { |
@@ -1803,7 +1866,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { | |||
1803 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, | 1866 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, |
1804 | { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ | 1867 | { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ |
1805 | { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ | 1868 | { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ |
1806 | { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval }, | 1869 | { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, |
1807 | }; | 1870 | }; |
1808 | 1871 | ||
1809 | /* Target specific emulation tables */ | 1872 | /* Target specific emulation tables */ |
@@ -1832,30 +1895,19 @@ static const struct sys_reg_desc *get_target_table(unsigned target, | |||
1832 | } | 1895 | } |
1833 | } | 1896 | } |
1834 | 1897 | ||
1835 | #define reg_to_match_value(x) \ | ||
1836 | ({ \ | ||
1837 | unsigned long val; \ | ||
1838 | val = (x)->Op0 << 14; \ | ||
1839 | val |= (x)->Op1 << 11; \ | ||
1840 | val |= (x)->CRn << 7; \ | ||
1841 | val |= (x)->CRm << 3; \ | ||
1842 | val |= (x)->Op2; \ | ||
1843 | val; \ | ||
1844 | }) | ||
1845 | |||
1846 | static int match_sys_reg(const void *key, const void *elt) | 1898 | static int match_sys_reg(const void *key, const void *elt) |
1847 | { | 1899 | { |
1848 | const unsigned long pval = (unsigned long)key; | 1900 | const unsigned long pval = (unsigned long)key; |
1849 | const struct sys_reg_desc *r = elt; | 1901 | const struct sys_reg_desc *r = elt; |
1850 | 1902 | ||
1851 | return pval - reg_to_match_value(r); | 1903 | return pval - reg_to_encoding(r); |
1852 | } | 1904 | } |
1853 | 1905 | ||
1854 | static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, | 1906 | static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, |
1855 | const struct sys_reg_desc table[], | 1907 | const struct sys_reg_desc table[], |
1856 | unsigned int num) | 1908 | unsigned int num) |
1857 | { | 1909 | { |
1858 | unsigned long pval = reg_to_match_value(params); | 1910 | unsigned long pval = reg_to_encoding(params); |
1859 | 1911 | ||
1860 | return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); | 1912 | return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); |
1861 | } | 1913 | } |
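
The deleted reg_to_match_value macro and its reg_to_encoding replacement both pack the (Op0, Op1, CRn, CRm, Op2) tuple into a single scalar so the descriptor tables can stay sorted and be probed with bsearch(). A standalone sketch of the same idea, using the shift widths from the removed macro (the two sample encodings happen to be those of MIDR_EL1 and CTR_EL0, but the table here is illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct desc { uint8_t Op0, Op1, CRn, CRm, Op2; const char *name; };

    /* Same packing as the removed reg_to_match_value macro. */
    static unsigned long encode(const struct desc *d)
    {
            return ((unsigned long)d->Op0 << 14) | ((unsigned long)d->Op1 << 11) |
                   ((unsigned long)d->CRn << 7) | ((unsigned long)d->CRm << 3) |
                   d->Op2;
    }

    static int cmp(const void *key, const void *elt)
    {
            /* Values fit in 17 bits, so the subtraction is safe as an int. */
            return (int)((unsigned long)(uintptr_t)key - encode(elt));
    }

    int main(void)
    {
            /* Must be sorted by encoding, as the kernel tables are. */
            const struct desc table[] = {
                    { 3, 0, 0, 0, 0, "MIDR_EL1" },
                    { 3, 3, 0, 0, 1, "CTR_EL0" },
            };
            const struct desc key = { 3, 3, 0, 0, 1, NULL };
            const struct desc *r = bsearch((void *)(uintptr_t)encode(&key),
                                           table, 2, sizeof(table[0]), cmp);

            printf("%s\n", r ? r->name : "not found");
            return 0;
    }

The search only works because the tables stay sorted by this encoding, which kvm_sys_reg_table_init() verifies at boot.
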
@@ -2218,11 +2270,15 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, | |||
2218 | } | 2270 | } |
2219 | 2271 | ||
2220 | FUNCTION_INVARIANT(midr_el1) | 2272 | FUNCTION_INVARIANT(midr_el1) |
2221 | FUNCTION_INVARIANT(ctr_el0) | ||
2222 | FUNCTION_INVARIANT(revidr_el1) | 2273 | FUNCTION_INVARIANT(revidr_el1) |
2223 | FUNCTION_INVARIANT(clidr_el1) | 2274 | FUNCTION_INVARIANT(clidr_el1) |
2224 | FUNCTION_INVARIANT(aidr_el1) | 2275 | FUNCTION_INVARIANT(aidr_el1) |
2225 | 2276 | ||
2277 | static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r) | ||
2278 | { | ||
2279 | ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0); | ||
2280 | } | ||
2281 | |||
2226 | /* ->val is filled in by kvm_sys_reg_table_init() */ | 2282 | /* ->val is filled in by kvm_sys_reg_table_init() */ |
2227 | static struct sys_reg_desc invariant_sys_regs[] = { | 2283 | static struct sys_reg_desc invariant_sys_regs[] = { |
2228 | { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, | 2284 | { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, |
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index d2abd98471e8..41204a49cf95 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h | |||
@@ -1134,7 +1134,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} | |||
1134 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 1134 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
1135 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | 1135 | static inline void kvm_arch_free_memslot(struct kvm *kvm, |
1136 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | 1136 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} |
1137 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} | 1137 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} |
1138 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 1138 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
1139 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} | 1139 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} |
1140 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} | 1140 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0f98f00da2ea..e6b5bb012ccb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -99,6 +99,8 @@ struct kvm_nested_guest; | |||
99 | 99 | ||
100 | struct kvm_vm_stat { | 100 | struct kvm_vm_stat { |
101 | ulong remote_tlb_flush; | 101 | ulong remote_tlb_flush; |
102 | ulong num_2M_pages; | ||
103 | ulong num_1G_pages; | ||
102 | }; | 104 | }; |
103 | 105 | ||
104 | struct kvm_vcpu_stat { | 106 | struct kvm_vcpu_stat { |
@@ -377,6 +379,7 @@ struct kvmppc_mmu { | |||
377 | void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs); | 379 | void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs); |
378 | u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr); | 380 | u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr); |
379 | u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr); | 381 | u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr); |
382 | int (*slbfee)(struct kvm_vcpu *vcpu, gva_t eaddr, ulong *ret_slb); | ||
380 | void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr); | 383 | void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr); |
381 | void (*slbia)(struct kvm_vcpu *vcpu); | 384 | void (*slbia)(struct kvm_vcpu *vcpu); |
382 | /* book3s */ | 385 | /* book3s */ |
@@ -837,7 +840,7 @@ struct kvm_vcpu_arch { | |||
837 | static inline void kvm_arch_hardware_disable(void) {} | 840 | static inline void kvm_arch_hardware_disable(void) {} |
838 | static inline void kvm_arch_hardware_unsetup(void) {} | 841 | static inline void kvm_arch_hardware_unsetup(void) {} |
839 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 842 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
840 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} | 843 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} |
841 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | 844 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} |
842 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 845 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
843 | static inline void kvm_arch_exit(void) {} | 846 | static inline void kvm_arch_exit(void) {} |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a6c8548ed9fa..ac22b28ae78d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -36,6 +36,8 @@ | |||
36 | #endif | 36 | #endif |
37 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 37 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
38 | #include <asm/paca.h> | 38 | #include <asm/paca.h> |
39 | #include <asm/xive.h> | ||
40 | #include <asm/cpu_has_feature.h> | ||
39 | #endif | 41 | #endif |
40 | 42 | ||
41 | /* | 43 | /* |
@@ -617,6 +619,18 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir | |||
617 | static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } | 619 | static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } |
618 | #endif /* CONFIG_KVM_XIVE */ | 620 | #endif /* CONFIG_KVM_XIVE */ |
619 | 621 | ||
622 | #if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER) | ||
623 | static inline bool xics_on_xive(void) | ||
624 | { | ||
625 | return xive_enabled() && cpu_has_feature(CPU_FTR_HVMODE); | ||
626 | } | ||
627 | #else | ||
628 | static inline bool xics_on_xive(void) | ||
629 | { | ||
630 | return false; | ||
631 | } | ||
632 | #endif | ||
633 | |||
620 | /* | 634 | /* |
621 | * Prototypes for functions called only from assembler code. | 635 | * Prototypes for functions called only from assembler code. |
622 | * Having prototypes reduces sparse errors. | 636 | * Having prototypes reduces sparse errors. |
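
The new helper captures a distinction that a bare xive_enabled() misses: XIVE hardware being present does not mean KVM may drive it directly, since that additionally requires hypervisor mode (PowerNV); a pseries host only reaches XIVE through hypercalls and must keep using the software XICS emulation. A trivial standalone truth table of the predicate (illustrative C, not kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative only: the in-kernel XICS-on-XIVE backend needs both
     * XIVE hardware and hypervisor mode; without HV mode the software
     * XICS emulation is used instead. */
    static bool xics_on_xive(bool xive_enabled, bool hv_mode)
    {
            return xive_enabled && hv_mode;
    }

    int main(void)
    {
            for (int x = 0; x < 2; x++)
                    for (int h = 0; h < 2; h++)
                            printf("xive=%d hv=%d -> xics_on_xive=%d\n",
                                   x, h, xics_on_xive(x, h));
            return 0;
    }
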
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 8c876c166ef2..26ca425f4c2c 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
@@ -463,10 +463,12 @@ struct kvm_ppc_cpu_char { | |||
463 | #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) | 463 | #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) |
464 | #define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) | 464 | #define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) |
465 | #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) | 465 | #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) |
466 | #define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) | ||
466 | 467 | ||
467 | #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) | 468 | #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) |
468 | #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) | 469 | #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) |
469 | #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) | 470 | #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) |
471 | #define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) | ||
470 | 472 | ||
471 | /* Per-vcpu XICS interrupt controller state */ | 473 | /* Per-vcpu XICS interrupt controller state */ |
472 | #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) | 474 | #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9a7dadbe1f17..10c5579d20ce 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "book3s.h" | 39 | #include "book3s.h" |
40 | #include "trace.h" | 40 | #include "trace.h" |
41 | 41 | ||
42 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | ||
42 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 43 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
43 | 44 | ||
44 | /* #define EXIT_DEBUG */ | 45 | /* #define EXIT_DEBUG */ |
@@ -71,6 +72,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
71 | { "pthru_all", VCPU_STAT(pthru_all) }, | 72 | { "pthru_all", VCPU_STAT(pthru_all) }, |
72 | { "pthru_host", VCPU_STAT(pthru_host) }, | 73 | { "pthru_host", VCPU_STAT(pthru_host) }, |
73 | { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, | 74 | { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, |
75 | { "largepages_2M", VM_STAT(num_2M_pages) }, | ||
76 | { "largepages_1G", VM_STAT(num_1G_pages) }, | ||
74 | { NULL } | 77 | { NULL } |
75 | }; | 78 | }; |
76 | 79 | ||
@@ -642,7 +645,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | |||
642 | r = -ENXIO; | 645 | r = -ENXIO; |
643 | break; | 646 | break; |
644 | } | 647 | } |
645 | if (xive_enabled()) | 648 | if (xics_on_xive()) |
646 | *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu)); | 649 | *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu)); |
647 | else | 650 | else |
648 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); | 651 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); |
@@ -715,7 +718,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | |||
715 | r = -ENXIO; | 718 | r = -ENXIO; |
716 | break; | 719 | break; |
717 | } | 720 | } |
718 | if (xive_enabled()) | 721 | if (xics_on_xive()) |
719 | r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val)); | 722 | r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val)); |
720 | else | 723 | else |
721 | r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); | 724 | r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); |
@@ -991,7 +994,7 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) | |||
991 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, | 994 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, |
992 | bool line_status) | 995 | bool line_status) |
993 | { | 996 | { |
994 | if (xive_enabled()) | 997 | if (xics_on_xive()) |
995 | return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level, | 998 | return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level, |
996 | line_status); | 999 | line_status); |
997 | else | 1000 | else |
@@ -1044,7 +1047,7 @@ static int kvmppc_book3s_init(void) | |||
1044 | 1047 | ||
1045 | #ifdef CONFIG_KVM_XICS | 1048 | #ifdef CONFIG_KVM_XICS |
1046 | #ifdef CONFIG_KVM_XIVE | 1049 | #ifdef CONFIG_KVM_XIVE |
1047 | if (xive_enabled()) { | 1050 | if (xics_on_xive()) { |
1048 | kvmppc_xive_init_module(); | 1051 | kvmppc_xive_init_module(); |
1049 | kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); | 1052 | kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); |
1050 | } else | 1053 | } else |
@@ -1057,7 +1060,7 @@ static int kvmppc_book3s_init(void) | |||
1057 | static void kvmppc_book3s_exit(void) | 1060 | static void kvmppc_book3s_exit(void) |
1058 | { | 1061 | { |
1059 | #ifdef CONFIG_KVM_XICS | 1062 | #ifdef CONFIG_KVM_XICS |
1060 | if (xive_enabled()) | 1063 | if (xics_on_xive()) |
1061 | kvmppc_xive_exit_module(); | 1064 | kvmppc_xive_exit_module(); |
1062 | #endif | 1065 | #endif |
1063 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | 1066 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER |
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 612169988a3d..6f789f674048 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c | |||
@@ -425,6 +425,7 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) | |||
425 | mmu->slbmte = NULL; | 425 | mmu->slbmte = NULL; |
426 | mmu->slbmfee = NULL; | 426 | mmu->slbmfee = NULL; |
427 | mmu->slbmfev = NULL; | 427 | mmu->slbmfev = NULL; |
428 | mmu->slbfee = NULL; | ||
428 | mmu->slbie = NULL; | 429 | mmu->slbie = NULL; |
429 | mmu->slbia = NULL; | 430 | mmu->slbia = NULL; |
430 | } | 431 | } |
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index c92dd25bed23..d4b967f0e8d4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
@@ -435,6 +435,19 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) | |||
435 | kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT); | 435 | kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT); |
436 | } | 436 | } |
437 | 437 | ||
438 | static int kvmppc_mmu_book3s_64_slbfee(struct kvm_vcpu *vcpu, gva_t eaddr, | ||
439 | ulong *ret_slb) | ||
440 | { | ||
441 | struct kvmppc_slb *slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr); | ||
442 | |||
443 | if (slbe) { | ||
444 | *ret_slb = slbe->origv; | ||
445 | return 0; | ||
446 | } | ||
447 | *ret_slb = 0; | ||
448 | return -ENOENT; | ||
449 | } | ||
450 | |||
438 | static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) | 451 | static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) |
439 | { | 452 | { |
440 | struct kvmppc_slb *slbe; | 453 | struct kvmppc_slb *slbe; |
@@ -670,6 +683,7 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) | |||
670 | mmu->slbmte = kvmppc_mmu_book3s_64_slbmte; | 683 | mmu->slbmte = kvmppc_mmu_book3s_64_slbmte; |
671 | mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee; | 684 | mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee; |
672 | mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev; | 685 | mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev; |
686 | mmu->slbfee = kvmppc_mmu_book3s_64_slbfee; | ||
673 | mmu->slbie = kvmppc_mmu_book3s_64_slbie; | 687 | mmu->slbie = kvmppc_mmu_book3s_64_slbie; |
674 | mmu->slbia = kvmppc_mmu_book3s_64_slbia; | 688 | mmu->slbia = kvmppc_mmu_book3s_64_slbia; |
675 | mmu->xlate = kvmppc_mmu_book3s_64_xlate; | 689 | mmu->xlate = kvmppc_mmu_book3s_64_xlate; |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bd2dcfbf00cd..be7bc070eae5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -442,6 +442,24 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
442 | u32 last_inst; | 442 | u32 last_inst; |
443 | 443 | ||
444 | /* | 444 | /* |
445 | * Fast path - check if the guest physical address corresponds to a | ||
446 | * device on the FAST_MMIO_BUS; if so, we can avoid loading the | ||
447 | * instruction altogether and just handle it and return. | ||
448 | */ | ||
449 | if (is_store) { | ||
450 | int idx, ret; | ||
451 | |||
452 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
453 | ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0, | ||
454 | NULL); | ||
455 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
456 | if (!ret) { | ||
457 | kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); | ||
458 | return RESUME_GUEST; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | /* | ||
445 | * If we fail, we just return to the guest and try executing it again. | 463 | * If we fail, we just return to the guest and try executing it again. |
446 | */ | 464 | */ |
447 | if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != | 465 | if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != |
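
The fast path leans on KVM's zero-length ioeventfd mechanism: devices registered with len == 0 are matched on KVM_FAST_MMIO_BUS by guest physical address alone, so a trapped store can be completed, and the PC advanced by 4 (PowerPC instructions are fixed width), without fetching or decoding the faulting instruction. A hedged userspace sketch of such a registration, assuming an existing VM file descriptor and a host that advertises KVM_CAP_IOEVENTFD_ANY_LENGTH (needed for len == 0); the doorbell address is hypothetical:

    #include <string.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /*
     * Register a zero-length ioeventfd at a hypothetical doorbell address
     * so a guest store there is matched on KVM_FAST_MMIO_BUS with no data
     * and no instruction decode on the exit path.
     */
    int register_fast_mmio(int vm_fd, __u64 doorbell_gpa)
    {
            struct kvm_ioeventfd ioev;

            memset(&ioev, 0, sizeof(ioev));
            ioev.addr = doorbell_gpa;       /* hypothetical guest address */
            ioev.len = 0;                   /* len 0 => fast MMIO, data ignored */
            ioev.fd = eventfd(0, EFD_CLOEXEC);
            if (ioev.fd < 0)
                    return -1;

            return ioctl(vm_fd, KVM_IOEVENTFD, &ioev);
    }
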
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 1b821c6efdef..f55ef071883f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c | |||
@@ -403,8 +403,13 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, | |||
403 | if (!memslot) | 403 | if (!memslot) |
404 | return; | 404 | return; |
405 | } | 405 | } |
406 | if (shift) | 406 | if (shift) { /* 1GB or 2MB page */ |
407 | page_size = 1ul << shift; | 407 | page_size = 1ul << shift; |
408 | if (shift == PMD_SHIFT) | ||
409 | kvm->stat.num_2M_pages--; | ||
410 | else if (shift == PUD_SHIFT) | ||
411 | kvm->stat.num_1G_pages--; | ||
412 | } | ||
408 | 413 | ||
409 | gpa &= ~(page_size - 1); | 414 | gpa &= ~(page_size - 1); |
410 | hpa = old & PTE_RPN_MASK; | 415 | hpa = old & PTE_RPN_MASK; |
@@ -878,6 +883,14 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, | |||
878 | put_page(page); | 883 | put_page(page); |
879 | } | 884 | } |
880 | 885 | ||
886 | /* Increment number of large pages if we (successfully) inserted one */ | ||
887 | if (!ret) { | ||
888 | if (level == 1) | ||
889 | kvm->stat.num_2M_pages++; | ||
890 | else if (level == 2) | ||
891 | kvm->stat.num_1G_pages++; | ||
892 | } | ||
893 | |||
881 | return ret; | 894 | return ret; |
882 | } | 895 | } |
883 | 896 | ||
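
The two hunks count the same mappings from opposite ends: instantiation increments by radix level (1 or 2), teardown decrements by mapping shift (PMD_SHIFT or PUD_SHIFT), and on book3s64 radix those pairs both denote 2M (shift 21) and 1G (shift 30) pages, matching the new stat names. A quick standalone check of that correspondence (shifts assume the radix geometry):

    #include <stdio.h>

    /* Illustrative: level 1 <-> PMD_SHIFT = 21, level 2 <-> PUD_SHIFT = 30
     * on book3s64 radix; the debugfs stat names encode the sizes. */
    int main(void)
    {
            const struct { int level; int shift; const char *stat; } map[] = {
                    { 1, 21, "largepages_2M" },
                    { 2, 30, "largepages_1G" },
            };

            for (int i = 0; i < 2; i++)
                    printf("level %d: page size %lu MB -> %s\n", map[i].level,
                           (1ul << map[i].shift) >> 20, map[i].stat);
            return 0;
    }
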
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 532ab79734c7..f02b04973710 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
@@ -133,7 +133,6 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, | |||
133 | continue; | 133 | continue; |
134 | 134 | ||
135 | kref_put(&stit->kref, kvm_spapr_tce_liobn_put); | 135 | kref_put(&stit->kref, kvm_spapr_tce_liobn_put); |
136 | return; | ||
137 | } | 136 | } |
138 | } | 137 | } |
139 | } | 138 | } |
@@ -338,14 +337,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
338 | } | 337 | } |
339 | } | 338 | } |
340 | 339 | ||
340 | kvm_get_kvm(kvm); | ||
341 | if (!ret) | 341 | if (!ret) |
342 | ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, | 342 | ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, |
343 | stt, O_RDWR | O_CLOEXEC); | 343 | stt, O_RDWR | O_CLOEXEC); |
344 | 344 | ||
345 | if (ret >= 0) { | 345 | if (ret >= 0) |
346 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); | 346 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); |
347 | kvm_get_kvm(kvm); | 347 | else |
348 | } | 348 | kvm_put_kvm(kvm); |
349 | 349 | ||
350 | mutex_unlock(&kvm->lock); | 350 | mutex_unlock(&kvm->lock); |
351 | 351 | ||
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 8c7e933e942e..6ef7c5f00a49 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #define OP_31_XOP_SLBMFEV 851 | 47 | #define OP_31_XOP_SLBMFEV 851 |
48 | #define OP_31_XOP_EIOIO 854 | 48 | #define OP_31_XOP_EIOIO 854 |
49 | #define OP_31_XOP_SLBMFEE 915 | 49 | #define OP_31_XOP_SLBMFEE 915 |
50 | #define OP_31_XOP_SLBFEE 979 | ||
50 | 51 | ||
51 | #define OP_31_XOP_TBEGIN 654 | 52 | #define OP_31_XOP_TBEGIN 654 |
52 | #define OP_31_XOP_TABORT 910 | 53 | #define OP_31_XOP_TABORT 910 |
@@ -416,6 +417,23 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
416 | 417 | ||
417 | vcpu->arch.mmu.slbia(vcpu); | 418 | vcpu->arch.mmu.slbia(vcpu); |
418 | break; | 419 | break; |
420 | case OP_31_XOP_SLBFEE: | ||
421 | if (!(inst & 1) || !vcpu->arch.mmu.slbfee) { | ||
422 | return EMULATE_FAIL; | ||
423 | } else { | ||
424 | ulong b, t; | ||
425 | ulong cr = kvmppc_get_cr(vcpu) & ~CR0_MASK; | ||
426 | |||
427 | b = kvmppc_get_gpr(vcpu, rb); | ||
428 | if (!vcpu->arch.mmu.slbfee(vcpu, b, &t)) | ||
429 | cr |= 2 << CR0_SHIFT; | ||
430 | kvmppc_set_gpr(vcpu, rt, t); | ||
431 | /* copy XER[SO] bit to CR0[SO] */ | ||
432 | cr |= (vcpu->arch.regs.xer & 0x80000000) >> | ||
433 | (31 - CR0_SHIFT); | ||
434 | kvmppc_set_cr(vcpu, cr); | ||
435 | } | ||
436 | break; | ||
419 | case OP_31_XOP_SLBMFEE: | 437 | case OP_31_XOP_SLBMFEE: |
420 | if (!vcpu->arch.mmu.slbmfee) { | 438 | if (!vcpu->arch.mmu.slbmfee) { |
421 | emulated = EMULATE_FAIL; | 439 | emulated = EMULATE_FAIL; |
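
The new SLBFEE emulation records its outcome the way a record-form PowerPC instruction does: CR0.EQ is set when the SLB lookup succeeds, and XER[SO] is mirrored into CR0.SO. A standalone sketch of just the CR0 computation, with CR0_SHIFT = 28 as in the kernel and hypothetical inputs:

    #include <stdint.h>
    #include <stdio.h>

    /* In the 4-bit CR0 field, LT=8, GT=4, EQ=2, SO=1. */
    static uint32_t slbfee_cr0(int found, uint32_t xer)
    {
            uint32_t cr0 = 0;

            if (found)
                    cr0 |= 2;               /* EQ on a successful lookup */
            cr0 |= (xer >> 31) & 1;         /* copy XER[SO] into CR0[SO] */
            return cr0 << 28;               /* place into the CR0 field */
    }

    int main(void)
    {
            printf("hit,  SO clear: CR=%08x\n", (unsigned)slbfee_cr0(1, 0));
            printf("miss, SO set:   CR=%08x\n",
                   (unsigned)slbfee_cr0(0, 0x80000000u));
            return 0;
    }
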
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a3d5318f5d1e..06964350b97a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -922,7 +922,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
922 | case H_IPOLL: | 922 | case H_IPOLL: |
923 | case H_XIRR_X: | 923 | case H_XIRR_X: |
924 | if (kvmppc_xics_enabled(vcpu)) { | 924 | if (kvmppc_xics_enabled(vcpu)) { |
925 | if (xive_enabled()) { | 925 | if (xics_on_xive()) { |
926 | ret = H_NOT_AVAILABLE; | 926 | ret = H_NOT_AVAILABLE; |
927 | return RESUME_GUEST; | 927 | return RESUME_GUEST; |
928 | } | 928 | } |
@@ -937,6 +937,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
937 | ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4), | 937 | ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4), |
938 | kvmppc_get_gpr(vcpu, 5)); | 938 | kvmppc_get_gpr(vcpu, 5)); |
939 | break; | 939 | break; |
940 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
940 | case H_GET_TCE: | 941 | case H_GET_TCE: |
941 | ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4), | 942 | ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4), |
942 | kvmppc_get_gpr(vcpu, 5)); | 943 | kvmppc_get_gpr(vcpu, 5)); |
@@ -966,6 +967,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
966 | if (ret == H_TOO_HARD) | 967 | if (ret == H_TOO_HARD) |
967 | return RESUME_HOST; | 968 | return RESUME_HOST; |
968 | break; | 969 | break; |
970 | #endif | ||
969 | case H_RANDOM: | 971 | case H_RANDOM: |
970 | if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) | 972 | if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) |
971 | ret = H_HARDWARE; | 973 | ret = H_HARDWARE; |
@@ -1445,7 +1447,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1445 | case BOOK3S_INTERRUPT_HV_RM_HARD: | 1447 | case BOOK3S_INTERRUPT_HV_RM_HARD: |
1446 | vcpu->arch.trap = 0; | 1448 | vcpu->arch.trap = 0; |
1447 | r = RESUME_GUEST; | 1449 | r = RESUME_GUEST; |
1448 | if (!xive_enabled()) | 1450 | if (!xics_on_xive()) |
1449 | kvmppc_xics_rm_complete(vcpu, 0); | 1451 | kvmppc_xics_rm_complete(vcpu, 0); |
1450 | break; | 1452 | break; |
1451 | default: | 1453 | default: |
@@ -3648,11 +3650,12 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, | |||
3648 | 3650 | ||
3649 | static void grow_halt_poll_ns(struct kvmppc_vcore *vc) | 3651 | static void grow_halt_poll_ns(struct kvmppc_vcore *vc) |
3650 | { | 3652 | { |
3651 | /* 10us base */ | 3653 | if (!halt_poll_ns_grow) |
3652 | if (vc->halt_poll_ns == 0 && halt_poll_ns_grow) | 3654 | return; |
3653 | vc->halt_poll_ns = 10000; | 3655 | |
3654 | else | 3656 | vc->halt_poll_ns *= halt_poll_ns_grow; |
3655 | vc->halt_poll_ns *= halt_poll_ns_grow; | 3657 | if (vc->halt_poll_ns < halt_poll_ns_grow_start) |
3658 | vc->halt_poll_ns = halt_poll_ns_grow_start; | ||
3656 | } | 3659 | } |
3657 | 3660 | ||
3658 | static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) | 3661 | static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) |
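
The rewritten helper turns growth into a pure function of the module parameters: a zero grow factor disables growth outright, and any grown value is raised to at least halt_poll_ns_grow_start, so a vcore whose polling window decayed to zero restarts at the configured floor instead of the previously hard-coded 10us. A standalone sketch of the resulting sequence, using a factor of 2 and a 10000 ns floor as sample parameters:

    #include <stdio.h>

    /* Mirror of the new grow logic; the parameter values are samples
     * (the 10000 ns floor matches the removed hard-coded 10us base). */
    static unsigned int grow(unsigned int ns, unsigned int factor,
                             unsigned int start)
    {
            if (!factor)
                    return ns;              /* growth disabled */
            ns *= factor;
            if (ns < start)
                    ns = start;             /* never restart below the floor */
            return ns;
    }

    int main(void)
    {
            unsigned int ns = 0;

            for (int i = 0; i < 4; i++) {
                    ns = grow(ns, 2, 10000);
                    printf("poll window: %u ns\n", ns); /* 10000, 20000, ... */
            }
            return 0;
    }
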
@@ -3666,7 +3669,7 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) | |||
3666 | #ifdef CONFIG_KVM_XICS | 3669 | #ifdef CONFIG_KVM_XICS |
3667 | static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) | 3670 | static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) |
3668 | { | 3671 | { |
3669 | if (!xive_enabled()) | 3672 | if (!xics_on_xive()) |
3670 | return false; | 3673 | return false; |
3671 | return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < | 3674 | return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < |
3672 | vcpu->arch.xive_saved_state.cppr; | 3675 | vcpu->arch.xive_saved_state.cppr; |
@@ -4226,7 +4229,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
4226 | vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); | 4229 | vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); |
4227 | srcu_read_unlock(&kvm->srcu, srcu_idx); | 4230 | srcu_read_unlock(&kvm->srcu, srcu_idx); |
4228 | } else if (r == RESUME_PASSTHROUGH) { | 4231 | } else if (r == RESUME_PASSTHROUGH) { |
4229 | if (WARN_ON(xive_enabled())) | 4232 | if (WARN_ON(xics_on_xive())) |
4230 | r = H_SUCCESS; | 4233 | r = H_SUCCESS; |
4231 | else | 4234 | else |
4232 | r = kvmppc_xics_rm_complete(vcpu, 0); | 4235 | r = kvmppc_xics_rm_complete(vcpu, 0); |
@@ -4750,7 +4753,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | |||
4750 | * If xive is enabled, we route 0x500 interrupts directly | 4753 | * If xive is enabled, we route 0x500 interrupts directly |
4751 | * to the guest. | 4754 | * to the guest. |
4752 | */ | 4755 | */ |
4753 | if (xive_enabled()) | 4756 | if (xics_on_xive()) |
4754 | lpcr |= LPCR_LPES; | 4757 | lpcr |= LPCR_LPES; |
4755 | } | 4758 | } |
4756 | 4759 | ||
@@ -4986,7 +4989,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) | |||
4986 | if (i == pimap->n_mapped) | 4989 | if (i == pimap->n_mapped) |
4987 | pimap->n_mapped++; | 4990 | pimap->n_mapped++; |
4988 | 4991 | ||
4989 | if (xive_enabled()) | 4992 | if (xics_on_xive()) |
4990 | rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); | 4993 | rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); |
4991 | else | 4994 | else |
4992 | kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); | 4995 | kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); |
@@ -5027,7 +5030,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) | |||
5027 | return -ENODEV; | 5030 | return -ENODEV; |
5028 | } | 5031 | } |
5029 | 5032 | ||
5030 | if (xive_enabled()) | 5033 | if (xics_on_xive()) |
5031 | rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); | 5034 | rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); |
5032 | else | 5035 | else |
5033 | kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); | 5036 | kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); |
@@ -5359,13 +5362,11 @@ static int kvm_init_subcore_bitmap(void) | |||
5359 | continue; | 5362 | continue; |
5360 | 5363 | ||
5361 | sibling_subcore_state = | 5364 | sibling_subcore_state = |
5362 | kmalloc_node(sizeof(struct sibling_subcore_state), | 5365 | kzalloc_node(sizeof(struct sibling_subcore_state), |
5363 | GFP_KERNEL, node); | 5366 | GFP_KERNEL, node); |
5364 | if (!sibling_subcore_state) | 5367 | if (!sibling_subcore_state) |
5365 | return -ENOMEM; | 5368 | return -ENOMEM; |
5366 | 5369 | ||
5367 | memset(sibling_subcore_state, 0, | ||
5368 | sizeof(struct sibling_subcore_state)); | ||
5369 | 5370 | ||
5370 | for (j = 0; j < threads_per_core; j++) { | 5371 | for (j = 0; j < threads_per_core; j++) { |
5371 | int cpu = first_cpu + j; | 5372 | int cpu = first_cpu + j; |
@@ -5406,7 +5407,7 @@ static int kvmppc_book3s_init_hv(void) | |||
5406 | * indirectly, via OPAL. | 5407 | * indirectly, via OPAL. |
5407 | */ | 5408 | */ |
5408 | #ifdef CONFIG_SMP | 5409 | #ifdef CONFIG_SMP |
5409 | if (!xive_enabled() && !kvmhv_on_pseries() && | 5410 | if (!xics_on_xive() && !kvmhv_on_pseries() && |
5410 | !local_paca->kvm_hstate.xics_phys) { | 5411 | !local_paca->kvm_hstate.xics_phys) { |
5411 | struct device_node *np; | 5412 | struct device_node *np; |
5412 | 5413 | ||
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index a71e2fc00a4e..b0cf22477e87 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -257,7 +257,7 @@ void kvmhv_rm_send_ipi(int cpu) | |||
257 | } | 257 | } |
258 | 258 | ||
259 | /* We should never reach this */ | 259 | /* We should never reach this */ |
260 | if (WARN_ON_ONCE(xive_enabled())) | 260 | if (WARN_ON_ONCE(xics_on_xive())) |
261 | return; | 261 | return; |
262 | 262 | ||
263 | /* Else poke the target with an IPI */ | 263 | /* Else poke the target with an IPI */ |
@@ -577,7 +577,7 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) | |||
577 | { | 577 | { |
578 | if (!kvmppc_xics_enabled(vcpu)) | 578 | if (!kvmppc_xics_enabled(vcpu)) |
579 | return H_TOO_HARD; | 579 | return H_TOO_HARD; |
580 | if (xive_enabled()) { | 580 | if (xics_on_xive()) { |
581 | if (is_rm()) | 581 | if (is_rm()) |
582 | return xive_rm_h_xirr(vcpu); | 582 | return xive_rm_h_xirr(vcpu); |
583 | if (unlikely(!__xive_vm_h_xirr)) | 583 | if (unlikely(!__xive_vm_h_xirr)) |
@@ -592,7 +592,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) | |||
592 | if (!kvmppc_xics_enabled(vcpu)) | 592 | if (!kvmppc_xics_enabled(vcpu)) |
593 | return H_TOO_HARD; | 593 | return H_TOO_HARD; |
594 | vcpu->arch.regs.gpr[5] = get_tb(); | 594 | vcpu->arch.regs.gpr[5] = get_tb(); |
595 | if (xive_enabled()) { | 595 | if (xics_on_xive()) { |
596 | if (is_rm()) | 596 | if (is_rm()) |
597 | return xive_rm_h_xirr(vcpu); | 597 | return xive_rm_h_xirr(vcpu); |
598 | if (unlikely(!__xive_vm_h_xirr)) | 598 | if (unlikely(!__xive_vm_h_xirr)) |
@@ -606,7 +606,7 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) | |||
606 | { | 606 | { |
607 | if (!kvmppc_xics_enabled(vcpu)) | 607 | if (!kvmppc_xics_enabled(vcpu)) |
608 | return H_TOO_HARD; | 608 | return H_TOO_HARD; |
609 | if (xive_enabled()) { | 609 | if (xics_on_xive()) { |
610 | if (is_rm()) | 610 | if (is_rm()) |
611 | return xive_rm_h_ipoll(vcpu, server); | 611 | return xive_rm_h_ipoll(vcpu, server); |
612 | if (unlikely(!__xive_vm_h_ipoll)) | 612 | if (unlikely(!__xive_vm_h_ipoll)) |
@@ -621,7 +621,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, | |||
621 | { | 621 | { |
622 | if (!kvmppc_xics_enabled(vcpu)) | 622 | if (!kvmppc_xics_enabled(vcpu)) |
623 | return H_TOO_HARD; | 623 | return H_TOO_HARD; |
624 | if (xive_enabled()) { | 624 | if (xics_on_xive()) { |
625 | if (is_rm()) | 625 | if (is_rm()) |
626 | return xive_rm_h_ipi(vcpu, server, mfrr); | 626 | return xive_rm_h_ipi(vcpu, server, mfrr); |
627 | if (unlikely(!__xive_vm_h_ipi)) | 627 | if (unlikely(!__xive_vm_h_ipi)) |
@@ -635,7 +635,7 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) | |||
635 | { | 635 | { |
636 | if (!kvmppc_xics_enabled(vcpu)) | 636 | if (!kvmppc_xics_enabled(vcpu)) |
637 | return H_TOO_HARD; | 637 | return H_TOO_HARD; |
638 | if (xive_enabled()) { | 638 | if (xics_on_xive()) { |
639 | if (is_rm()) | 639 | if (is_rm()) |
640 | return xive_rm_h_cppr(vcpu, cppr); | 640 | return xive_rm_h_cppr(vcpu, cppr); |
641 | if (unlikely(!__xive_vm_h_cppr)) | 641 | if (unlikely(!__xive_vm_h_cppr)) |
@@ -649,7 +649,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | |||
649 | { | 649 | { |
650 | if (!kvmppc_xics_enabled(vcpu)) | 650 | if (!kvmppc_xics_enabled(vcpu)) |
651 | return H_TOO_HARD; | 651 | return H_TOO_HARD; |
652 | if (xive_enabled()) { | 652 | if (xics_on_xive()) { |
653 | if (is_rm()) | 653 | if (is_rm()) |
654 | return xive_rm_h_eoi(vcpu, xirr); | 654 | return xive_rm_h_eoi(vcpu, xirr); |
655 | if (unlikely(!__xive_vm_h_eoi)) | 655 | if (unlikely(!__xive_vm_h_eoi)) |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index b3f5786b20dc..3b9662a4207e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c | |||
@@ -144,6 +144,13 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, | |||
144 | return; | 144 | return; |
145 | } | 145 | } |
146 | 146 | ||
147 | if (xive_enabled() && kvmhv_on_pseries()) { | ||
148 | /* No XICS access or hypercalls available, too hard */ | ||
149 | this_icp->rm_action |= XICS_RM_KICK_VCPU; | ||
150 | this_icp->rm_kick_target = vcpu; | ||
151 | return; | ||
152 | } | ||
153 | |||
147 | /* | 154 | /* |
148 | * Check if the core is loaded, | 155 | * Check if the core is loaded, |
149 | * if not, find an available host core to post to wake the VCPU, | 156 | * if not, find an available host core to post to wake the VCPU, |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 25043b50cb30..3a5e719ef032 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -2272,8 +2272,13 @@ hcall_real_table: | |||
2272 | .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table | 2272 | .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table |
2273 | .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table | 2273 | .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table |
2274 | .long DOTSYM(kvmppc_h_protect) - hcall_real_table | 2274 | .long DOTSYM(kvmppc_h_protect) - hcall_real_table |
2275 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
2275 | .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table | 2276 | .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table |
2276 | .long DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table | 2277 | .long DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table |
2278 | #else | ||
2279 | .long 0 /* 0x1c */ | ||
2280 | .long 0 /* 0x20 */ | ||
2281 | #endif | ||
2277 | .long 0 /* 0x24 - H_SET_SPRG0 */ | 2282 | .long 0 /* 0x24 - H_SET_SPRG0 */ |
2278 | .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table | 2283 | .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table |
2279 | .long 0 /* 0x2c */ | 2284 | .long 0 /* 0x2c */ |
@@ -2351,8 +2356,13 @@ hcall_real_table: | |||
2351 | .long 0 /* 0x12c */ | 2356 | .long 0 /* 0x12c */ |
2352 | .long 0 /* 0x130 */ | 2357 | .long 0 /* 0x130 */ |
2353 | .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table | 2358 | .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table |
2359 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
2354 | .long DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table | 2360 | .long DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table |
2355 | .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table | 2361 | .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table |
2362 | #else | ||
2363 | .long 0 /* 0x138 */ | ||
2364 | .long 0 /* 0x13c */ | ||
2365 | #endif | ||
2356 | .long 0 /* 0x140 */ | 2366 | .long 0 /* 0x140 */ |
2357 | .long 0 /* 0x144 */ | 2367 | .long 0 /* 0x144 */ |
2358 | .long 0 /* 0x148 */ | 2368 | .long 0 /* 0x148 */ |
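
The .long 0 placeholders are load-bearing: hcall_real_table is indexed positionally, one 32-bit offset per hcall number stepping by 4, so compiling out the TCE handlers must leave holes rather than shrink the table, or every later entry would dispatch to the wrong handler. A standalone sketch of a positional table with disabled slots (the numbering here is compressed; the diff's own comments show the real H_GET_TCE slot is 0x1c):

    #include <stdio.h>

    /* Each slot corresponds to hcall number / 4; a 0 entry means the
     * hcall is unimplemented or compiled out, but the slot remains. */
    typedef int (*hcall_fn)(void);

    static int h_get_tce(void) { return 1; }

    static const hcall_fn table[] = {
            0,              /* 0x00 - unused */
            h_get_tce,      /* 0x04 - would be 0 with IOMMU support off */
            0,              /* 0x08 - unimplemented */
    };

    static int do_hcall(unsigned int nr)
    {
            unsigned int idx = nr / 4;

            if (idx >= sizeof(table) / sizeof(table[0]) || !table[idx])
                    return -1;      /* not handled in real mode */
            return table[idx]();
    }

    int main(void)
    {
            printf("hcall 0x04 -> %d, hcall 0x08 -> %d\n",
                   do_hcall(0x04), do_hcall(0x08));
            return 0;
    }
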
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 2d3b2b1cc272..4e178c4c1ea5 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c | |||
@@ -33,7 +33,7 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) | |||
33 | server = be32_to_cpu(args->args[1]); | 33 | server = be32_to_cpu(args->args[1]); |
34 | priority = be32_to_cpu(args->args[2]); | 34 | priority = be32_to_cpu(args->args[2]); |
35 | 35 | ||
36 | if (xive_enabled()) | 36 | if (xics_on_xive()) |
37 | rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority); | 37 | rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority); |
38 | else | 38 | else |
39 | rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); | 39 | rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); |
@@ -56,7 +56,7 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) | |||
56 | irq = be32_to_cpu(args->args[0]); | 56 | irq = be32_to_cpu(args->args[0]); |
57 | 57 | ||
58 | server = priority = 0; | 58 | server = priority = 0; |
59 | if (xive_enabled()) | 59 | if (xics_on_xive()) |
60 | rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority); | 60 | rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority); |
61 | else | 61 | else |
62 | rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); | 62 | rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); |
@@ -83,7 +83,7 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) | |||
83 | 83 | ||
84 | irq = be32_to_cpu(args->args[0]); | 84 | irq = be32_to_cpu(args->args[0]); |
85 | 85 | ||
86 | if (xive_enabled()) | 86 | if (xics_on_xive()) |
87 | rc = kvmppc_xive_int_off(vcpu->kvm, irq); | 87 | rc = kvmppc_xive_int_off(vcpu->kvm, irq); |
88 | else | 88 | else |
89 | rc = kvmppc_xics_int_off(vcpu->kvm, irq); | 89 | rc = kvmppc_xics_int_off(vcpu->kvm, irq); |
@@ -105,7 +105,7 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) | |||
105 | 105 | ||
106 | irq = be32_to_cpu(args->args[0]); | 106 | irq = be32_to_cpu(args->args[0]); |
107 | 107 | ||
108 | if (xive_enabled()) | 108 | if (xics_on_xive()) |
109 | rc = kvmppc_xive_int_on(vcpu->kvm, irq); | 109 | rc = kvmppc_xive_int_on(vcpu->kvm, irq); |
110 | else | 110 | else |
111 | rc = kvmppc_xics_int_on(vcpu->kvm, irq); | 111 | rc = kvmppc_xics_int_on(vcpu->kvm, irq); |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b90a7d154180..8885377ec3e0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -748,7 +748,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
748 | kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); | 748 | kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); |
749 | break; | 749 | break; |
750 | case KVMPPC_IRQ_XICS: | 750 | case KVMPPC_IRQ_XICS: |
751 | if (xive_enabled()) | 751 | if (xics_on_xive()) |
752 | kvmppc_xive_cleanup_vcpu(vcpu); | 752 | kvmppc_xive_cleanup_vcpu(vcpu); |
753 | else | 753 | else |
754 | kvmppc_xics_free_icp(vcpu); | 754 | kvmppc_xics_free_icp(vcpu); |
@@ -1931,7 +1931,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
1931 | r = -EPERM; | 1931 | r = -EPERM; |
1932 | dev = kvm_device_from_filp(f.file); | 1932 | dev = kvm_device_from_filp(f.file); |
1933 | if (dev) { | 1933 | if (dev) { |
1934 | if (xive_enabled()) | 1934 | if (xics_on_xive()) |
1935 | r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]); | 1935 | r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]); |
1936 | else | 1936 | else |
1937 | r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); | 1937 | r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); |
@@ -2189,10 +2189,12 @@ static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) | |||
2189 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | | 2189 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | |
2190 | KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | | 2190 | KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | |
2191 | KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | | 2191 | KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | |
2192 | KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; | 2192 | KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS | |
2193 | KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; | ||
2193 | cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | | 2194 | cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | |
2194 | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | | 2195 | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | |
2195 | KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; | 2196 | KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR | |
2197 | KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; | ||
2196 | } | 2198 | } |
2197 | return 0; | 2199 | return 0; |
2198 | } | 2200 | } |
@@ -2251,12 +2253,16 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) | |||
2251 | if (have_fw_feat(fw_features, "enabled", | 2253 | if (have_fw_feat(fw_features, "enabled", |
2252 | "fw-count-cache-disabled")) | 2254 | "fw-count-cache-disabled")) |
2253 | cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; | 2255 | cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; |
2256 | if (have_fw_feat(fw_features, "enabled", | ||
2257 | "fw-count-cache-flush-bcctr2,0,0")) | ||
2258 | cp->character |= KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; | ||
2254 | cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | | 2259 | cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | |
2255 | KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | | 2260 | KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | |
2256 | KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | | 2261 | KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | |
2257 | KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | | 2262 | KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | |
2258 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | | 2263 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | |
2259 | KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; | 2264 | KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS | |
2265 | KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; | ||
2260 | 2266 | ||
2261 | if (have_fw_feat(fw_features, "enabled", | 2267 | if (have_fw_feat(fw_features, "enabled", |
2262 | "speculation-policy-favor-security")) | 2268 | "speculation-policy-favor-security")) |
@@ -2267,9 +2273,13 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) | |||
2267 | if (!have_fw_feat(fw_features, "disabled", | 2273 | if (!have_fw_feat(fw_features, "disabled", |
2268 | "needs-spec-barrier-for-bound-checks")) | 2274 | "needs-spec-barrier-for-bound-checks")) |
2269 | cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; | 2275 | cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; |
2276 | if (have_fw_feat(fw_features, "enabled", | ||
2277 | "needs-count-cache-flush-on-context-switch")) | ||
2278 | cp->behaviour |= KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; | ||
2270 | cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | | 2279 | cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | |
2271 | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | | 2280 | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | |
2272 | KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; | 2281 | KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR | |
2282 | KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; | ||
2273 | 2283 | ||
2274 | of_node_put(fw_features); | 2284 | of_node_put(fw_features); |
2275 | } | 2285 | } |
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 225667652069..1727180e8ca1 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h | |||
@@ -331,5 +331,6 @@ extern void css_schedule_reprobe(void); | |||
331 | /* Function from drivers/s390/cio/chsc.c */ | 331 | /* Function from drivers/s390/cio/chsc.c */ |
332 | int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); | 332 | int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); |
333 | int chsc_sstpi(void *page, void *result, size_t size); | 333 | int chsc_sstpi(void *page, void *result, size_t size); |
334 | int chsc_sgib(u32 origin); | ||
334 | 335 | ||
335 | #endif | 336 | #endif |
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 2f7f27e5493f..afaf5e3c57fd 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h | |||
@@ -62,6 +62,7 @@ enum interruption_class { | |||
62 | IRQIO_MSI, | 62 | IRQIO_MSI, |
63 | IRQIO_VIR, | 63 | IRQIO_VIR, |
64 | IRQIO_VAI, | 64 | IRQIO_VAI, |
65 | IRQIO_GAL, | ||
65 | NMI_NMI, | 66 | NMI_NMI, |
66 | CPU_RST, | 67 | CPU_RST, |
67 | NR_ARCH_IRQS | 68 | NR_ARCH_IRQS |
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h index 6cb9e2ed05b6..b2cc1ec78d06 100644 --- a/arch/s390/include/asm/isc.h +++ b/arch/s390/include/asm/isc.h | |||
@@ -21,6 +21,7 @@ | |||
21 | /* Adapter interrupts. */ | 21 | /* Adapter interrupts. */ |
22 | #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ | 22 | #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ |
23 | #define PCI_ISC 2 /* PCI I/O subchannels */ | 23 | #define PCI_ISC 2 /* PCI I/O subchannels */ |
24 | #define GAL_ISC 5 /* GIB alert */ | ||
24 | #define AP_ISC 6 /* adjunct processor (crypto) devices */ | 25 | #define AP_ISC 6 /* adjunct processor (crypto) devices */ |
25 | 26 | ||
26 | /* Functions for registration of I/O interruption subclasses */ | 27 | /* Functions for registration of I/O interruption subclasses */ |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d5d24889c3bc..c47e22bba87f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -591,7 +591,6 @@ struct kvm_s390_float_interrupt { | |||
591 | struct kvm_s390_mchk_info mchk; | 591 | struct kvm_s390_mchk_info mchk; |
592 | struct kvm_s390_ext_info srv_signal; | 592 | struct kvm_s390_ext_info srv_signal; |
593 | int next_rr_cpu; | 593 | int next_rr_cpu; |
594 | unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; | ||
595 | struct mutex ais_lock; | 594 | struct mutex ais_lock; |
596 | u8 simm; | 595 | u8 simm; |
597 | u8 nimm; | 596 | u8 nimm; |
@@ -712,6 +711,7 @@ struct s390_io_adapter { | |||
712 | struct kvm_s390_cpu_model { | 711 | struct kvm_s390_cpu_model { |
713 | /* facility mask supported by kvm & hosting machine */ | 712 | /* facility mask supported by kvm & hosting machine */ |
714 | __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; | 713 | __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; |
714 | struct kvm_s390_vm_cpu_subfunc subfuncs; | ||
715 | /* facility list requested by guest (in dma page) */ | 715 | /* facility list requested by guest (in dma page) */ |
716 | __u64 *fac_list; | 716 | __u64 *fac_list; |
717 | u64 cpuid; | 717 | u64 cpuid; |
@@ -782,9 +782,21 @@ struct kvm_s390_gisa { | |||
782 | u8 reserved03[11]; | 782 | u8 reserved03[11]; |
783 | u32 airq_count; | 783 | u32 airq_count; |
784 | } g1; | 784 | } g1; |
785 | struct { | ||
786 | u64 word[4]; | ||
787 | } u64; | ||
785 | }; | 788 | }; |
786 | }; | 789 | }; |
787 | 790 | ||
791 | struct kvm_s390_gib { | ||
792 | u32 alert_list_origin; | ||
793 | u32 reserved01; | ||
794 | u8:5; | ||
795 | u8 nisc:3; | ||
796 | u8 reserved03[3]; | ||
797 | u32 reserved04[5]; | ||
798 | }; | ||
799 | |||
788 | /* | 800 | /* |
789 | * sie_page2 has to be allocated as DMA because fac_list, crycb and | 801 | * sie_page2 has to be allocated as DMA because fac_list, crycb and |
790 | * gisa need 31bit addresses in the sie control block. | 802 | * gisa need 31bit addresses in the sie control block. |
@@ -793,7 +805,8 @@ struct sie_page2 { | |||
793 | __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ | 805 | __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ |
794 | struct kvm_s390_crypto_cb crycb; /* 0x0800 */ | 806 | struct kvm_s390_crypto_cb crycb; /* 0x0800 */ |
795 | struct kvm_s390_gisa gisa; /* 0x0900 */ | 807 | struct kvm_s390_gisa gisa; /* 0x0900 */ |
796 | u8 reserved920[0x1000 - 0x920]; /* 0x0920 */ | 808 | struct kvm *kvm; /* 0x0920 */ |
809 | u8 reserved928[0x1000 - 0x928]; /* 0x0928 */ | ||
797 | }; | 810 | }; |
798 | 811 | ||
799 | struct kvm_s390_vsie { | 812 | struct kvm_s390_vsie { |
@@ -804,6 +817,20 @@ struct kvm_s390_vsie { | |||
804 | struct page *pages[KVM_MAX_VCPUS]; | 817 | struct page *pages[KVM_MAX_VCPUS]; |
805 | }; | 818 | }; |
806 | 819 | ||
820 | struct kvm_s390_gisa_iam { | ||
821 | u8 mask; | ||
822 | spinlock_t ref_lock; | ||
823 | u32 ref_count[MAX_ISC + 1]; | ||
824 | }; | ||
825 | |||
826 | struct kvm_s390_gisa_interrupt { | ||
827 | struct kvm_s390_gisa *origin; | ||
828 | struct kvm_s390_gisa_iam alert; | ||
829 | struct hrtimer timer; | ||
830 | u64 expires; | ||
831 | DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS); | ||
832 | }; | ||
833 | |||
807 | struct kvm_arch{ | 834 | struct kvm_arch{ |
808 | void *sca; | 835 | void *sca; |
809 | int use_esca; | 836 | int use_esca; |
@@ -837,7 +864,8 @@ struct kvm_arch{ | |||
837 | atomic64_t cmma_dirty_pages; | 864 | atomic64_t cmma_dirty_pages; |
838 | /* subset of available cpu features enabled by user space */ | 865 | /* subset of available cpu features enabled by user space */ |
839 | DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); | 866 | DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); |
840 | struct kvm_s390_gisa *gisa; | 867 | DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); |
868 | struct kvm_s390_gisa_interrupt gisa_int; | ||
841 | }; | 869 | }; |
842 | 870 | ||
843 | #define KVM_HVA_ERR_BAD (-1UL) | 871 | #define KVM_HVA_ERR_BAD (-1UL) |
@@ -871,6 +899,9 @@ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, | |||
871 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); | 899 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); |
872 | extern char sie_exit; | 900 | extern char sie_exit; |
873 | 901 | ||
902 | extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); | ||
903 | extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); | ||
904 | |||
874 | static inline void kvm_arch_hardware_disable(void) {} | 905 | static inline void kvm_arch_hardware_disable(void) {} |
875 | static inline void kvm_arch_check_processor_compat(void *rtn) {} | 906 | static inline void kvm_arch_check_processor_compat(void *rtn) {} |
876 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 907 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
@@ -878,7 +909,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} | |||
878 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 909 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
879 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | 910 | static inline void kvm_arch_free_memslot(struct kvm *kvm, |
880 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | 911 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} |
881 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} | 912 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} |
882 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | 913 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} |
883 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 914 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
884 | struct kvm_memory_slot *slot) {} | 915 | struct kvm_memory_slot *slot) {} |
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 0e8d68bac82c..0cd5a5f96729 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c | |||
@@ -88,6 +88,7 @@ static const struct irq_class irqclass_sub_desc[] = { | |||
88 | {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" }, | 88 | {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" }, |
89 | {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, | 89 | {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, |
90 | {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, | 90 | {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, |
91 | {.irq = IRQIO_GAL, .name = "GAL", .desc = "[I/O] GIB Alert"}, | ||
91 | {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, | 92 | {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, |
92 | {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, | 93 | {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, |
93 | }; | 94 | }; |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fcb55b02990e..82162867f378 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -7,6 +7,9 @@ | |||
7 | * Author(s): Carsten Otte <cotte@de.ibm.com> | 7 | * Author(s): Carsten Otte <cotte@de.ibm.com> |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define KMSG_COMPONENT "kvm-s390" | ||
11 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | ||
12 | |||
10 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
11 | #include <linux/kvm_host.h> | 14 | #include <linux/kvm_host.h> |
12 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
@@ -23,6 +26,7 @@ | |||
23 | #include <asm/gmap.h> | 26 | #include <asm/gmap.h> |
24 | #include <asm/switch_to.h> | 27 | #include <asm/switch_to.h> |
25 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
29 | #include <asm/airq.h> | ||
26 | #include "kvm-s390.h" | 30 | #include "kvm-s390.h" |
27 | #include "gaccess.h" | 31 | #include "gaccess.h" |
28 | #include "trace-s390.h" | 32 | #include "trace-s390.h" |
@@ -31,6 +35,8 @@ | |||
31 | #define PFAULT_DONE 0x0680 | 35 | #define PFAULT_DONE 0x0680 |
32 | #define VIRTIO_PARAM 0x0d00 | 36 | #define VIRTIO_PARAM 0x0d00 |
33 | 37 | ||
38 | static struct kvm_s390_gib *gib; | ||
39 | |||
34 | /* handle external calls via sigp interpretation facility */ | 40 | /* handle external calls via sigp interpretation facility */ |
35 | static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) | 41 | static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) |
36 | { | 42 | { |
@@ -217,22 +223,100 @@ static inline u8 int_word_to_isc(u32 int_word) | |||
217 | */ | 223 | */ |
218 | #define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE) | 224 | #define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE) |
219 | 225 | ||
220 | static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) | 226 | /** |
227 | * gisa_set_iam - change the GISA interruption alert mask | ||
228 | * | ||
229 | * @gisa: gisa to operate on | ||
230 | * @iam: new IAM value to use | ||
231 | * | ||
232 | * Change the IAM atomically with the next alert address and the IPM | ||
233 | * of the GISA if the GISA is not part of the GIB alert list. All three | ||
234 | * fields are located in the first long word of the GISA. | ||
235 | * | ||
236 | * Returns: 0 on success | ||
237 | * -EBUSY in case the gisa is part of the alert list | ||
238 | */ | ||
239 | static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam) | ||
240 | { | ||
241 | u64 word, _word; | ||
242 | |||
243 | do { | ||
244 | word = READ_ONCE(gisa->u64.word[0]); | ||
245 | if ((u64)gisa != word >> 32) | ||
246 | return -EBUSY; | ||
247 | _word = (word & ~0xffUL) | iam; | ||
248 | } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); | ||
249 | |||
250 | return 0; | ||
251 | } | ||
252 | |||
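The kernel-doc above describes a lock-free update: the IAM shares a 64-bit word with the next-alert address, so a single cmpxchg can change the mask while simultaneously verifying that the GISA is not queued on the alert list. A minimal user-space model of that retry pattern, using C11 atomics in place of the kernel's cmpxchg(); the names and simplified guard are illustrative, not KVM API:

#include <stdint.h>
#include <stdatomic.h>

/* Model of gisa_set_iam(): splice a new low byte into a 64-bit word,
 * but only while the high half still proves "not on the alert list". */
static int set_iam_model(_Atomic uint64_t *word, uint8_t iam,
			 uint64_t self_addr)
{
	uint64_t old = atomic_load(word), new;

	do {
		if ((old >> 32) != self_addr)	/* queued on alert list */
			return -1;		/* caller must retry later */
		new = (old & ~0xffULL) | iam;	/* replace the IAM byte */
	} while (!atomic_compare_exchange_weak(word, &old, new));

	return 0;
}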
253 | /** | ||
254 | * gisa_clear_ipm - clear the GISA interruption pending mask | ||
255 | * | ||
256 | * @gisa: gisa to operate on | ||
257 | * | ||
258 | * Clear the IPM atomically with the next alert address and the IAM | ||
259 | * of the GISA unconditionally. All three fields are located in the | ||
260 | * first long word of the GISA. | ||
261 | */ | ||
262 | static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa) | ||
263 | { | ||
264 | u64 word, _word; | ||
265 | |||
266 | do { | ||
267 | word = READ_ONCE(gisa->u64.word[0]); | ||
268 | _word = word & ~(0xffUL << 24); | ||
269 | } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); | ||
270 | } | ||
271 | |||
272 | /** | ||
273 | * gisa_get_ipm_or_restore_iam - return IPM or restore GISA IAM | ||
274 | * | ||
275 | * @gi: gisa interrupt struct to work on | ||
276 | * | ||
277 | * Atomically restores the interruption alert mask if none of the | ||
278 | * relevant ISCs are pending and returns the IPM. | ||
279 | * | ||
280 | * Returns: the relevant pending ISCs | ||
281 | */ | ||
282 | static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) | ||
283 | { | ||
284 | u8 pending_mask, alert_mask; | ||
285 | u64 word, _word; | ||
286 | |||
287 | do { | ||
288 | word = READ_ONCE(gi->origin->u64.word[0]); | ||
289 | alert_mask = READ_ONCE(gi->alert.mask); | ||
290 | pending_mask = (u8)(word >> 24) & alert_mask; | ||
291 | if (pending_mask) | ||
292 | return pending_mask; | ||
293 | _word = (word & ~0xffUL) | alert_mask; | ||
294 | } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word); | ||
295 | |||
296 | return 0; | ||
297 | } | ||
298 | |||
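gisa_get_ipm_or_restore_iam() is the classic race-free "consume or re-arm" idiom: either pending bits are returned, or the alert mask is restored in the very same CAS, so a producer setting an IPM bit can never slip between the check and the re-arm. A hedged user-space model of the same shape (names are invented, not KVM's):

#include <stdint.h>
#include <stdatomic.h>

static uint8_t consume_or_rearm(_Atomic uint64_t *word, uint8_t mask)
{
	uint64_t old = atomic_load(word), new;
	uint8_t pending;

	do {
		pending = (uint8_t)(old >> 24) & mask;	/* the IPM byte */
		if (pending)
			return pending;			/* consume */
		new = (old & ~0xffULL) | mask;		/* re-arm IAM */
	} while (!atomic_compare_exchange_weak(word, &old, new));

	return 0;
}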
299 | static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa) | ||
300 | { | ||
301 | return READ_ONCE(gisa->next_alert) != (u32)(u64)gisa; | ||
302 | } | ||
303 | |||
304 | static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) | ||
221 | { | 305 | { |
222 | set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); | 306 | set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); |
223 | } | 307 | } |
224 | 308 | ||
225 | static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa) | 309 | static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa) |
226 | { | 310 | { |
227 | return READ_ONCE(gisa->ipm); | 311 | return READ_ONCE(gisa->ipm); |
228 | } | 312 | } |
229 | 313 | ||
230 | static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) | 314 | static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) |
231 | { | 315 | { |
232 | clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); | 316 | clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); |
233 | } | 317 | } |
234 | 318 | ||
235 | static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) | 319 | static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) |
236 | { | 320 | { |
237 | return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); | 321 | return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); |
238 | } | 322 | } |
@@ -245,8 +329,13 @@ static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu) | |||
245 | 329 | ||
246 | static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) | 330 | static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) |
247 | { | 331 | { |
248 | return pending_irqs_no_gisa(vcpu) | | 332 | struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; |
249 | kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; | 333 | unsigned long pending_mask; |
334 | |||
335 | pending_mask = pending_irqs_no_gisa(vcpu); | ||
336 | if (gi->origin) | ||
337 | pending_mask |= gisa_get_ipm(gi->origin) << IRQ_PEND_IO_ISC_7; | ||
338 | return pending_mask; | ||
250 | } | 339 | } |
251 | 340 | ||
252 | static inline int isc_to_irq_type(unsigned long isc) | 341 | static inline int isc_to_irq_type(unsigned long isc) |
@@ -318,13 +407,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) | |||
318 | static void __set_cpu_idle(struct kvm_vcpu *vcpu) | 407 | static void __set_cpu_idle(struct kvm_vcpu *vcpu) |
319 | { | 408 | { |
320 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); | 409 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); |
321 | set_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); | 410 | set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); |
322 | } | 411 | } |
323 | 412 | ||
324 | static void __unset_cpu_idle(struct kvm_vcpu *vcpu) | 413 | static void __unset_cpu_idle(struct kvm_vcpu *vcpu) |
325 | { | 414 | { |
326 | kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); | 415 | kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); |
327 | clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); | 416 | clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); |
328 | } | 417 | } |
329 | 418 | ||
330 | static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) | 419 | static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) |
@@ -345,7 +434,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) | |||
345 | { | 434 | { |
346 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) | 435 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) |
347 | return; | 436 | return; |
348 | else if (psw_ioint_disabled(vcpu)) | 437 | if (psw_ioint_disabled(vcpu)) |
349 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); | 438 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); |
350 | else | 439 | else |
351 | vcpu->arch.sie_block->lctl |= LCTL_CR6; | 440 | vcpu->arch.sie_block->lctl |= LCTL_CR6; |
@@ -353,7 +442,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) | |||
353 | 442 | ||
354 | static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) | 443 | static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) |
355 | { | 444 | { |
356 | if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK)) | 445 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_EXT_MASK)) |
357 | return; | 446 | return; |
358 | if (psw_extint_disabled(vcpu)) | 447 | if (psw_extint_disabled(vcpu)) |
359 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); | 448 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); |
@@ -363,7 +452,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) | |||
363 | 452 | ||
364 | static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) | 453 | static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) |
365 | { | 454 | { |
366 | if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) | 455 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_MCHK_MASK)) |
367 | return; | 456 | return; |
368 | if (psw_mchk_disabled(vcpu)) | 457 | if (psw_mchk_disabled(vcpu)) |
369 | vcpu->arch.sie_block->ictl |= ICTL_LPSW; | 458 | vcpu->arch.sie_block->ictl |= ICTL_LPSW; |
@@ -956,6 +1045,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, | |||
956 | { | 1045 | { |
957 | struct list_head *isc_list; | 1046 | struct list_head *isc_list; |
958 | struct kvm_s390_float_interrupt *fi; | 1047 | struct kvm_s390_float_interrupt *fi; |
1048 | struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; | ||
959 | struct kvm_s390_interrupt_info *inti = NULL; | 1049 | struct kvm_s390_interrupt_info *inti = NULL; |
960 | struct kvm_s390_io_info io; | 1050 | struct kvm_s390_io_info io; |
961 | u32 isc; | 1051 | u32 isc; |
@@ -998,8 +1088,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, | |||
998 | goto out; | 1088 | goto out; |
999 | } | 1089 | } |
1000 | 1090 | ||
1001 | if (vcpu->kvm->arch.gisa && | 1091 | if (gi->origin && gisa_tac_ipm_gisc(gi->origin, isc)) { |
1002 | kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) { | ||
1003 | /* | 1092 | /* |
1004 | * in case an adapter interrupt was not delivered | 1093 | * in case an adapter interrupt was not delivered |
1005 | * in SIE context KVM will handle the delivery | 1094 | * in SIE context KVM will handle the delivery |
@@ -1089,6 +1178,7 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu) | |||
1089 | 1178 | ||
1090 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | 1179 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) |
1091 | { | 1180 | { |
1181 | struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; | ||
1092 | u64 sltime; | 1182 | u64 sltime; |
1093 | 1183 | ||
1094 | vcpu->stat.exit_wait_state++; | 1184 | vcpu->stat.exit_wait_state++; |
@@ -1102,6 +1192,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | |||
1102 | return -EOPNOTSUPP; /* disabled wait */ | 1192 | return -EOPNOTSUPP; /* disabled wait */ |
1103 | } | 1193 | } |
1104 | 1194 | ||
1195 | if (gi->origin && | ||
1196 | (gisa_get_ipm_or_restore_iam(gi) & | ||
1197 | vcpu->arch.sie_block->gcr[6] >> 24)) | ||
1198 | return 0; | ||
1199 | |||
1105 | if (!ckc_interrupts_enabled(vcpu) && | 1200 | if (!ckc_interrupts_enabled(vcpu) && |
1106 | !cpu_timer_interrupts_enabled(vcpu)) { | 1201 | !cpu_timer_interrupts_enabled(vcpu)) { |
1107 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); | 1202 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); |
@@ -1533,18 +1628,19 @@ static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm, | |||
1533 | 1628 | ||
1534 | static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid) | 1629 | static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid) |
1535 | { | 1630 | { |
1631 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
1536 | unsigned long active_mask; | 1632 | unsigned long active_mask; |
1537 | int isc; | 1633 | int isc; |
1538 | 1634 | ||
1539 | if (schid) | 1635 | if (schid) |
1540 | goto out; | 1636 | goto out; |
1541 | if (!kvm->arch.gisa) | 1637 | if (!gi->origin) |
1542 | goto out; | 1638 | goto out; |
1543 | 1639 | ||
1544 | active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32; | 1640 | active_mask = (isc_mask & gisa_get_ipm(gi->origin) << 24) << 32; |
1545 | while (active_mask) { | 1641 | while (active_mask) { |
1546 | isc = __fls(active_mask) ^ (BITS_PER_LONG - 1); | 1642 | isc = __fls(active_mask) ^ (BITS_PER_LONG - 1); |
1547 | if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc)) | 1643 | if (gisa_tac_ipm_gisc(gi->origin, isc)) |
1548 | return isc; | 1644 | return isc; |
1549 | clear_bit_inv(isc, &active_mask); | 1645 | clear_bit_inv(isc, &active_mask); |
1550 | } | 1646 | } |
@@ -1567,6 +1663,7 @@ out: | |||
1567 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | 1663 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, |
1568 | u64 isc_mask, u32 schid) | 1664 | u64 isc_mask, u32 schid) |
1569 | { | 1665 | { |
1666 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
1570 | struct kvm_s390_interrupt_info *inti, *tmp_inti; | 1667 | struct kvm_s390_interrupt_info *inti, *tmp_inti; |
1571 | int isc; | 1668 | int isc; |
1572 | 1669 | ||
@@ -1584,7 +1681,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | |||
1584 | /* both types of interrupts present */ | 1681 | /* both types of interrupts present */ |
1585 | if (int_word_to_isc(inti->io.io_int_word) <= isc) { | 1682 | if (int_word_to_isc(inti->io.io_int_word) <= isc) { |
1586 | /* classical IO int with higher priority */ | 1683 | /* classical IO int with higher priority */ |
1587 | kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); | 1684 | gisa_set_ipm_gisc(gi->origin, isc); |
1588 | goto out; | 1685 | goto out; |
1589 | } | 1686 | } |
1590 | gisa_out: | 1687 | gisa_out: |
@@ -1596,7 +1693,7 @@ gisa_out: | |||
1596 | kvm_s390_reinject_io_int(kvm, inti); | 1693 | kvm_s390_reinject_io_int(kvm, inti); |
1597 | inti = tmp_inti; | 1694 | inti = tmp_inti; |
1598 | } else | 1695 | } else |
1599 | kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); | 1696 | gisa_set_ipm_gisc(gi->origin, isc); |
1600 | out: | 1697 | out: |
1601 | return inti; | 1698 | return inti; |
1602 | } | 1699 | } |
@@ -1685,6 +1782,7 @@ static int __inject_float_mchk(struct kvm *kvm, | |||
1685 | 1782 | ||
1686 | static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | 1783 | static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) |
1687 | { | 1784 | { |
1785 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
1688 | struct kvm_s390_float_interrupt *fi; | 1786 | struct kvm_s390_float_interrupt *fi; |
1689 | struct list_head *list; | 1787 | struct list_head *list; |
1690 | int isc; | 1788 | int isc; |
@@ -1692,9 +1790,9 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
1692 | kvm->stat.inject_io++; | 1790 | kvm->stat.inject_io++; |
1693 | isc = int_word_to_isc(inti->io.io_int_word); | 1791 | isc = int_word_to_isc(inti->io.io_int_word); |
1694 | 1792 | ||
1695 | if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) { | 1793 | if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) { |
1696 | VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc); | 1794 | VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc); |
1697 | kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); | 1795 | gisa_set_ipm_gisc(gi->origin, isc); |
1698 | kfree(inti); | 1796 | kfree(inti); |
1699 | return 0; | 1797 | return 0; |
1700 | } | 1798 | } |
@@ -1726,7 +1824,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
1726 | */ | 1824 | */ |
1727 | static void __floating_irq_kick(struct kvm *kvm, u64 type) | 1825 | static void __floating_irq_kick(struct kvm *kvm, u64 type) |
1728 | { | 1826 | { |
1729 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | ||
1730 | struct kvm_vcpu *dst_vcpu; | 1827 | struct kvm_vcpu *dst_vcpu; |
1731 | int sigcpu, online_vcpus, nr_tries = 0; | 1828 | int sigcpu, online_vcpus, nr_tries = 0; |
1732 | 1829 | ||
@@ -1735,11 +1832,11 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) | |||
1735 | return; | 1832 | return; |
1736 | 1833 | ||
1737 | /* find idle VCPUs first, then round robin */ | 1834 | /* find idle VCPUs first, then round robin */ |
1738 | sigcpu = find_first_bit(fi->idle_mask, online_vcpus); | 1835 | sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus); |
1739 | if (sigcpu == online_vcpus) { | 1836 | if (sigcpu == online_vcpus) { |
1740 | do { | 1837 | do { |
1741 | sigcpu = fi->next_rr_cpu; | 1838 | sigcpu = kvm->arch.float_int.next_rr_cpu++; |
1742 | fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus; | 1839 | kvm->arch.float_int.next_rr_cpu %= online_vcpus; |
1743 | /* avoid endless loops if all vcpus are stopped */ | 1840 | /* avoid endless loops if all vcpus are stopped */ |
1744 | if (nr_tries++ >= online_vcpus) | 1841 | if (nr_tries++ >= online_vcpus) |
1745 | return; | 1842 | return; |
@@ -1753,7 +1850,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) | |||
1753 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); | 1850 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); |
1754 | break; | 1851 | break; |
1755 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | 1852 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: |
1756 | if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa)) | 1853 | if (!(type & KVM_S390_INT_IO_AI_MASK && |
1854 | kvm->arch.gisa_int.origin)) | ||
1757 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); | 1855 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); |
1758 | break; | 1856 | break; |
1759 | default: | 1857 | default: |
@@ -2003,6 +2101,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) | |||
2003 | 2101 | ||
2004 | static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) | 2102 | static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) |
2005 | { | 2103 | { |
2104 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
2006 | struct kvm_s390_interrupt_info *inti; | 2105 | struct kvm_s390_interrupt_info *inti; |
2007 | struct kvm_s390_float_interrupt *fi; | 2106 | struct kvm_s390_float_interrupt *fi; |
2008 | struct kvm_s390_irq *buf; | 2107 | struct kvm_s390_irq *buf; |
@@ -2026,15 +2125,14 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) | |||
2026 | 2125 | ||
2027 | max_irqs = len / sizeof(struct kvm_s390_irq); | 2126 | max_irqs = len / sizeof(struct kvm_s390_irq); |
2028 | 2127 | ||
2029 | if (kvm->arch.gisa && | 2128 | if (gi->origin && gisa_get_ipm(gi->origin)) { |
2030 | kvm_s390_gisa_get_ipm(kvm->arch.gisa)) { | ||
2031 | for (i = 0; i <= MAX_ISC; i++) { | 2129 | for (i = 0; i <= MAX_ISC; i++) { |
2032 | if (n == max_irqs) { | 2130 | if (n == max_irqs) { |
2033 | /* signal userspace to try again */ | 2131 | /* signal userspace to try again */ |
2034 | ret = -ENOMEM; | 2132 | ret = -ENOMEM; |
2035 | goto out_nolock; | 2133 | goto out_nolock; |
2036 | } | 2134 | } |
2037 | if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) { | 2135 | if (gisa_tac_ipm_gisc(gi->origin, i)) { |
2038 | irq = (struct kvm_s390_irq *) &buf[n]; | 2136 | irq = (struct kvm_s390_irq *) &buf[n]; |
2039 | irq->type = KVM_S390_INT_IO(1, 0, 0, 0); | 2137 | irq->type = KVM_S390_INT_IO(1, 0, 0, 0); |
2040 | irq->u.io.io_int_word = isc_to_int_word(i); | 2138 | irq->u.io.io_int_word = isc_to_int_word(i); |
@@ -2831,7 +2929,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, | |||
2831 | int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) | 2929 | int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) |
2832 | { | 2930 | { |
2833 | int scn; | 2931 | int scn; |
2834 | unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; | 2932 | DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); |
2835 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 2933 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
2836 | unsigned long pending_irqs; | 2934 | unsigned long pending_irqs; |
2837 | struct kvm_s390_irq irq; | 2935 | struct kvm_s390_irq irq; |
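The notable line in this hunk swaps an open-coded array for the idiomatic macro. Both forms allocate identical storage; per include/linux/bitmap.h the macro expands roughly to:

/* DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS) is shorthand for: */
unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
/* where BITS_TO_LONGS(n) rounds up to whole longs:
 * DIV_ROUND_UP(n, BITS_PER_LONG) */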
@@ -2884,27 +2982,278 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) | |||
2884 | return n; | 2982 | return n; |
2885 | } | 2983 | } |
2886 | 2984 | ||
2887 | void kvm_s390_gisa_clear(struct kvm *kvm) | 2985 | static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) |
2888 | { | 2986 | { |
2889 | if (kvm->arch.gisa) { | 2987 | int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); |
2890 | memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa)); | 2988 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; |
2891 | kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa; | 2989 | struct kvm_vcpu *vcpu; |
2892 | VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa); | 2990 | |
2991 | for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { | ||
2992 | vcpu = kvm_get_vcpu(kvm, vcpu_id); | ||
2993 | if (psw_ioint_disabled(vcpu)) | ||
2994 | continue; | ||
2995 | deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); | ||
2996 | if (deliverable_mask) { | ||
2997 | /* recently kicked but not yet running */ | ||
2998 | if (test_and_set_bit(vcpu_id, gi->kicked_mask)) | ||
2999 | return; | ||
3000 | kvm_s390_vcpu_wakeup(vcpu); | ||
3001 | return; | ||
3002 | } | ||
2893 | } | 3003 | } |
2894 | } | 3004 | } |
2895 | 3005 | ||
3006 | static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) | ||
3007 | { | ||
3008 | struct kvm_s390_gisa_interrupt *gi = | ||
3009 | container_of(timer, struct kvm_s390_gisa_interrupt, timer); | ||
3010 | struct kvm *kvm = | ||
3011 | container_of(gi->origin, struct sie_page2, gisa)->kvm; | ||
3012 | u8 pending_mask; | ||
3013 | |||
3014 | pending_mask = gisa_get_ipm_or_restore_iam(gi); | ||
3015 | if (pending_mask) { | ||
3016 | __airqs_kick_single_vcpu(kvm, pending_mask); | ||
3017 | hrtimer_forward_now(timer, ns_to_ktime(gi->expires)); | ||
3018 | return HRTIMER_RESTART; | ||
3019 | } | ||
3020 | |||
3021 | return HRTIMER_NORESTART; | ||
3022 | } | ||
3023 | |||
3024 | #define NULL_GISA_ADDR 0x00000000UL | ||
3025 | #define NONE_GISA_ADDR 0x00000001UL | ||
3026 | #define GISA_ADDR_MASK 0xfffff000UL | ||
3027 | |||
3028 | static void process_gib_alert_list(void) | ||
3029 | { | ||
3030 | struct kvm_s390_gisa_interrupt *gi; | ||
3031 | struct kvm_s390_gisa *gisa; | ||
3032 | struct kvm *kvm; | ||
3033 | u32 final, origin = 0UL; | ||
3034 | |||
3035 | do { | ||
3036 | /* | ||
3037 | * If the NONE_GISA_ADDR is still stored in the alert list | ||
3038 | * origin, we will leave the outer loop. No further GISA has | ||
3039 | * been added to the alert list by millicode while processing | ||
3040 | * the current alert list. | ||
3041 | */ | ||
3042 | final = (origin & NONE_GISA_ADDR); | ||
3043 | /* | ||
3044 | * Cut off the alert list and store the NONE_GISA_ADDR in the | ||
3045 | * alert list origin to avoid further GAL interruptions. | ||
3046 | * A new alert list can be built up by millicode in parallel | ||
3047 | * for guests not yet in the cut-off alert list. When in the | ||
3048 | * final loop iteration, store the NULL_GISA_ADDR instead to | ||
3049 | * re-enable GAL interruptions on the host. | ||
3050 | */ | ||
3051 | origin = xchg(&gib->alert_list_origin, | ||
3052 | (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR); | ||
3053 | /* | ||
3054 | * Loop through the just cut-off alert list and start the | ||
3055 | * gisa timers to kick idle vcpus to consume the pending | ||
3056 | * interruptions as soon as possible. | ||
3057 | */ | ||
3058 | while (origin & GISA_ADDR_MASK) { | ||
3059 | gisa = (struct kvm_s390_gisa *)(u64)origin; | ||
3060 | origin = gisa->next_alert; | ||
3061 | gisa->next_alert = (u32)(u64)gisa; | ||
3062 | kvm = container_of(gisa, struct sie_page2, gisa)->kvm; | ||
3063 | gi = &kvm->arch.gisa_int; | ||
3064 | if (hrtimer_active(&gi->timer)) | ||
3065 | hrtimer_cancel(&gi->timer); | ||
3066 | hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL); | ||
3067 | } | ||
3068 | } while (!final); | ||
3069 | |||
3070 | } | ||
3071 | |||
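The comments above describe a two-marker handoff: NONE_GISA_ADDR parks the list head so millicode stops raising GAL interrupts while the host drains, and NULL_GISA_ADDR re-arms them once a drain pass found nothing new. A compact user-space model of that xchg loop, assuming list nodes are aligned so real addresses are always greater than the one-valued NONE marker; none of these names are kernel API:

#include <stdint.h>
#include <stdatomic.h>

#define LIST_NULL ((uintptr_t)0)	/* empty, alerts re-armed */
#define LIST_NONE ((uintptr_t)1)	/* empty, alerts suppressed */

struct node { uintptr_t next; };

static _Atomic uintptr_t list_origin = LIST_NULL;

static void drain_alert_list(void)
{
	uintptr_t origin = 0, final;

	do {
		/* A surviving NONE marker means no producer queued work. */
		final = origin & LIST_NONE;
		/* Cut off the list; re-arm only on the final pass. */
		origin = atomic_exchange(&list_origin,
					 final ? LIST_NULL : LIST_NONE);
		while (origin > LIST_NONE) {	/* real node addresses */
			struct node *n = (struct node *)origin;

			origin = n->next;
			n->next = (uintptr_t)n;	/* mark "not on a list" */
			/* ... kick the owning consumer here ... */
		}
	} while (!final);
}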
3072 | void kvm_s390_gisa_clear(struct kvm *kvm) | ||
3073 | { | ||
3074 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
3075 | |||
3076 | if (!gi->origin) | ||
3077 | return; | ||
3078 | gisa_clear_ipm(gi->origin); | ||
3079 | VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin); | ||
3080 | } | ||
3081 | |||
2896 | void kvm_s390_gisa_init(struct kvm *kvm) | 3082 | void kvm_s390_gisa_init(struct kvm *kvm) |
2897 | { | 3083 | { |
2898 | if (css_general_characteristics.aiv) { | 3084 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; |
2899 | kvm->arch.gisa = &kvm->arch.sie_page2->gisa; | 3085 | |
2900 | VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa); | 3086 | if (!css_general_characteristics.aiv) |
2901 | kvm_s390_gisa_clear(kvm); | 3087 | return; |
2902 | } | 3088 | gi->origin = &kvm->arch.sie_page2->gisa; |
3089 | gi->alert.mask = 0; | ||
3090 | spin_lock_init(&gi->alert.ref_lock); | ||
3091 | gi->expires = 50 * 1000; /* 50 usec */ | ||
3092 | hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
3093 | gi->timer.function = gisa_vcpu_kicker; | ||
3094 | memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); | ||
3095 | gi->origin->next_alert = (u32)(u64)gi->origin; | ||
3096 | VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); | ||
2903 | } | 3097 | } |
2904 | 3098 | ||
2905 | void kvm_s390_gisa_destroy(struct kvm *kvm) | 3099 | void kvm_s390_gisa_destroy(struct kvm *kvm) |
2906 | { | 3100 | { |
2907 | if (!kvm->arch.gisa) | 3101 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; |
3102 | |||
3103 | if (!gi->origin) | ||
3104 | return; | ||
3105 | if (gi->alert.mask) | ||
3106 | KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x", | ||
3107 | kvm, gi->alert.mask); | ||
3108 | while (gisa_in_alert_list(gi->origin)) | ||
3109 | cpu_relax(); | ||
3110 | hrtimer_cancel(&gi->timer); | ||
3111 | gi->origin = NULL; | ||
3112 | } | ||
3113 | |||
3114 | /** | ||
3115 | * kvm_s390_gisc_register - register a guest ISC | ||
3116 | * | ||
3117 | * @kvm: the kernel vm to work with | ||
3118 | * @gisc: the guest interruption sub class to register | ||
3119 | * | ||
3120 | * The function extends the vm specific alert mask to use. | ||
3121 | * The effective IAM mask in the GISA is updated as well | ||
3122 | * in case the GISA is not part of the GIB alert list. | ||
3123 | * It will be updated, at the latest, when the IAM is restored | ||
3124 | * by gisa_get_ipm_or_restore_iam(). | ||
3125 | * | ||
3126 | * Returns: the nonspecific ISC (NISC) the gib alert mechanism | ||
3127 | * has registered with the channel subsystem. | ||
3128 | * -ENODEV in case the vm uses no GISA | ||
3129 | * -ERANGE in case the guest ISC is invalid | ||
3130 | */ | ||
3131 | int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc) | ||
3132 | { | ||
3133 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
3134 | |||
3135 | if (!gi->origin) | ||
3136 | return -ENODEV; | ||
3137 | if (gisc > MAX_ISC) | ||
3138 | return -ERANGE; | ||
3139 | |||
3140 | spin_lock(&gi->alert.ref_lock); | ||
3141 | gi->alert.ref_count[gisc]++; | ||
3142 | if (gi->alert.ref_count[gisc] == 1) { | ||
3143 | gi->alert.mask |= 0x80 >> gisc; | ||
3144 | gisa_set_iam(gi->origin, gi->alert.mask); | ||
3145 | } | ||
3146 | spin_unlock(&gi->alert.ref_lock); | ||
3147 | |||
3148 | return gib->nisc; | ||
3149 | } | ||
3150 | EXPORT_SYMBOL_GPL(kvm_s390_gisc_register); | ||
3151 | |||
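Note the MSB-first bit numbering in the mask update: ISC 0 maps to 0x80 and ISC 7 to 0x01, matching the architecture's left-to-right bit order. A tiny standalone illustration of how the per-ISC reference counts fold into the 8-bit alert mask (the sample counts are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int ref_count[8] = { 1, 0, 0, 2, 0, 0, 0, 1 };
	uint8_t mask = 0;

	for (int isc = 0; isc < 8; isc++)
		if (ref_count[isc])
			mask |= 0x80 >> isc;	/* ISC 0 is the MSB */

	printf("alert mask: 0x%02x\n", mask);	/* prints 0x91 */
	return 0;
}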
3152 | /** | ||
3153 | * kvm_s390_gisc_unregister - unregister a guest ISC | ||
3154 | * | ||
3155 | * @kvm: the kernel vm to work with | ||
3156 | * @gisc: the guest interruption sub class to unregister | ||
3157 | * | ||
3158 | * The function reduces the vm specific alert mask to use. | ||
3159 | * The effective IAM mask in the GISA is updated as well | ||
3160 | * in case the GISA is not part of the GIB alert list. | ||
3161 | * It will be updated, at the latest, when the IAM is restored | ||
3162 | * by gisa_get_ipm_or_restore_iam(). | ||
3163 | * | ||
3164 | * Returns: 0 in case the guest ISC has been successfully | ||
3165 | * unregistered | ||
3166 | * -ENODEV in case the vm uses no GISA | ||
3167 | * -ERANGE in case the guest ISC is invalid | ||
3168 | * -EINVAL in case the guest ISC is not registered | ||
3169 | */ | ||
3170 | int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc) | ||
3171 | { | ||
3172 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
3173 | int rc = 0; | ||
3174 | |||
3175 | if (!gi->origin) | ||
3176 | return -ENODEV; | ||
3177 | if (gisc > MAX_ISC) | ||
3178 | return -ERANGE; | ||
3179 | |||
3180 | spin_lock(&gi->alert.ref_lock); | ||
3181 | if (gi->alert.ref_count[gisc] == 0) { | ||
3182 | rc = -EINVAL; | ||
3183 | goto out; | ||
3184 | } | ||
3185 | gi->alert.ref_count[gisc]--; | ||
3186 | if (gi->alert.ref_count[gisc] == 0) { | ||
3187 | gi->alert.mask &= ~(0x80 >> gisc); | ||
3188 | gisa_set_iam(gi->origin, gi->alert.mask); | ||
3189 | } | ||
3190 | out: | ||
3191 | spin_unlock(&gi->alert.ref_lock); | ||
3192 | |||
3193 | return rc; | ||
3194 | } | ||
3195 | EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister); | ||
3196 | |||
3197 | static void gib_alert_irq_handler(struct airq_struct *airq) | ||
3198 | { | ||
3199 | inc_irq_stat(IRQIO_GAL); | ||
3200 | process_gib_alert_list(); | ||
3201 | } | ||
3202 | |||
3203 | static struct airq_struct gib_alert_irq = { | ||
3204 | .handler = gib_alert_irq_handler, | ||
3205 | .lsi_ptr = &gib_alert_irq.lsi_mask, | ||
3206 | }; | ||
3207 | |||
3208 | void kvm_s390_gib_destroy(void) | ||
3209 | { | ||
3210 | if (!gib) | ||
2908 | return; | 3211 | return; |
2909 | kvm->arch.gisa = NULL; | 3212 | chsc_sgib(0); |
3213 | unregister_adapter_interrupt(&gib_alert_irq); | ||
3214 | free_page((unsigned long)gib); | ||
3215 | gib = NULL; | ||
3216 | } | ||
3217 | |||
3218 | int kvm_s390_gib_init(u8 nisc) | ||
3219 | { | ||
3220 | int rc = 0; | ||
3221 | |||
3222 | if (!css_general_characteristics.aiv) { | ||
3223 | KVM_EVENT(3, "%s", "gib not initialized, no AIV facility"); | ||
3224 | goto out; | ||
3225 | } | ||
3226 | |||
3227 | gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA); | ||
3228 | if (!gib) { | ||
3229 | rc = -ENOMEM; | ||
3230 | goto out; | ||
3231 | } | ||
3232 | |||
3233 | gib_alert_irq.isc = nisc; | ||
3234 | if (register_adapter_interrupt(&gib_alert_irq)) { | ||
3235 | pr_err("Registering the GIB alert interruption handler failed\n"); | ||
3236 | rc = -EIO; | ||
3237 | goto out_free_gib; | ||
3238 | } | ||
3239 | |||
3240 | gib->nisc = nisc; | ||
3241 | if (chsc_sgib((u32)(u64)gib)) { | ||
3242 | pr_err("Associating the GIB with the AIV facility failed\n"); | ||
3243 | free_page((unsigned long)gib); | ||
3244 | gib = NULL; | ||
3245 | rc = -EIO; | ||
3246 | goto out_unreg_gal; | ||
3247 | } | ||
3248 | |||
3249 | KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc); | ||
3250 | goto out; | ||
3251 | |||
3252 | out_unreg_gal: | ||
3253 | unregister_adapter_interrupt(&gib_alert_irq); | ||
3254 | out_free_gib: | ||
3255 | free_page((unsigned long)gib); | ||
3256 | gib = NULL; | ||
3257 | out: | ||
3258 | return rc; | ||
2910 | } | 3259 | } |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7f4bc58a53b9..4638303ba6a8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -432,11 +432,18 @@ int kvm_arch_init(void *opaque) | |||
432 | /* Register floating interrupt controller interface. */ | 432 | /* Register floating interrupt controller interface. */ |
433 | rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); | 433 | rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); |
434 | if (rc) { | 434 | if (rc) { |
435 | pr_err("Failed to register FLIC rc=%d\n", rc); | 435 | pr_err("A FLIC registration call failed with rc=%d\n", rc); |
436 | goto out_debug_unreg; | 436 | goto out_debug_unreg; |
437 | } | 437 | } |
438 | |||
439 | rc = kvm_s390_gib_init(GAL_ISC); | ||
440 | if (rc) | ||
441 | goto out_gib_destroy; | ||
442 | |||
438 | return 0; | 443 | return 0; |
439 | 444 | ||
445 | out_gib_destroy: | ||
446 | kvm_s390_gib_destroy(); | ||
440 | out_debug_unreg: | 447 | out_debug_unreg: |
441 | debug_unregister(kvm_s390_dbf); | 448 | debug_unregister(kvm_s390_dbf); |
442 | return rc; | 449 | return rc; |
@@ -444,6 +451,7 @@ out_debug_unreg: | |||
444 | 451 | ||
445 | void kvm_arch_exit(void) | 452 | void kvm_arch_exit(void) |
446 | { | 453 | { |
454 | kvm_s390_gib_destroy(); | ||
447 | debug_unregister(kvm_s390_dbf); | 455 | debug_unregister(kvm_s390_dbf); |
448 | } | 456 | } |
449 | 457 | ||
@@ -1258,11 +1266,65 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm, | |||
1258 | static int kvm_s390_set_processor_subfunc(struct kvm *kvm, | 1266 | static int kvm_s390_set_processor_subfunc(struct kvm *kvm, |
1259 | struct kvm_device_attr *attr) | 1267 | struct kvm_device_attr *attr) |
1260 | { | 1268 | { |
1261 | /* | 1269 | mutex_lock(&kvm->lock); |
1262 | * Once supported by kernel + hw, we have to store the subfunctions | 1270 | if (kvm->created_vcpus) { |
1263 | * in kvm->arch and remember that user space configured them. | 1271 | mutex_unlock(&kvm->lock); |
1264 | */ | 1272 | return -EBUSY; |
1265 | return -ENXIO; | 1273 | } |
1274 | |||
1275 | if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr, | ||
1276 | sizeof(struct kvm_s390_vm_cpu_subfunc))) { | ||
1277 | mutex_unlock(&kvm->lock); | ||
1278 | return -EFAULT; | ||
1279 | } | ||
1280 | mutex_unlock(&kvm->lock); | ||
1281 | |||
1282 | VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
1283 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], | ||
1284 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], | ||
1285 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], | ||
1286 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); | ||
1287 | VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx", | ||
1288 | ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], | ||
1289 | ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); | ||
1290 | VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx", | ||
1291 | ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], | ||
1292 | ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); | ||
1293 | VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx", | ||
1294 | ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], | ||
1295 | ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); | ||
1296 | VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx", | ||
1297 | ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], | ||
1298 | ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); | ||
1299 | VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx", | ||
1300 | ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], | ||
1301 | ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); | ||
1302 | VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx", | ||
1303 | ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], | ||
1304 | ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); | ||
1305 | VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", | ||
1306 | ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], | ||
1307 | ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); | ||
1308 | VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", | ||
1309 | ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], | ||
1310 | ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); | ||
1311 | VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", | ||
1312 | ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], | ||
1313 | ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); | ||
1314 | VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", | ||
1315 | ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], | ||
1316 | ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); | ||
1317 | VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx", | ||
1318 | ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], | ||
1319 | ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); | ||
1320 | VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", | ||
1321 | ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], | ||
1322 | ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); | ||
1323 | VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", | ||
1324 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], | ||
1325 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); | ||
1326 | |||
1327 | return 0; | ||
1266 | } | 1328 | } |
1267 | 1329 | ||
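With this change userspace can configure the subfunction blocks, but only before the first vCPU exists (-EBUSY otherwise). A hedged sketch of the call from a VMM, using the uapi constants from <linux/kvm.h>; error handling is trimmed:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_guest_subfuncs(int vm_fd,
			      struct kvm_s390_vm_cpu_subfunc *sf)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CPU_MODEL,
		.attr  = KVM_S390_VM_CPU_PROCESSOR_SUBFUNC,
		.addr  = (__u64)(unsigned long)sf,
	};

	/* Fails with EBUSY once the first vCPU has been created. */
	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}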
1268 | static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) | 1330 | static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) |
@@ -1381,12 +1443,56 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm, | |||
1381 | static int kvm_s390_get_processor_subfunc(struct kvm *kvm, | 1443 | static int kvm_s390_get_processor_subfunc(struct kvm *kvm, |
1382 | struct kvm_device_attr *attr) | 1444 | struct kvm_device_attr *attr) |
1383 | { | 1445 | { |
1384 | /* | 1446 | if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, |
1385 | * Once we can actually configure subfunctions (kernel + hw support), | 1447 | sizeof(struct kvm_s390_vm_cpu_subfunc))) |
1386 | * we have to check if they were already set by user space, if so copy | 1448 | return -EFAULT; |
1387 | * them from kvm->arch. | 1449 | |
1388 | */ | 1450 | VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", |
1389 | return -ENXIO; | 1451 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], |
1452 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], | ||
1453 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], | ||
1454 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); | ||
1455 | VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", | ||
1456 | ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], | ||
1457 | ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); | ||
1458 | VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", | ||
1459 | ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], | ||
1460 | ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); | ||
1461 | VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", | ||
1462 | ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], | ||
1463 | ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); | ||
1464 | VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", | ||
1465 | ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], | ||
1466 | ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); | ||
1467 | VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", | ||
1468 | ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], | ||
1469 | ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); | ||
1470 | VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", | ||
1471 | ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], | ||
1472 | ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); | ||
1473 | VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", | ||
1474 | ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], | ||
1475 | ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); | ||
1476 | VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", | ||
1477 | ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], | ||
1478 | ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); | ||
1479 | VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", | ||
1480 | ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], | ||
1481 | ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); | ||
1482 | VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", | ||
1483 | ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], | ||
1484 | ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); | ||
1485 | VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", | ||
1486 | ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], | ||
1487 | ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); | ||
1488 | VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", | ||
1489 | ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], | ||
1490 | ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); | ||
1491 | VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", | ||
1492 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], | ||
1493 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); | ||
1494 | |||
1495 | return 0; | ||
1390 | } | 1496 | } |
1391 | 1497 | ||
1392 | static int kvm_s390_get_machine_subfunc(struct kvm *kvm, | 1498 | static int kvm_s390_get_machine_subfunc(struct kvm *kvm, |
@@ -1395,8 +1501,55 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm, | |||
1395 | if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, | 1501 | if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, |
1396 | sizeof(struct kvm_s390_vm_cpu_subfunc))) | 1502 | sizeof(struct kvm_s390_vm_cpu_subfunc))) |
1397 | return -EFAULT; | 1503 | return -EFAULT; |
1504 | |||
1505 | VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
1506 | ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], | ||
1507 | ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], | ||
1508 | ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], | ||
1509 | ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); | ||
1510 | VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", | ||
1511 | ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], | ||
1512 | ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); | ||
1513 | VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", | ||
1514 | ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], | ||
1515 | ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); | ||
1516 | VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", | ||
1517 | ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], | ||
1518 | ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); | ||
1519 | VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", | ||
1520 | ((unsigned long *) &kvm_s390_available_subfunc.km)[0], | ||
1521 | ((unsigned long *) &kvm_s390_available_subfunc.km)[1]); | ||
1522 | VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", | ||
1523 | ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], | ||
1524 | ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); | ||
1525 | VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", | ||
1526 | ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], | ||
1527 | ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); | ||
1528 | VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", | ||
1529 | ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], | ||
1530 | ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); | ||
1531 | VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", | ||
1532 | ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], | ||
1533 | ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); | ||
1534 | VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", | ||
1535 | ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], | ||
1536 | ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); | ||
1537 | VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", | ||
1538 | ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], | ||
1539 | ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]); | ||
1540 | VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx", | ||
1541 | ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], | ||
1542 | ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); | ||
1543 | VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", | ||
1544 | ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], | ||
1545 | ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); | ||
1546 | VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", | ||
1547 | ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], | ||
1548 | ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); | ||
1549 | |||
1398 | return 0; | 1550 | return 0; |
1399 | } | 1551 | } |
1552 | |||
1400 | static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) | 1553 | static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) |
1401 | { | 1554 | { |
1402 | int ret = -ENXIO; | 1555 | int ret = -ENXIO; |
@@ -1514,10 +1667,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) | |||
1514 | case KVM_S390_VM_CPU_PROCESSOR_FEAT: | 1667 | case KVM_S390_VM_CPU_PROCESSOR_FEAT: |
1515 | case KVM_S390_VM_CPU_MACHINE_FEAT: | 1668 | case KVM_S390_VM_CPU_MACHINE_FEAT: |
1516 | case KVM_S390_VM_CPU_MACHINE_SUBFUNC: | 1669 | case KVM_S390_VM_CPU_MACHINE_SUBFUNC: |
1670 | case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: | ||
1517 | ret = 0; | 1671 | ret = 0; |
1518 | break; | 1672 | break; |
1519 | /* configuring subfunctions is not supported yet */ | ||
1520 | case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: | ||
1521 | default: | 1673 | default: |
1522 | ret = -ENXIO; | 1674 | ret = -ENXIO; |
1523 | break; | 1675 | break; |
@@ -2209,6 +2361,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
2209 | if (!kvm->arch.sie_page2) | 2361 | if (!kvm->arch.sie_page2) |
2210 | goto out_err; | 2362 | goto out_err; |
2211 | 2363 | ||
2364 | kvm->arch.sie_page2->kvm = kvm; | ||
2212 | kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; | 2365 | kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; |
2213 | 2366 | ||
2214 | for (i = 0; i < kvm_s390_fac_size(); i++) { | 2367 | for (i = 0; i < kvm_s390_fac_size(); i++) { |
@@ -2218,6 +2371,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
2218 | kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & | 2371 | kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & |
2219 | kvm_s390_fac_base[i]; | 2372 | kvm_s390_fac_base[i]; |
2220 | } | 2373 | } |
2374 | kvm->arch.model.subfuncs = kvm_s390_available_subfunc; | ||
2221 | 2375 | ||
2222 | /* we are always in czam mode - even on pre z14 machines */ | 2376 | /* we are always in czam mode - even on pre z14 machines */ |
2223 | set_kvm_facility(kvm->arch.model.fac_mask, 138); | 2377 | set_kvm_facility(kvm->arch.model.fac_mask, 138); |
@@ -2812,7 +2966,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
2812 | 2966 | ||
2813 | vcpu->arch.sie_block->icpua = id; | 2967 | vcpu->arch.sie_block->icpua = id; |
2814 | spin_lock_init(&vcpu->arch.local_int.lock); | 2968 | spin_lock_init(&vcpu->arch.local_int.lock); |
2815 | vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa; | 2969 | vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin; |
2816 | if (vcpu->arch.sie_block->gd && sclp.has_gisaf) | 2970 | if (vcpu->arch.sie_block->gd && sclp.has_gisaf) |
2817 | vcpu->arch.sie_block->gd |= GISA_FORMAT1; | 2971 | vcpu->arch.sie_block->gd |= GISA_FORMAT1; |
2818 | seqcount_init(&vcpu->arch.cputm_seqcount); | 2972 | seqcount_init(&vcpu->arch.cputm_seqcount); |
@@ -3458,6 +3612,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) | |||
3458 | kvm_s390_patch_guest_per_regs(vcpu); | 3612 | kvm_s390_patch_guest_per_regs(vcpu); |
3459 | } | 3613 | } |
3460 | 3614 | ||
3615 | clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); | ||
3616 | |||
3461 | vcpu->arch.sie_block->icptcode = 0; | 3617 | vcpu->arch.sie_block->icptcode = 0; |
3462 | cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); | 3618 | cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); |
3463 | VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); | 3619 | VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); |
@@ -4293,12 +4449,12 @@ static int __init kvm_s390_init(void) | |||
4293 | int i; | 4449 | int i; |
4294 | 4450 | ||
4295 | if (!sclp.has_sief2) { | 4451 | if (!sclp.has_sief2) { |
4296 | pr_info("SIE not available\n"); | 4452 | pr_info("SIE is not available\n"); |
4297 | return -ENODEV; | 4453 | return -ENODEV; |
4298 | } | 4454 | } |
4299 | 4455 | ||
4300 | if (nested && hpage) { | 4456 | if (nested && hpage) { |
4301 | pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently"); | 4457 | pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); |
4302 | return -EINVAL; | 4458 | return -EINVAL; |
4303 | } | 4459 | } |
4304 | 4460 | ||
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1f6e36cdce0d..6d9448dbd052 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -67,7 +67,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) | |||
67 | 67 | ||
68 | static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) | 68 | static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) |
69 | { | 69 | { |
70 | return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); | 70 | return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); |
71 | } | 71 | } |
72 | 72 | ||
73 | static inline int kvm_is_ucontrol(struct kvm *kvm) | 73 | static inline int kvm_is_ucontrol(struct kvm *kvm) |
@@ -381,6 +381,8 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, | |||
381 | void kvm_s390_gisa_init(struct kvm *kvm); | 381 | void kvm_s390_gisa_init(struct kvm *kvm); |
382 | void kvm_s390_gisa_clear(struct kvm *kvm); | 382 | void kvm_s390_gisa_clear(struct kvm *kvm); |
383 | void kvm_s390_gisa_destroy(struct kvm *kvm); | 383 | void kvm_s390_gisa_destroy(struct kvm *kvm); |
384 | int kvm_s390_gib_init(u8 nisc); | ||
385 | void kvm_s390_gib_destroy(void); | ||
384 | 386 | ||
385 | /* implemented in guestdbg.c */ | 387 | /* implemented in guestdbg.c */ |
386 | void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); | 388 | void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 180373360e34..a5db4475e72d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <asm/msr-index.h> | 35 | #include <asm/msr-index.h> |
36 | #include <asm/asm.h> | 36 | #include <asm/asm.h> |
37 | #include <asm/kvm_page_track.h> | 37 | #include <asm/kvm_page_track.h> |
38 | #include <asm/kvm_vcpu_regs.h> | ||
38 | #include <asm/hyperv-tlfs.h> | 39 | #include <asm/hyperv-tlfs.h> |
39 | 40 | ||
40 | #define KVM_MAX_VCPUS 288 | 41 | #define KVM_MAX_VCPUS 288 |
@@ -137,23 +138,23 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) | |||
137 | #define ASYNC_PF_PER_VCPU 64 | 138 | #define ASYNC_PF_PER_VCPU 64 |
138 | 139 | ||
139 | enum kvm_reg { | 140 | enum kvm_reg { |
140 | VCPU_REGS_RAX = 0, | 141 | VCPU_REGS_RAX = __VCPU_REGS_RAX, |
141 | VCPU_REGS_RCX = 1, | 142 | VCPU_REGS_RCX = __VCPU_REGS_RCX, |
142 | VCPU_REGS_RDX = 2, | 143 | VCPU_REGS_RDX = __VCPU_REGS_RDX, |
143 | VCPU_REGS_RBX = 3, | 144 | VCPU_REGS_RBX = __VCPU_REGS_RBX, |
144 | VCPU_REGS_RSP = 4, | 145 | VCPU_REGS_RSP = __VCPU_REGS_RSP, |
145 | VCPU_REGS_RBP = 5, | 146 | VCPU_REGS_RBP = __VCPU_REGS_RBP, |
146 | VCPU_REGS_RSI = 6, | 147 | VCPU_REGS_RSI = __VCPU_REGS_RSI, |
147 | VCPU_REGS_RDI = 7, | 148 | VCPU_REGS_RDI = __VCPU_REGS_RDI, |
148 | #ifdef CONFIG_X86_64 | 149 | #ifdef CONFIG_X86_64 |
149 | VCPU_REGS_R8 = 8, | 150 | VCPU_REGS_R8 = __VCPU_REGS_R8, |
150 | VCPU_REGS_R9 = 9, | 151 | VCPU_REGS_R9 = __VCPU_REGS_R9, |
151 | VCPU_REGS_R10 = 10, | 152 | VCPU_REGS_R10 = __VCPU_REGS_R10, |
152 | VCPU_REGS_R11 = 11, | 153 | VCPU_REGS_R11 = __VCPU_REGS_R11, |
153 | VCPU_REGS_R12 = 12, | 154 | VCPU_REGS_R12 = __VCPU_REGS_R12, |
154 | VCPU_REGS_R13 = 13, | 155 | VCPU_REGS_R13 = __VCPU_REGS_R13, |
155 | VCPU_REGS_R14 = 14, | 156 | VCPU_REGS_R14 = __VCPU_REGS_R14, |
156 | VCPU_REGS_R15 = 15, | 157 | VCPU_REGS_R15 = __VCPU_REGS_R15, |
157 | #endif | 158 | #endif |
158 | VCPU_REGS_RIP, | 159 | VCPU_REGS_RIP, |
159 | NR_VCPU_REGS | 160 | NR_VCPU_REGS |
@@ -319,6 +320,7 @@ struct kvm_mmu_page { | |||
319 | struct list_head link; | 320 | struct list_head link; |
320 | struct hlist_node hash_link; | 321 | struct hlist_node hash_link; |
321 | bool unsync; | 322 | bool unsync; |
323 | bool mmio_cached; | ||
322 | 324 | ||
323 | /* | 325 | /* |
324 | * The following two entries are used to key the shadow page in the | 326 | * The following two entries are used to key the shadow page in the |
@@ -333,10 +335,6 @@ struct kvm_mmu_page { | |||
333 | int root_count; /* Currently serving as active root */ | 335 | int root_count; /* Currently serving as active root */ |
334 | unsigned int unsync_children; | 336 | unsigned int unsync_children; |
335 | struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ | 337 | struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ |
336 | |||
337 | /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ | ||
338 | unsigned long mmu_valid_gen; | ||
339 | |||
340 | DECLARE_BITMAP(unsync_child_bitmap, 512); | 338 | DECLARE_BITMAP(unsync_child_bitmap, 512); |
341 | 339 | ||
342 | #ifdef CONFIG_X86_32 | 340 | #ifdef CONFIG_X86_32 |
@@ -848,13 +846,11 @@ struct kvm_arch { | |||
848 | unsigned int n_requested_mmu_pages; | 846 | unsigned int n_requested_mmu_pages; |
849 | unsigned int n_max_mmu_pages; | 847 | unsigned int n_max_mmu_pages; |
850 | unsigned int indirect_shadow_pages; | 848 | unsigned int indirect_shadow_pages; |
851 | unsigned long mmu_valid_gen; | ||
852 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 849 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
853 | /* | 850 | /* |
854 | * Hash table of struct kvm_mmu_page. | 851 | * Hash table of struct kvm_mmu_page. |
855 | */ | 852 | */ |
856 | struct list_head active_mmu_pages; | 853 | struct list_head active_mmu_pages; |
857 | struct list_head zapped_obsolete_pages; | ||
858 | struct kvm_page_track_notifier_node mmu_sp_tracker; | 854 | struct kvm_page_track_notifier_node mmu_sp_tracker; |
859 | struct kvm_page_track_notifier_head track_notifier_head; | 855 | struct kvm_page_track_notifier_head track_notifier_head; |
860 | 856 | ||
@@ -1255,7 +1251,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, | |||
1255 | struct kvm_memory_slot *slot, | 1251 | struct kvm_memory_slot *slot, |
1256 | gfn_t gfn_offset, unsigned long mask); | 1252 | gfn_t gfn_offset, unsigned long mask); |
1257 | void kvm_mmu_zap_all(struct kvm *kvm); | 1253 | void kvm_mmu_zap_all(struct kvm *kvm); |
1258 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots); | 1254 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); |
1259 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 1255 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
1260 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 1256 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
1261 | 1257 | ||
diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h new file mode 100644 index 000000000000..1af2cb59233b --- /dev/null +++ b/arch/x86/include/asm/kvm_vcpu_regs.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef _ASM_X86_KVM_VCPU_REGS_H | ||
3 | #define _ASM_X86_KVM_VCPU_REGS_H | ||
4 | |||
5 | #define __VCPU_REGS_RAX 0 | ||
6 | #define __VCPU_REGS_RCX 1 | ||
7 | #define __VCPU_REGS_RDX 2 | ||
8 | #define __VCPU_REGS_RBX 3 | ||
9 | #define __VCPU_REGS_RSP 4 | ||
10 | #define __VCPU_REGS_RBP 5 | ||
11 | #define __VCPU_REGS_RSI 6 | ||
12 | #define __VCPU_REGS_RDI 7 | ||
13 | |||
14 | #ifdef CONFIG_X86_64 | ||
15 | #define __VCPU_REGS_R8 8 | ||
16 | #define __VCPU_REGS_R9 9 | ||
17 | #define __VCPU_REGS_R10 10 | ||
18 | #define __VCPU_REGS_R11 11 | ||
19 | #define __VCPU_REGS_R12 12 | ||
20 | #define __VCPU_REGS_R13 13 | ||
21 | #define __VCPU_REGS_R14 14 | ||
22 | #define __VCPU_REGS_R15 15 | ||
23 | #endif | ||
24 | |||
25 | #endif /* _ASM_X86_KVM_VCPU_REGS_H */ | ||
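This macro-only header exists because assembly cannot consume the C enum in kvm_host.h; with plain #defines, .S files and C code can agree on register-array indices. A small standalone illustration of the shared-index idea (the 8-byte slot layout is an assumption for the example, not a statement about KVM's structures):

#include <stdint.h>
#include <stdio.h>

#define __VCPU_REGS_RAX 0
#define __VCPU_REGS_RCX 1

int main(void)
{
	uint64_t regs[16] = { 0 };

	regs[__VCPU_REGS_RCX] = 0x1234;
	/* an .S file could use the same constant to form an offset:
	 *   movq  8*__VCPU_REGS_RCX(%rdi), %rcx */
	printf("RCX slot byte offset: %d\n", __VCPU_REGS_RCX * 8);
	return 0;
}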
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e811d4d1c824..904494b924c1 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -104,12 +104,8 @@ static u64 kvm_sched_clock_read(void) | |||
104 | 104 | ||
105 | static inline void kvm_sched_clock_init(bool stable) | 105 | static inline void kvm_sched_clock_init(bool stable) |
106 | { | 106 | { |
107 | if (!stable) { | 107 | if (!stable) |
108 | pv_ops.time.sched_clock = kvm_clock_read; | ||
109 | clear_sched_clock_stable(); | 108 | clear_sched_clock_stable(); |
110 | return; | ||
111 | } | ||
112 | |||
113 | kvm_sched_clock_offset = kvm_clock_read(); | 109 | kvm_sched_clock_offset = kvm_clock_read(); |
114 | pv_ops.time.sched_clock = kvm_sched_clock_read; | 110 | pv_ops.time.sched_clock = kvm_sched_clock_read; |
115 | 111 | ||
@@ -355,6 +351,20 @@ void __init kvmclock_init(void) | |||
355 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 351 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
356 | #endif | 352 | #endif |
357 | kvm_get_preset_lpj(); | 353 | kvm_get_preset_lpj(); |
354 | |||
355 | /* | ||
356 | * X86_FEATURE_NONSTOP_TSC means the TSC runs at a constant rate | ||
357 | * across P/T states and does not stop in deep C-states. | ||
358 | * | ||
359 | * A host that exposes an invariant TSC makes kvmclock unnecessary: | ||
360 | * the TSC can be used as the clocksource instead. | ||
361 | * | ||
362 | */ | ||
363 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && | ||
364 | boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && | ||
365 | !check_tsc_unstable()) | ||
366 | kvm_clock.rating = 299; | ||
367 | |||
358 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); | 368 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); |
359 | pv_info.name = "KVM"; | 369 | pv_info.name = "KVM"; |
360 | } | 370 | } |
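The value 299 is chosen relative to the raw TSC clocksource, which registers at rating 300, while kvmclock itself defaults to rating 400. The clocksource core simply prefers the highest-rated registered source, so dropping kvmclock just below the TSC hands the host's invariant TSC the win. A condensed sketch of that selection rule (illustrative, not the core's actual code):

    /* Sketch: highest rating wins.  tsc (300) beats kvm-clock at 299,
     * but loses to kvm-clock at its default rating of 400. */
    static struct clocksource *pick_best(struct clocksource *a,
                                         struct clocksource *b)
    {
            return (a->rating >= b->rating) ? a : b;
    }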
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c07958b59f50..fd3951638ae4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
405 | F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | | 405 | F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | |
406 | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | | 406 | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | |
407 | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | | 407 | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | |
408 | F(CLDEMOTE); | 408 | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); |
409 | 409 | ||
410 | /* cpuid 7.0.edx*/ | 410 | /* cpuid 7.0.edx*/ |
411 | const u32 kvm_cpuid_7_0_edx_x86_features = | 411 | const u32 kvm_cpuid_7_0_edx_x86_features = |
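MOVDIRI and MOVDIR64B are simply added to the mask of CPUID.7.0:ECX bits KVM is willing to expose; the F() macro in this file expands to the corresponding X86_FEATURE bit. The mask is then ANDed into what the host reports, roughly (condensed from the surrounding function):

    /* A guest sees a CPUID.7.0:ECX bit only if the host has it and
     * KVM whitelists it in kvm_cpuid_7_0_ecx_x86_features. */
    entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;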
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 89d20ed1d2e8..27c43525a05f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
@@ -1729,7 +1729,7 @@ static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) | |||
1729 | 1729 | ||
1730 | mutex_lock(&hv->hv_lock); | 1730 | mutex_lock(&hv->hv_lock); |
1731 | ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, | 1731 | ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, |
1732 | GFP_KERNEL); | 1732 | GFP_KERNEL_ACCOUNT); |
1733 | mutex_unlock(&hv->hv_lock); | 1733 | mutex_unlock(&hv->hv_lock); |
1734 | 1734 | ||
1735 | if (ret >= 0) | 1735 | if (ret >= 0) |
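This is the first of many GFP_KERNEL -> GFP_KERNEL_ACCOUNT conversions in this pull (the "memcg accounting" item in the summary). GFP_KERNEL_ACCOUNT is GFP_KERNEL | __GFP_ACCOUNT, so the allocation is charged to the calling task's memory cgroup and a VM can no longer accumulate unaccounted kernel state. Minimal usage sketch (hypothetical helper, not from this patch):

    #include <linux/slab.h>

    static void *alloc_vm_state(size_t size)
    {
            /* Charged to current's memcg; freed normally via kfree(). */
            return kzalloc(size, GFP_KERNEL_ACCOUNT);
    }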
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index af192895b1fc..4a6dc54cc12b 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -653,7 +653,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
653 | pid_t pid_nr; | 653 | pid_t pid_nr; |
654 | int ret; | 654 | int ret; |
655 | 655 | ||
656 | pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); | 656 | pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL_ACCOUNT); |
657 | if (!pit) | 657 | if (!pit) |
658 | return NULL; | 658 | return NULL; |
659 | 659 | ||
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index bdcd4139eca9..8b38bb4868a6 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -583,7 +583,7 @@ int kvm_pic_init(struct kvm *kvm) | |||
583 | struct kvm_pic *s; | 583 | struct kvm_pic *s; |
584 | int ret; | 584 | int ret; |
585 | 585 | ||
586 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | 586 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL_ACCOUNT); |
587 | if (!s) | 587 | if (!s) |
588 | return -ENOMEM; | 588 | return -ENOMEM; |
589 | spin_lock_init(&s->lock); | 589 | spin_lock_init(&s->lock); |
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 4e822ad363f3..1add1bc881e2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c | |||
@@ -622,7 +622,7 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
622 | struct kvm_ioapic *ioapic; | 622 | struct kvm_ioapic *ioapic; |
623 | int ret; | 623 | int ret; |
624 | 624 | ||
625 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); | 625 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT); |
626 | if (!ioapic) | 626 | if (!ioapic) |
627 | return -ENOMEM; | 627 | return -ENOMEM; |
628 | spin_lock_init(&ioapic->lock); | 628 | spin_lock_init(&ioapic->lock); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4b6c2da7265c..991fdf7fc17f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -181,7 +181,8 @@ static void recalculate_apic_map(struct kvm *kvm) | |||
181 | max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); | 181 | max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); |
182 | 182 | ||
183 | new = kvzalloc(sizeof(struct kvm_apic_map) + | 183 | new = kvzalloc(sizeof(struct kvm_apic_map) + |
184 | sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL); | 184 | sizeof(struct kvm_lapic *) * ((u64)max_id + 1), |
185 | GFP_KERNEL_ACCOUNT); | ||
185 | 186 | ||
186 | if (!new) | 187 | if (!new) |
187 | goto out; | 188 | goto out; |
@@ -2259,13 +2260,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
2259 | ASSERT(vcpu != NULL); | 2260 | ASSERT(vcpu != NULL); |
2260 | apic_debug("apic_init %d\n", vcpu->vcpu_id); | 2261 | apic_debug("apic_init %d\n", vcpu->vcpu_id); |
2261 | 2262 | ||
2262 | apic = kzalloc(sizeof(*apic), GFP_KERNEL); | 2263 | apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); |
2263 | if (!apic) | 2264 | if (!apic) |
2264 | goto nomem; | 2265 | goto nomem; |
2265 | 2266 | ||
2266 | vcpu->arch.apic = apic; | 2267 | vcpu->arch.apic = apic; |
2267 | 2268 | ||
2268 | apic->regs = (void *)get_zeroed_page(GFP_KERNEL); | 2269 | apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); |
2269 | if (!apic->regs) { | 2270 | if (!apic->regs) { |
2270 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", | 2271 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", |
2271 | vcpu->vcpu_id); | 2272 | vcpu->vcpu_id); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f2d1d230d5b8..7837ab001d80 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -109,9 +109,11 @@ module_param(dbg, bool, 0644); | |||
109 | (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) | 109 | (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) |
110 | 110 | ||
111 | 111 | ||
112 | #define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))) | 112 | #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK |
113 | #define PT64_DIR_BASE_ADDR_MASK \ | 113 | #define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1)) |
114 | (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) | 114 | #else |
115 | #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) | ||
116 | #endif | ||
115 | #define PT64_LVL_ADDR_MASK(level) \ | 117 | #define PT64_LVL_ADDR_MASK(level) \ |
116 | (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ | 118 | (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ |
117 | * PT64_LEVEL_BITS))) - 1)) | 119 | * PT64_LEVEL_BITS))) - 1)) |
@@ -330,53 +332,56 @@ static inline bool is_access_track_spte(u64 spte) | |||
330 | } | 332 | } |
331 | 333 | ||
332 | /* | 334 | /* |
333 | * the low bit of the generation number is always presumed to be zero. | 335 | * Due to limited space in PTEs, the MMIO generation is a 19-bit subset of |
334 | * This disables mmio caching during memslot updates. The concept is | 336 | * the memslots generation and is derived as follows: |
335 | * similar to a seqcount but instead of retrying the access we just punt | ||
336 | * and ignore the cache. | ||
337 | * | 337 | * |
338 | * spte bits 3-11 are used as bits 1-9 of the generation number, | 338 | * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 |
339 | * the bits 52-61 are used as bits 10-19 of the generation number. | 339 | * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 |
340 | * | ||
341 | * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in | ||
342 | * the MMIO generation number, as doing so would require stealing a bit from | ||
343 | * the "real" generation number and thus effectively halve the maximum number | ||
344 | * of MMIO generations that can be handled before encountering a wrap (which | ||
345 | * requires a full MMU zap). The flag is instead explicitly queried when | ||
346 | * checking for MMIO spte cache hits. | ||
340 | */ | 347 | */ |
341 | #define MMIO_SPTE_GEN_LOW_SHIFT 2 | 348 | #define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) |
342 | #define MMIO_SPTE_GEN_HIGH_SHIFT 52 | ||
343 | 349 | ||
344 | #define MMIO_GEN_SHIFT 20 | 350 | #define MMIO_SPTE_GEN_LOW_START 3 |
345 | #define MMIO_GEN_LOW_SHIFT 10 | 351 | #define MMIO_SPTE_GEN_LOW_END 11 |
346 | #define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) | 352 | #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ |
347 | #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) | 353 | MMIO_SPTE_GEN_LOW_START) |
348 | 354 | ||
349 | static u64 generation_mmio_spte_mask(unsigned int gen) | 355 | #define MMIO_SPTE_GEN_HIGH_START 52 |
356 | #define MMIO_SPTE_GEN_HIGH_END 61 | ||
357 | #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ | ||
358 | MMIO_SPTE_GEN_HIGH_START) | ||
359 | static u64 generation_mmio_spte_mask(u64 gen) | ||
350 | { | 360 | { |
351 | u64 mask; | 361 | u64 mask; |
352 | 362 | ||
353 | WARN_ON(gen & ~MMIO_GEN_MASK); | 363 | WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); |
354 | 364 | ||
355 | mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; | 365 | mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; |
356 | mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; | 366 | mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; |
357 | return mask; | 367 | return mask; |
358 | } | 368 | } |
359 | 369 | ||
360 | static unsigned int get_mmio_spte_generation(u64 spte) | 370 | static u64 get_mmio_spte_generation(u64 spte) |
361 | { | 371 | { |
362 | unsigned int gen; | 372 | u64 gen; |
363 | 373 | ||
364 | spte &= ~shadow_mmio_mask; | 374 | spte &= ~shadow_mmio_mask; |
365 | 375 | ||
366 | gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK; | 376 | gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; |
367 | gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT; | 377 | gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; |
368 | return gen; | 378 | return gen; |
369 | } | 379 | } |
370 | 380 | ||
371 | static unsigned int kvm_current_mmio_generation(struct kvm_vcpu *vcpu) | ||
372 | { | ||
373 | return kvm_vcpu_memslots(vcpu)->generation & MMIO_GEN_MASK; | ||
374 | } | ||
375 | |||
376 | static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, | 381 | static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, |
377 | unsigned access) | 382 | unsigned access) |
378 | { | 383 | { |
379 | unsigned int gen = kvm_current_mmio_generation(vcpu); | 384 | u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK; |
380 | u64 mask = generation_mmio_spte_mask(gen); | 385 | u64 mask = generation_mmio_spte_mask(gen); |
381 | u64 gpa = gfn << PAGE_SHIFT; | 386 | u64 gpa = gfn << PAGE_SHIFT; |
382 | 387 | ||
@@ -386,6 +391,8 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, | |||
386 | mask |= (gpa & shadow_nonpresent_or_rsvd_mask) | 391 | mask |= (gpa & shadow_nonpresent_or_rsvd_mask) |
387 | << shadow_nonpresent_or_rsvd_mask_len; | 392 | << shadow_nonpresent_or_rsvd_mask_len; |
388 | 393 | ||
394 | page_header(__pa(sptep))->mmio_cached = true; | ||
395 | |||
389 | trace_mark_mmio_spte(sptep, gfn, access, gen); | 396 | trace_mark_mmio_spte(sptep, gfn, access, gen); |
390 | mmu_spte_set(sptep, mask); | 397 | mmu_spte_set(sptep, mask); |
391 | } | 398 | } |
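For reference, GENMASK_ULL(h, l) yields a 64-bit mask with bits l through h set, so the three masks above work out to the following constants (worked values, verifiable by hand):

    /* Worked values:
     *   MMIO_SPTE_GEN_MASK      = GENMASK_ULL(18, 0)  = 0x000000000007ffff
     *   MMIO_SPTE_GEN_LOW_MASK  = GENMASK_ULL(11, 3)  = 0x0000000000000ff8
     *   MMIO_SPTE_GEN_HIGH_MASK = GENMASK_ULL(61, 52) = 0x3ff0000000000000
     */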
@@ -407,7 +414,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte) | |||
407 | 414 | ||
408 | static unsigned get_mmio_spte_access(u64 spte) | 415 | static unsigned get_mmio_spte_access(u64 spte) |
409 | { | 416 | { |
410 | u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; | 417 | u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask; |
411 | return (spte & ~mask) & ~PAGE_MASK; | 418 | return (spte & ~mask) & ~PAGE_MASK; |
412 | } | 419 | } |
413 | 420 | ||
@@ -424,9 +431,13 @@ static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, | |||
424 | 431 | ||
425 | static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) | 432 | static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) |
426 | { | 433 | { |
427 | unsigned int kvm_gen, spte_gen; | 434 | u64 kvm_gen, spte_gen, gen; |
428 | 435 | ||
429 | kvm_gen = kvm_current_mmio_generation(vcpu); | 436 | gen = kvm_vcpu_memslots(vcpu)->generation; |
437 | if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) | ||
438 | return false; | ||
439 | |||
440 | kvm_gen = gen & MMIO_SPTE_GEN_MASK; | ||
430 | spte_gen = get_mmio_spte_generation(spte); | 441 | spte_gen = get_mmio_spte_generation(spte); |
431 | 442 | ||
432 | trace_check_mmio_spte(spte, kvm_gen, spte_gen); | 443 | trace_check_mmio_spte(spte, kvm_gen, spte_gen); |
@@ -959,7 +970,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | |||
959 | if (cache->nobjs >= min) | 970 | if (cache->nobjs >= min) |
960 | return 0; | 971 | return 0; |
961 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | 972 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
962 | obj = kmem_cache_zalloc(base_cache, GFP_KERNEL); | 973 | obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT); |
963 | if (!obj) | 974 | if (!obj) |
964 | return cache->nobjs >= min ? 0 : -ENOMEM; | 975 | return cache->nobjs >= min ? 0 : -ENOMEM; |
965 | cache->objects[cache->nobjs++] = obj; | 976 | cache->objects[cache->nobjs++] = obj; |
@@ -2049,12 +2060,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct | |||
2049 | if (!direct) | 2060 | if (!direct) |
2050 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); | 2061 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
2051 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 2062 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
2052 | |||
2053 | /* | ||
2054 | * The active_mmu_pages list is the FIFO list, do not move the | ||
2055 | * page until it is zapped. kvm_zap_obsolete_pages depends on | ||
2056 | * this feature. See the comments in kvm_zap_obsolete_pages(). | ||
2057 | */ | ||
2058 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 2063 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
2059 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | 2064 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); |
2060 | return sp; | 2065 | return sp; |
@@ -2195,23 +2200,15 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
2195 | --kvm->stat.mmu_unsync; | 2200 | --kvm->stat.mmu_unsync; |
2196 | } | 2201 | } |
2197 | 2202 | ||
2198 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | 2203 | static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
2199 | struct list_head *invalid_list); | 2204 | struct list_head *invalid_list); |
2200 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 2205 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
2201 | struct list_head *invalid_list); | 2206 | struct list_head *invalid_list); |
2202 | 2207 | ||
2203 | /* | ||
2204 | * NOTE: we should pay more attention on the zapped-obsolete page | ||
2205 | * (is_obsolete_sp(sp) && sp->role.invalid) when you do hash list walk | ||
2206 | * since it has been deleted from active_mmu_pages but still can be found | ||
2207 | * at hast list. | ||
2208 | * | ||
2209 | * for_each_valid_sp() has skipped that kind of pages. | ||
2210 | */ | ||
2211 | #define for_each_valid_sp(_kvm, _sp, _gfn) \ | 2208 | #define for_each_valid_sp(_kvm, _sp, _gfn) \ |
2212 | hlist_for_each_entry(_sp, \ | 2209 | hlist_for_each_entry(_sp, \ |
2213 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ | 2210 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ |
2214 | if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \ | 2211 | if ((_sp)->role.invalid) { \ |
2215 | } else | 2212 | } else |
2216 | 2213 | ||
2217 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ | 2214 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ |
@@ -2231,18 +2228,28 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
2231 | return true; | 2228 | return true; |
2232 | } | 2229 | } |
2233 | 2230 | ||
2231 | static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, | ||
2232 | struct list_head *invalid_list, | ||
2233 | bool remote_flush) | ||
2234 | { | ||
2235 | if (!remote_flush && list_empty(invalid_list)) | ||
2236 | return false; | ||
2237 | |||
2238 | if (!list_empty(invalid_list)) | ||
2239 | kvm_mmu_commit_zap_page(kvm, invalid_list); | ||
2240 | else | ||
2241 | kvm_flush_remote_tlbs(kvm); | ||
2242 | return true; | ||
2243 | } | ||
2244 | |||
2234 | static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, | 2245 | static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, |
2235 | struct list_head *invalid_list, | 2246 | struct list_head *invalid_list, |
2236 | bool remote_flush, bool local_flush) | 2247 | bool remote_flush, bool local_flush) |
2237 | { | 2248 | { |
2238 | if (!list_empty(invalid_list)) { | 2249 | if (kvm_mmu_remote_flush_or_zap(vcpu->kvm, invalid_list, remote_flush)) |
2239 | kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list); | ||
2240 | return; | 2250 | return; |
2241 | } | ||
2242 | 2251 | ||
2243 | if (remote_flush) | 2252 | if (local_flush) |
2244 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
2245 | else if (local_flush) | ||
2246 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 2253 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
2247 | } | 2254 | } |
2248 | 2255 | ||
@@ -2253,11 +2260,6 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } | |||
2253 | static void mmu_audit_disable(void) { } | 2260 | static void mmu_audit_disable(void) { } |
2254 | #endif | 2261 | #endif |
2255 | 2262 | ||
2256 | static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
2257 | { | ||
2258 | return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); | ||
2259 | } | ||
2260 | |||
2261 | static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 2263 | static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
2262 | struct list_head *invalid_list) | 2264 | struct list_head *invalid_list) |
2263 | { | 2265 | { |
@@ -2482,7 +2484,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
2482 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) | 2484 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) |
2483 | flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); | 2485 | flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); |
2484 | } | 2486 | } |
2485 | sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; | ||
2486 | clear_page(sp->spt); | 2487 | clear_page(sp->spt); |
2487 | trace_kvm_mmu_get_page(sp, true); | 2488 | trace_kvm_mmu_get_page(sp, true); |
2488 | 2489 | ||
@@ -2668,17 +2669,22 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
2668 | return zapped; | 2669 | return zapped; |
2669 | } | 2670 | } |
2670 | 2671 | ||
2671 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | 2672 | static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, |
2672 | struct list_head *invalid_list) | 2673 | struct kvm_mmu_page *sp, |
2674 | struct list_head *invalid_list, | ||
2675 | int *nr_zapped) | ||
2673 | { | 2676 | { |
2674 | int ret; | 2677 | bool list_unstable; |
2675 | 2678 | ||
2676 | trace_kvm_mmu_prepare_zap_page(sp); | 2679 | trace_kvm_mmu_prepare_zap_page(sp); |
2677 | ++kvm->stat.mmu_shadow_zapped; | 2680 | ++kvm->stat.mmu_shadow_zapped; |
2678 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); | 2681 | *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); |
2679 | kvm_mmu_page_unlink_children(kvm, sp); | 2682 | kvm_mmu_page_unlink_children(kvm, sp); |
2680 | kvm_mmu_unlink_parents(kvm, sp); | 2683 | kvm_mmu_unlink_parents(kvm, sp); |
2681 | 2684 | ||
2685 | /* Zapping children means active_mmu_pages has become unstable. */ | ||
2686 | list_unstable = *nr_zapped; | ||
2687 | |||
2682 | if (!sp->role.invalid && !sp->role.direct) | 2688 | if (!sp->role.invalid && !sp->role.direct) |
2683 | unaccount_shadowed(kvm, sp); | 2689 | unaccount_shadowed(kvm, sp); |
2684 | 2690 | ||
@@ -2686,22 +2692,27 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
2686 | kvm_unlink_unsync_page(kvm, sp); | 2692 | kvm_unlink_unsync_page(kvm, sp); |
2687 | if (!sp->root_count) { | 2693 | if (!sp->root_count) { |
2688 | /* Count self */ | 2694 | /* Count self */ |
2689 | ret++; | 2695 | (*nr_zapped)++; |
2690 | list_move(&sp->link, invalid_list); | 2696 | list_move(&sp->link, invalid_list); |
2691 | kvm_mod_used_mmu_pages(kvm, -1); | 2697 | kvm_mod_used_mmu_pages(kvm, -1); |
2692 | } else { | 2698 | } else { |
2693 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 2699 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
2694 | 2700 | ||
2695 | /* | 2701 | if (!sp->role.invalid) |
2696 | * The obsolete pages can not be used on any vcpus. | ||
2697 | * See the comments in kvm_mmu_invalidate_zap_all_pages(). | ||
2698 | */ | ||
2699 | if (!sp->role.invalid && !is_obsolete_sp(kvm, sp)) | ||
2700 | kvm_reload_remote_mmus(kvm); | 2702 | kvm_reload_remote_mmus(kvm); |
2701 | } | 2703 | } |
2702 | 2704 | ||
2703 | sp->role.invalid = 1; | 2705 | sp->role.invalid = 1; |
2704 | return ret; | 2706 | return list_unstable; |
2707 | } | ||
2708 | |||
2709 | static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | ||
2710 | struct list_head *invalid_list) | ||
2711 | { | ||
2712 | int nr_zapped; | ||
2713 | |||
2714 | __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped); | ||
2715 | return nr_zapped; | ||
2705 | } | 2716 | } |
2706 | 2717 | ||
2707 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 2718 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
@@ -3703,7 +3714,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
3703 | 3714 | ||
3704 | u64 *lm_root; | 3715 | u64 *lm_root; |
3705 | 3716 | ||
3706 | lm_root = (void*)get_zeroed_page(GFP_KERNEL); | 3717 | lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT); |
3707 | if (lm_root == NULL) | 3718 | if (lm_root == NULL) |
3708 | return 1; | 3719 | return 1; |
3709 | 3720 | ||
@@ -4204,14 +4215,6 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, | |||
4204 | return false; | 4215 | return false; |
4205 | 4216 | ||
4206 | if (cached_root_available(vcpu, new_cr3, new_role)) { | 4217 | if (cached_root_available(vcpu, new_cr3, new_role)) { |
4207 | /* | ||
4208 | * It is possible that the cached previous root page is | ||
4209 | * obsolete because of a change in the MMU | ||
4210 | * generation number. However, that is accompanied by | ||
4211 | * KVM_REQ_MMU_RELOAD, which will free the root that we | ||
4212 | * have set here and allocate a new one. | ||
4213 | */ | ||
4214 | |||
4215 | kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); | 4218 | kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); |
4216 | if (!skip_tlb_flush) { | 4219 | if (!skip_tlb_flush) { |
4217 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); | 4220 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); |
@@ -5486,6 +5489,76 @@ void kvm_disable_tdp(void) | |||
5486 | } | 5489 | } |
5487 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); | 5490 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); |
5488 | 5491 | ||
5492 | |||
5493 | /* The return value indicates if tlb flush on all vcpus is needed. */ | ||
5494 | typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); | ||
5495 | |||
5496 | /* The caller should hold mmu-lock before calling this function. */ | ||
5497 | static __always_inline bool | ||
5498 | slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5499 | slot_level_handler fn, int start_level, int end_level, | ||
5500 | gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) | ||
5501 | { | ||
5502 | struct slot_rmap_walk_iterator iterator; | ||
5503 | bool flush = false; | ||
5504 | |||
5505 | for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, | ||
5506 | end_gfn, &iterator) { | ||
5507 | if (iterator.rmap) | ||
5508 | flush |= fn(kvm, iterator.rmap); | ||
5509 | |||
5510 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { | ||
5511 | if (flush && lock_flush_tlb) { | ||
5512 | kvm_flush_remote_tlbs(kvm); | ||
5513 | flush = false; | ||
5514 | } | ||
5515 | cond_resched_lock(&kvm->mmu_lock); | ||
5516 | } | ||
5517 | } | ||
5518 | |||
5519 | if (flush && lock_flush_tlb) { | ||
5520 | kvm_flush_remote_tlbs(kvm); | ||
5521 | flush = false; | ||
5522 | } | ||
5523 | |||
5524 | return flush; | ||
5525 | } | ||
5526 | |||
5527 | static __always_inline bool | ||
5528 | slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5529 | slot_level_handler fn, int start_level, int end_level, | ||
5530 | bool lock_flush_tlb) | ||
5531 | { | ||
5532 | return slot_handle_level_range(kvm, memslot, fn, start_level, | ||
5533 | end_level, memslot->base_gfn, | ||
5534 | memslot->base_gfn + memslot->npages - 1, | ||
5535 | lock_flush_tlb); | ||
5536 | } | ||
5537 | |||
5538 | static __always_inline bool | ||
5539 | slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5540 | slot_level_handler fn, bool lock_flush_tlb) | ||
5541 | { | ||
5542 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
5543 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
5544 | } | ||
5545 | |||
5546 | static __always_inline bool | ||
5547 | slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5548 | slot_level_handler fn, bool lock_flush_tlb) | ||
5549 | { | ||
5550 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, | ||
5551 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
5552 | } | ||
5553 | |||
5554 | static __always_inline bool | ||
5555 | slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5556 | slot_level_handler fn, bool lock_flush_tlb) | ||
5557 | { | ||
5558 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
5559 | PT_PAGE_TABLE_LEVEL, lock_flush_tlb); | ||
5560 | } | ||
5561 | |||
5489 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 5562 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
5490 | { | 5563 | { |
5491 | free_page((unsigned long)vcpu->arch.mmu->pae_root); | 5564 | free_page((unsigned long)vcpu->arch.mmu->pae_root); |
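The slot_handle_*() helpers are pure code motion, hoisted above free_mmu_pages() so the new memslot-zap path added below can use them; each one walks every rmap a memslot contributes at the chosen page-table levels. Typical use, as seen later in this patch:

    /* Zap every mapping the slot contributes, at all levels: */
    flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false);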
@@ -5505,7 +5578,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
5505 | * Therefore we need to allocate shadow page tables in the first | 5578 | * Therefore we need to allocate shadow page tables in the first |
5506 | * 4GB of memory, which happens to fit the DMA32 zone. | 5579 | * 4GB of memory, which happens to fit the DMA32 zone. |
5507 | */ | 5580 | */ |
5508 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); | 5581 | page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); |
5509 | if (!page) | 5582 | if (!page) |
5510 | return -ENOMEM; | 5583 | return -ENOMEM; |
5511 | 5584 | ||
@@ -5543,105 +5616,62 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, | |||
5543 | struct kvm_memory_slot *slot, | 5616 | struct kvm_memory_slot *slot, |
5544 | struct kvm_page_track_notifier_node *node) | 5617 | struct kvm_page_track_notifier_node *node) |
5545 | { | 5618 | { |
5546 | kvm_mmu_invalidate_zap_all_pages(kvm); | 5619 | struct kvm_mmu_page *sp; |
5547 | } | 5620 | LIST_HEAD(invalid_list); |
5548 | 5621 | unsigned long i; | |
5549 | void kvm_mmu_init_vm(struct kvm *kvm) | 5622 | bool flush; |
5550 | { | 5623 | gfn_t gfn; |
5551 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; | ||
5552 | |||
5553 | node->track_write = kvm_mmu_pte_write; | ||
5554 | node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; | ||
5555 | kvm_page_track_register_notifier(kvm, node); | ||
5556 | } | ||
5557 | 5624 | ||
5558 | void kvm_mmu_uninit_vm(struct kvm *kvm) | 5625 | spin_lock(&kvm->mmu_lock); |
5559 | { | ||
5560 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; | ||
5561 | 5626 | ||
5562 | kvm_page_track_unregister_notifier(kvm, node); | 5627 | if (list_empty(&kvm->arch.active_mmu_pages)) |
5563 | } | 5628 | goto out_unlock; |
5564 | 5629 | ||
5565 | /* The return value indicates if tlb flush on all vcpus is needed. */ | 5630 | flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false); |
5566 | typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); | ||
5567 | 5631 | ||
5568 | /* The caller should hold mmu-lock before calling this function. */ | 5632 | for (i = 0; i < slot->npages; i++) { |
5569 | static __always_inline bool | 5633 | gfn = slot->base_gfn + i; |
5570 | slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5571 | slot_level_handler fn, int start_level, int end_level, | ||
5572 | gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) | ||
5573 | { | ||
5574 | struct slot_rmap_walk_iterator iterator; | ||
5575 | bool flush = false; | ||
5576 | 5634 | ||
5577 | for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, | 5635 | for_each_valid_sp(kvm, sp, gfn) { |
5578 | end_gfn, &iterator) { | 5636 | if (sp->gfn != gfn) |
5579 | if (iterator.rmap) | 5637 | continue; |
5580 | flush |= fn(kvm, iterator.rmap); | ||
5581 | 5638 | ||
5639 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | ||
5640 | } | ||
5582 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { | 5641 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { |
5583 | if (flush && lock_flush_tlb) { | 5642 | kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); |
5584 | kvm_flush_remote_tlbs(kvm); | 5643 | flush = false; |
5585 | flush = false; | ||
5586 | } | ||
5587 | cond_resched_lock(&kvm->mmu_lock); | 5644 | cond_resched_lock(&kvm->mmu_lock); |
5588 | } | 5645 | } |
5589 | } | 5646 | } |
5647 | kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); | ||
5590 | 5648 | ||
5591 | if (flush && lock_flush_tlb) { | 5649 | out_unlock: |
5592 | kvm_flush_remote_tlbs(kvm); | 5650 | spin_unlock(&kvm->mmu_lock); |
5593 | flush = false; | ||
5594 | } | ||
5595 | |||
5596 | return flush; | ||
5597 | } | 5651 | } |
5598 | 5652 | ||
5599 | static __always_inline bool | 5653 | void kvm_mmu_init_vm(struct kvm *kvm) |
5600 | slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5601 | slot_level_handler fn, int start_level, int end_level, | ||
5602 | bool lock_flush_tlb) | ||
5603 | { | 5654 | { |
5604 | return slot_handle_level_range(kvm, memslot, fn, start_level, | 5655 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; |
5605 | end_level, memslot->base_gfn, | ||
5606 | memslot->base_gfn + memslot->npages - 1, | ||
5607 | lock_flush_tlb); | ||
5608 | } | ||
5609 | 5656 | ||
5610 | static __always_inline bool | 5657 | node->track_write = kvm_mmu_pte_write; |
5611 | slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | 5658 | node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; |
5612 | slot_level_handler fn, bool lock_flush_tlb) | 5659 | kvm_page_track_register_notifier(kvm, node); |
5613 | { | ||
5614 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
5615 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
5616 | } | 5660 | } |
5617 | 5661 | ||
5618 | static __always_inline bool | 5662 | void kvm_mmu_uninit_vm(struct kvm *kvm) |
5619 | slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5620 | slot_level_handler fn, bool lock_flush_tlb) | ||
5621 | { | 5663 | { |
5622 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, | 5664 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; |
5623 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
5624 | } | ||
5625 | 5665 | ||
5626 | static __always_inline bool | 5666 | kvm_page_track_unregister_notifier(kvm, node); |
5627 | slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
5628 | slot_level_handler fn, bool lock_flush_tlb) | ||
5629 | { | ||
5630 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
5631 | PT_PAGE_TABLE_LEVEL, lock_flush_tlb); | ||
5632 | } | 5667 | } |
5633 | 5668 | ||
5634 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) | 5669 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) |
5635 | { | 5670 | { |
5636 | struct kvm_memslots *slots; | 5671 | struct kvm_memslots *slots; |
5637 | struct kvm_memory_slot *memslot; | 5672 | struct kvm_memory_slot *memslot; |
5638 | bool flush_tlb = true; | ||
5639 | bool flush = false; | ||
5640 | int i; | 5673 | int i; |
5641 | 5674 | ||
5642 | if (kvm_available_flush_tlb_with_range()) | ||
5643 | flush_tlb = false; | ||
5644 | |||
5645 | spin_lock(&kvm->mmu_lock); | 5675 | spin_lock(&kvm->mmu_lock); |
5646 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { | 5676 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { |
5647 | slots = __kvm_memslots(kvm, i); | 5677 | slots = __kvm_memslots(kvm, i); |
@@ -5653,17 +5683,12 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) | |||
5653 | if (start >= end) | 5683 | if (start >= end) |
5654 | continue; | 5684 | continue; |
5655 | 5685 | ||
5656 | flush |= slot_handle_level_range(kvm, memslot, | 5686 | slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, |
5657 | kvm_zap_rmapp, PT_PAGE_TABLE_LEVEL, | 5687 | PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, |
5658 | PT_MAX_HUGEPAGE_LEVEL, start, | 5688 | start, end - 1, true); |
5659 | end - 1, flush_tlb); | ||
5660 | } | 5689 | } |
5661 | } | 5690 | } |
5662 | 5691 | ||
5663 | if (flush) | ||
5664 | kvm_flush_remote_tlbs_with_address(kvm, gfn_start, | ||
5665 | gfn_end - gfn_start + 1); | ||
5666 | |||
5667 | spin_unlock(&kvm->mmu_lock); | 5692 | spin_unlock(&kvm->mmu_lock); |
5668 | } | 5693 | } |
5669 | 5694 | ||
@@ -5815,101 +5840,58 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm, | |||
5815 | } | 5840 | } |
5816 | EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); | 5841 | EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); |
5817 | 5842 | ||
5818 | #define BATCH_ZAP_PAGES 10 | 5843 | static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) |
5819 | static void kvm_zap_obsolete_pages(struct kvm *kvm) | ||
5820 | { | 5844 | { |
5821 | struct kvm_mmu_page *sp, *node; | 5845 | struct kvm_mmu_page *sp, *node; |
5822 | int batch = 0; | 5846 | LIST_HEAD(invalid_list); |
5847 | int ign; | ||
5823 | 5848 | ||
5849 | spin_lock(&kvm->mmu_lock); | ||
5824 | restart: | 5850 | restart: |
5825 | list_for_each_entry_safe_reverse(sp, node, | 5851 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { |
5826 | &kvm->arch.active_mmu_pages, link) { | 5852 | if (mmio_only && !sp->mmio_cached) |
5827 | int ret; | ||
5828 | |||
5829 | /* | ||
5830 | * No obsolete page exists before new created page since | ||
5831 | * active_mmu_pages is the FIFO list. | ||
5832 | */ | ||
5833 | if (!is_obsolete_sp(kvm, sp)) | ||
5834 | break; | ||
5835 | |||
5836 | /* | ||
5837 | * Since we are reversely walking the list and the invalid | ||
5838 | * list will be moved to the head, skip the invalid page | ||
5839 | * can help us to avoid the infinity list walking. | ||
5840 | */ | ||
5841 | if (sp->role.invalid) | ||
5842 | continue; | 5853 | continue; |
5843 | 5854 | if (sp->role.invalid && sp->root_count) | |
5844 | /* | 5855 | continue; |
5845 | * Need not flush tlb since we only zap the sp with invalid | 5856 | if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) { |
5846 | * generation number. | 5857 | WARN_ON_ONCE(mmio_only); |
5847 | */ | ||
5848 | if (batch >= BATCH_ZAP_PAGES && | ||
5849 | cond_resched_lock(&kvm->mmu_lock)) { | ||
5850 | batch = 0; | ||
5851 | goto restart; | 5858 | goto restart; |
5852 | } | 5859 | } |
5853 | 5860 | if (cond_resched_lock(&kvm->mmu_lock)) | |
5854 | ret = kvm_mmu_prepare_zap_page(kvm, sp, | ||
5855 | &kvm->arch.zapped_obsolete_pages); | ||
5856 | batch += ret; | ||
5857 | |||
5858 | if (ret) | ||
5859 | goto restart; | 5861 | goto restart; |
5860 | } | 5862 | } |
5861 | 5863 | ||
5862 | /* | 5864 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
5863 | * Should flush tlb before free page tables since lockless-walking | ||
5864 | * may use the pages. | ||
5865 | */ | ||
5866 | kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); | ||
5867 | } | ||
5868 | |||
5869 | /* | ||
5870 | * Fast invalidate all shadow pages and use lock-break technique | ||
5871 | * to zap obsolete pages. | ||
5872 | * | ||
5873 | * It's required when memslot is being deleted or VM is being | ||
5874 | * destroyed, in these cases, we should ensure that KVM MMU does | ||
5875 | * not use any resource of the being-deleted slot or all slots | ||
5876 | * after calling the function. | ||
5877 | */ | ||
5878 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm) | ||
5879 | { | ||
5880 | spin_lock(&kvm->mmu_lock); | ||
5881 | trace_kvm_mmu_invalidate_zap_all_pages(kvm); | ||
5882 | kvm->arch.mmu_valid_gen++; | ||
5883 | |||
5884 | /* | ||
5885 | * Notify all vcpus to reload its shadow page table | ||
5886 | * and flush TLB. Then all vcpus will switch to new | ||
5887 | * shadow page table with the new mmu_valid_gen. | ||
5888 | * | ||
5889 | * Note: we should do this under the protection of | ||
5890 | * mmu-lock, otherwise, vcpu would purge shadow page | ||
5891 | * but miss tlb flush. | ||
5892 | */ | ||
5893 | kvm_reload_remote_mmus(kvm); | ||
5894 | |||
5895 | kvm_zap_obsolete_pages(kvm); | ||
5896 | spin_unlock(&kvm->mmu_lock); | 5865 | spin_unlock(&kvm->mmu_lock); |
5897 | } | 5866 | } |
5898 | 5867 | ||
5899 | static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) | 5868 | void kvm_mmu_zap_all(struct kvm *kvm) |
5900 | { | 5869 | { |
5901 | return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); | 5870 | return __kvm_mmu_zap_all(kvm, false); |
5902 | } | 5871 | } |
5903 | 5872 | ||
5904 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots) | 5873 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) |
5905 | { | 5874 | { |
5875 | WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); | ||
5876 | |||
5877 | gen &= MMIO_SPTE_GEN_MASK; | ||
5878 | |||
5906 | /* | 5879 | /* |
5907 | * The very rare case: if the generation-number is round, | 5880 | * Generation numbers are incremented in multiples of the number of |
5881 | * address spaces in order to provide unique generations across all | ||
5882 | * address spaces. Strip what is effectively the address space | ||
5883 | * modifier prior to checking for a wrap of the MMIO generation so | ||
5884 | * that a wrap in any address space is detected. | ||
5885 | */ | ||
5886 | gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1); | ||
5887 | |||
5888 | /* | ||
5889 | * The very rare case: if the MMIO generation number has wrapped, | ||
5908 | * zap all shadow pages. | 5890 | * zap all shadow pages. |
5909 | */ | 5891 | */ |
5910 | if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) { | 5892 | if (unlikely(gen == 0)) { |
5911 | kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); | 5893 | kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); |
5912 | kvm_mmu_invalidate_zap_all_pages(kvm); | 5894 | __kvm_mmu_zap_all(kvm, true); |
5913 | } | 5895 | } |
5914 | } | 5896 | } |
5915 | 5897 | ||
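Since memslot generations advance in steps of KVM_ADDRESS_SPACE_NUM (2 on x86, for the normal and SMM address spaces), the low bit of the masked generation identifies the address space rather than counting updates. Stripping it lets a wrap in either space be caught. Worked example under that layout:

    /* KVM_ADDRESS_SPACE_NUM == 2: even generations belong to address
     * space 0, odd ones to SMM.  After gen &= ~1ULL:
     *   gen == 2  ->  2   (no wrap)
     *   gen == 1  ->  0   (SMM generation wrapped: zap all MMIO sptes)
     */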
@@ -5940,24 +5922,16 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
5940 | * want to shrink a VM that only started to populate its MMU | 5922 | * want to shrink a VM that only started to populate its MMU |
5941 | * anyway. | 5923 | * anyway. |
5942 | */ | 5924 | */ |
5943 | if (!kvm->arch.n_used_mmu_pages && | 5925 | if (!kvm->arch.n_used_mmu_pages) |
5944 | !kvm_has_zapped_obsolete_pages(kvm)) | ||
5945 | continue; | 5926 | continue; |
5946 | 5927 | ||
5947 | idx = srcu_read_lock(&kvm->srcu); | 5928 | idx = srcu_read_lock(&kvm->srcu); |
5948 | spin_lock(&kvm->mmu_lock); | 5929 | spin_lock(&kvm->mmu_lock); |
5949 | 5930 | ||
5950 | if (kvm_has_zapped_obsolete_pages(kvm)) { | ||
5951 | kvm_mmu_commit_zap_page(kvm, | ||
5952 | &kvm->arch.zapped_obsolete_pages); | ||
5953 | goto unlock; | ||
5954 | } | ||
5955 | |||
5956 | if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) | 5931 | if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) |
5957 | freed++; | 5932 | freed++; |
5958 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 5933 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
5959 | 5934 | ||
5960 | unlock: | ||
5961 | spin_unlock(&kvm->mmu_lock); | 5935 | spin_unlock(&kvm->mmu_lock); |
5962 | srcu_read_unlock(&kvm->srcu, idx); | 5936 | srcu_read_unlock(&kvm->srcu, idx); |
5963 | 5937 | ||
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index c7b333147c4a..bbdc60f2fae8 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -203,7 +203,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
203 | return -(u32)fault & errcode; | 203 | return -(u32)fault & errcode; |
204 | } | 204 | } |
205 | 205 | ||
206 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); | ||
207 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); | 206 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); |
208 | 207 | ||
209 | void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); | 208 | void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index c73bf4e4988c..9f6c855a0043 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -8,18 +8,16 @@ | |||
8 | #undef TRACE_SYSTEM | 8 | #undef TRACE_SYSTEM |
9 | #define TRACE_SYSTEM kvmmmu | 9 | #define TRACE_SYSTEM kvmmmu |
10 | 10 | ||
11 | #define KVM_MMU_PAGE_FIELDS \ | 11 | #define KVM_MMU_PAGE_FIELDS \ |
12 | __field(unsigned long, mmu_valid_gen) \ | 12 | __field(__u64, gfn) \ |
13 | __field(__u64, gfn) \ | 13 | __field(__u32, role) \ |
14 | __field(__u32, role) \ | 14 | __field(__u32, root_count) \ |
15 | __field(__u32, root_count) \ | ||
16 | __field(bool, unsync) | 15 | __field(bool, unsync) |
17 | 16 | ||
18 | #define KVM_MMU_PAGE_ASSIGN(sp) \ | 17 | #define KVM_MMU_PAGE_ASSIGN(sp) \ |
19 | __entry->mmu_valid_gen = sp->mmu_valid_gen; \ | 18 | __entry->gfn = sp->gfn; \ |
20 | __entry->gfn = sp->gfn; \ | 19 | __entry->role = sp->role.word; \ |
21 | __entry->role = sp->role.word; \ | 20 | __entry->root_count = sp->root_count; \ |
22 | __entry->root_count = sp->root_count; \ | ||
23 | __entry->unsync = sp->unsync; | 21 | __entry->unsync = sp->unsync; |
24 | 22 | ||
25 | #define KVM_MMU_PAGE_PRINTK() ({ \ | 23 | #define KVM_MMU_PAGE_PRINTK() ({ \ |
@@ -31,9 +29,8 @@ | |||
31 | \ | 29 | \ |
32 | role.word = __entry->role; \ | 30 | role.word = __entry->role; \ |
33 | \ | 31 | \ |
34 | trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s" \ | 32 | trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s" \ |
35 | " %snxe %sad root %u %s%c", \ | 33 | " %snxe %sad root %u %s%c", \ |
36 | __entry->mmu_valid_gen, \ | ||
37 | __entry->gfn, role.level, \ | 34 | __entry->gfn, role.level, \ |
38 | role.cr4_pae ? " pae" : "", \ | 35 | role.cr4_pae ? " pae" : "", \ |
39 | role.quadrant, \ | 36 | role.quadrant, \ |
@@ -283,27 +280,6 @@ TRACE_EVENT( | |||
283 | ); | 280 | ); |
284 | 281 | ||
285 | TRACE_EVENT( | 282 | TRACE_EVENT( |
286 | kvm_mmu_invalidate_zap_all_pages, | ||
287 | TP_PROTO(struct kvm *kvm), | ||
288 | TP_ARGS(kvm), | ||
289 | |||
290 | TP_STRUCT__entry( | ||
291 | __field(unsigned long, mmu_valid_gen) | ||
292 | __field(unsigned int, mmu_used_pages) | ||
293 | ), | ||
294 | |||
295 | TP_fast_assign( | ||
296 | __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; | ||
297 | __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; | ||
298 | ), | ||
299 | |||
300 | TP_printk("kvm-mmu-valid-gen %lx used_pages %x", | ||
301 | __entry->mmu_valid_gen, __entry->mmu_used_pages | ||
302 | ) | ||
303 | ); | ||
304 | |||
305 | |||
306 | TRACE_EVENT( | ||
307 | check_mmio_spte, | 283 | check_mmio_spte, |
308 | TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), | 284 | TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), |
309 | TP_ARGS(spte, kvm_gen, spte_gen), | 285 | TP_ARGS(spte, kvm_gen, spte_gen), |
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 3052a59a3065..fd04d462fdae 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c | |||
@@ -42,7 +42,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, | |||
42 | for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { | 42 | for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { |
43 | slot->arch.gfn_track[i] = | 43 | slot->arch.gfn_track[i] = |
44 | kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), | 44 | kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), |
45 | GFP_KERNEL); | 45 | GFP_KERNEL_ACCOUNT); |
46 | if (!slot->arch.gfn_track[i]) | 46 | if (!slot->arch.gfn_track[i]) |
47 | goto track_free; | 47 | goto track_free; |
48 | } | 48 | } |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f13a3a24d360..b5b128a0a051 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -145,7 +145,6 @@ struct kvm_svm { | |||
145 | 145 | ||
146 | /* Struct members for AVIC */ | 146 | /* Struct members for AVIC */ |
147 | u32 avic_vm_id; | 147 | u32 avic_vm_id; |
148 | u32 ldr_mode; | ||
149 | struct page *avic_logical_id_table_page; | 148 | struct page *avic_logical_id_table_page; |
150 | struct page *avic_physical_id_table_page; | 149 | struct page *avic_physical_id_table_page; |
151 | struct hlist_node hnode; | 150 | struct hlist_node hnode; |
@@ -236,6 +235,7 @@ struct vcpu_svm { | |||
236 | bool nrips_enabled : 1; | 235 | bool nrips_enabled : 1; |
237 | 236 | ||
238 | u32 ldr_reg; | 237 | u32 ldr_reg; |
238 | u32 dfr_reg; | ||
239 | struct page *avic_backing_page; | 239 | struct page *avic_backing_page; |
240 | u64 *avic_physical_id_cache; | 240 | u64 *avic_physical_id_cache; |
241 | bool avic_is_running; | 241 | bool avic_is_running; |
@@ -1795,9 +1795,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, | |||
1795 | /* Avoid using vmalloc for smaller buffers. */ | 1795 | /* Avoid using vmalloc for smaller buffers. */ |
1796 | size = npages * sizeof(struct page *); | 1796 | size = npages * sizeof(struct page *); |
1797 | if (size > PAGE_SIZE) | 1797 | if (size > PAGE_SIZE) |
1798 | pages = vmalloc(size); | 1798 | pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO, |
1799 | PAGE_KERNEL); | ||
1799 | else | 1800 | else |
1800 | pages = kmalloc(size, GFP_KERNEL); | 1801 | pages = kmalloc(size, GFP_KERNEL_ACCOUNT); |
1801 | 1802 | ||
1802 | if (!pages) | 1803 | if (!pages) |
1803 | return NULL; | 1804 | return NULL; |
@@ -1865,7 +1866,9 @@ static void __unregister_enc_region_locked(struct kvm *kvm, | |||
1865 | 1866 | ||
1866 | static struct kvm *svm_vm_alloc(void) | 1867 | static struct kvm *svm_vm_alloc(void) |
1867 | { | 1868 | { |
1868 | struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm)); | 1869 | struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm), |
1870 | GFP_KERNEL_ACCOUNT | __GFP_ZERO, | ||
1871 | PAGE_KERNEL); | ||
1869 | return &kvm_svm->kvm; | 1872 | return &kvm_svm->kvm; |
1870 | } | 1873 | } |
1871 | 1874 | ||
@@ -1940,7 +1943,7 @@ static int avic_vm_init(struct kvm *kvm) | |||
1940 | return 0; | 1943 | return 0; |
1941 | 1944 | ||
1942 | /* Allocating physical APIC ID table (4KB) */ | 1945 | /* Allocating physical APIC ID table (4KB) */ |
1943 | p_page = alloc_page(GFP_KERNEL); | 1946 | p_page = alloc_page(GFP_KERNEL_ACCOUNT); |
1944 | if (!p_page) | 1947 | if (!p_page) |
1945 | goto free_avic; | 1948 | goto free_avic; |
1946 | 1949 | ||
@@ -1948,7 +1951,7 @@ static int avic_vm_init(struct kvm *kvm) | |||
1948 | clear_page(page_address(p_page)); | 1951 | clear_page(page_address(p_page)); |
1949 | 1952 | ||
1950 | /* Allocating logical APIC ID table (4KB) */ | 1953 | /* Allocating logical APIC ID table (4KB) */ |
1951 | l_page = alloc_page(GFP_KERNEL); | 1954 | l_page = alloc_page(GFP_KERNEL_ACCOUNT); |
1952 | if (!l_page) | 1955 | if (!l_page) |
1953 | goto free_avic; | 1956 | goto free_avic; |
1954 | 1957 | ||
@@ -2106,6 +2109,7 @@ static int avic_init_vcpu(struct vcpu_svm *svm) | |||
2106 | 2109 | ||
2107 | INIT_LIST_HEAD(&svm->ir_list); | 2110 | INIT_LIST_HEAD(&svm->ir_list); |
2108 | spin_lock_init(&svm->ir_list_lock); | 2111 | spin_lock_init(&svm->ir_list_lock); |
2112 | svm->dfr_reg = APIC_DFR_FLAT; | ||
2109 | 2113 | ||
2110 | return ret; | 2114 | return ret; |
2111 | } | 2115 | } |
@@ -2119,13 +2123,14 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2119 | struct page *nested_msrpm_pages; | 2123 | struct page *nested_msrpm_pages; |
2120 | int err; | 2124 | int err; |
2121 | 2125 | ||
2122 | svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | 2126 | svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); |
2123 | if (!svm) { | 2127 | if (!svm) { |
2124 | err = -ENOMEM; | 2128 | err = -ENOMEM; |
2125 | goto out; | 2129 | goto out; |
2126 | } | 2130 | } |
2127 | 2131 | ||
2128 | svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL); | 2132 | svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, |
2133 | GFP_KERNEL_ACCOUNT); | ||
2129 | if (!svm->vcpu.arch.guest_fpu) { | 2134 | if (!svm->vcpu.arch.guest_fpu) { |
2130 | printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); | 2135 | printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); |
2131 | err = -ENOMEM; | 2136 | err = -ENOMEM; |
@@ -2137,19 +2142,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2137 | goto free_svm; | 2142 | goto free_svm; |
2138 | 2143 | ||
2139 | err = -ENOMEM; | 2144 | err = -ENOMEM; |
2140 | page = alloc_page(GFP_KERNEL); | 2145 | page = alloc_page(GFP_KERNEL_ACCOUNT); |
2141 | if (!page) | 2146 | if (!page) |
2142 | goto uninit; | 2147 | goto uninit; |
2143 | 2148 | ||
2144 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 2149 | msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); |
2145 | if (!msrpm_pages) | 2150 | if (!msrpm_pages) |
2146 | goto free_page1; | 2151 | goto free_page1; |
2147 | 2152 | ||
2148 | nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 2153 | nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); |
2149 | if (!nested_msrpm_pages) | 2154 | if (!nested_msrpm_pages) |
2150 | goto free_page2; | 2155 | goto free_page2; |
2151 | 2156 | ||
2152 | hsave_page = alloc_page(GFP_KERNEL); | 2157 | hsave_page = alloc_page(GFP_KERNEL_ACCOUNT); |
2153 | if (!hsave_page) | 2158 | if (!hsave_page) |
2154 | goto free_page3; | 2159 | goto free_page3; |
2155 | 2160 | ||
@@ -4565,8 +4570,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) | |||
4565 | return &logical_apic_id_table[index]; | 4570 | return &logical_apic_id_table[index]; |
4566 | } | 4571 | } |
4567 | 4572 | ||
4568 | static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, | 4573 | static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr) |
4569 | bool valid) | ||
4570 | { | 4574 | { |
4571 | bool flat; | 4575 | bool flat; |
4572 | u32 *entry, new_entry; | 4576 | u32 *entry, new_entry; |
@@ -4579,31 +4583,39 @@ static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, | |||
4579 | new_entry = READ_ONCE(*entry); | 4583 | new_entry = READ_ONCE(*entry); |
4580 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; | 4584 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; |
4581 | new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); | 4585 | new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); |
4582 | if (valid) | 4586 | new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; |
4583 | new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; | ||
4584 | else | ||
4585 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK; | ||
4586 | WRITE_ONCE(*entry, new_entry); | 4587 | WRITE_ONCE(*entry, new_entry); |
4587 | 4588 | ||
4588 | return 0; | 4589 | return 0; |
4589 | } | 4590 | } |
4590 | 4591 | ||
4592 | static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu) | ||
4593 | { | ||
4594 | struct vcpu_svm *svm = to_svm(vcpu); | ||
4595 | bool flat = svm->dfr_reg == APIC_DFR_FLAT; | ||
4596 | u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat); | ||
4597 | |||
4598 | if (entry) | ||
4599 | WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK); | ||
4600 | } | ||
4601 | |||
4591 | static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) | 4602 | static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) |
4592 | { | 4603 | { |
4593 | int ret; | 4604 | int ret = 0; |
4594 | struct vcpu_svm *svm = to_svm(vcpu); | 4605 | struct vcpu_svm *svm = to_svm(vcpu); |
4595 | u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); | 4606 | u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); |
4596 | 4607 | ||
4597 | if (!ldr) | 4608 | if (ldr == svm->ldr_reg) |
4598 | return 1; | 4609 | return 0; |
4599 | 4610 | ||
4600 | ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true); | 4611 | avic_invalidate_logical_id_entry(vcpu); |
4601 | if (ret && svm->ldr_reg) { | 4612 | |
4602 | avic_ldr_write(vcpu, 0, svm->ldr_reg, false); | 4613 | if (ldr) |
4603 | svm->ldr_reg = 0; | 4614 | ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr); |
4604 | } else { | 4615 | |
4616 | if (!ret) | ||
4605 | svm->ldr_reg = ldr; | 4617 | svm->ldr_reg = ldr; |
4606 | } | 4618 | |
4607 | return ret; | 4619 | return ret; |
4608 | } | 4620 | } |
4609 | 4621 | ||
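The rewritten LDR handler is now idempotent: it bails out if the LDR is unchanged, always invalidates the stale logical-ID table entry first, and caches the new value only on success. Condensed:

    /* avic_handle_ldr_update(), condensed:
     *   ldr == svm->ldr_reg  ->  no-op, return 0
     *   otherwise            ->  invalidate old logical-ID entry,
     *                            write a new entry if ldr != 0,
     *                            cache ldr in svm->ldr_reg on success
     */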
@@ -4637,27 +4649,16 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) | |||
4637 | return 0; | 4649 | return 0; |
4638 | } | 4650 | } |
4639 | 4651 | ||
4640 | static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) | 4652 | static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) |
4641 | { | 4653 | { |
4642 | struct vcpu_svm *svm = to_svm(vcpu); | 4654 | struct vcpu_svm *svm = to_svm(vcpu); |
4643 | struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); | ||
4644 | u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); | 4655 | u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); |
4645 | u32 mod = (dfr >> 28) & 0xf; | ||
4646 | 4656 | ||
4647 | /* | 4657 | if (svm->dfr_reg == dfr) |
4648 | * We assume that all local APICs are using the same type. | 4658 | return; |
4649 | * If this changes, we need to flush the AVIC logical | ||
4650 | * APID id table. | ||
4651 | */ | ||
4652 | if (kvm_svm->ldr_mode == mod) | ||
4653 | return 0; | ||
4654 | |||
4655 | clear_page(page_address(kvm_svm->avic_logical_id_table_page)); | ||
4656 | kvm_svm->ldr_mode = mod; | ||
4657 | 4659 | ||
4658 | if (svm->ldr_reg) | 4660 | avic_invalidate_logical_id_entry(vcpu); |
4659 | avic_handle_ldr_update(vcpu); | 4661 | svm->dfr_reg = dfr; |
4660 | return 0; | ||
4661 | } | 4662 | } |
4662 | 4663 | ||
4663 | static int avic_unaccel_trap_write(struct vcpu_svm *svm) | 4664 | static int avic_unaccel_trap_write(struct vcpu_svm *svm) |
@@ -5125,11 +5126,11 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) | |||
5125 | struct vcpu_svm *svm = to_svm(vcpu); | 5126 | struct vcpu_svm *svm = to_svm(vcpu); |
5126 | struct vmcb *vmcb = svm->vmcb; | 5127 | struct vmcb *vmcb = svm->vmcb; |
5127 | 5128 | ||
5128 | if (!kvm_vcpu_apicv_active(&svm->vcpu)) | 5129 | if (kvm_vcpu_apicv_active(vcpu)) |
5129 | return; | 5130 | vmcb->control.int_ctl |= AVIC_ENABLE_MASK; |
5130 | 5131 | else | |
5131 | vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; | 5132 | vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; |
5132 | mark_dirty(vmcb, VMCB_INTR); | 5133 | mark_dirty(vmcb, VMCB_AVIC); |
5133 | } | 5134 | } |
5134 | 5135 | ||
5135 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 5136 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
@@ -5195,7 +5196,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) | |||
5195 | * Allocating new amd_iommu_pi_data, which will get | 5196 | * Allocating new amd_iommu_pi_data, which will get |
5196 | * add to the per-vcpu ir_list. | 5197 | * add to the per-vcpu ir_list. |
5197 | */ | 5198 | */ |
5198 | ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL); | 5199 | ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); |
5199 | if (!ir) { | 5200 | if (!ir) { |
5200 | ret = -ENOMEM; | 5201 | ret = -ENOMEM; |
5201 | goto out; | 5202 | goto out; |
@@ -6163,8 +6164,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) | |||
6163 | { | 6164 | { |
6164 | if (avic_handle_apic_id_update(vcpu) != 0) | 6165 | if (avic_handle_apic_id_update(vcpu) != 0) |
6165 | return; | 6166 | return; |
6166 | if (avic_handle_dfr_update(vcpu) != 0) | 6167 | avic_handle_dfr_update(vcpu); |
6167 | return; | ||
6168 | avic_handle_ldr_update(vcpu); | 6168 | avic_handle_ldr_update(vcpu); |
6169 | } | 6169 | } |
6170 | 6170 | ||
@@ -6311,7 +6311,7 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) | |||
6311 | if (ret) | 6311 | if (ret) |
6312 | return ret; | 6312 | return ret; |
6313 | 6313 | ||
6314 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6314 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
6315 | if (!data) | 6315 | if (!data) |
6316 | return -ENOMEM; | 6316 | return -ENOMEM; |
6317 | 6317 | ||
@@ -6361,7 +6361,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
6361 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) | 6361 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) |
6362 | return -EFAULT; | 6362 | return -EFAULT; |
6363 | 6363 | ||
6364 | start = kzalloc(sizeof(*start), GFP_KERNEL); | 6364 | start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT); |
6365 | if (!start) | 6365 | if (!start) |
6366 | return -ENOMEM; | 6366 | return -ENOMEM; |
6367 | 6367 | ||
@@ -6458,7 +6458,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
6458 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) | 6458 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) |
6459 | return -EFAULT; | 6459 | return -EFAULT; |
6460 | 6460 | ||
6461 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6461 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
6462 | if (!data) | 6462 | if (!data) |
6463 | return -ENOMEM; | 6463 | return -ENOMEM; |
6464 | 6464 | ||
@@ -6535,7 +6535,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
6535 | if (copy_from_user(¶ms, measure, sizeof(params))) | 6535 | if (copy_from_user(¶ms, measure, sizeof(params))) |
6536 | return -EFAULT; | 6536 | return -EFAULT; |
6537 | 6537 | ||
6538 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6538 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
6539 | if (!data) | 6539 | if (!data) |
6540 | return -ENOMEM; | 6540 | return -ENOMEM; |
6541 | 6541 | ||
@@ -6597,7 +6597,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
6597 | if (!sev_guest(kvm)) | 6597 | if (!sev_guest(kvm)) |
6598 | return -ENOTTY; | 6598 | return -ENOTTY; |
6599 | 6599 | ||
6600 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6600 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
6601 | if (!data) | 6601 | if (!data) |
6602 | return -ENOMEM; | 6602 | return -ENOMEM; |
6603 | 6603 | ||
@@ -6618,7 +6618,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
6618 | if (!sev_guest(kvm)) | 6618 | if (!sev_guest(kvm)) |
6619 | return -ENOTTY; | 6619 | return -ENOTTY; |
6620 | 6620 | ||
6621 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6621 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
6622 | if (!data) | 6622 | if (!data) |
6623 | return -ENOMEM; | 6623 | return -ENOMEM; |
6624 | 6624 | ||
@@ -6646,7 +6646,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, | |||
6646 | struct sev_data_dbg *data; | 6646 | struct sev_data_dbg *data; |
6647 | int ret; | 6647 | int ret; |
6648 | 6648 | ||
6649 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6649 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
6650 | if (!data) | 6650 | if (!data) |
6651 | return -ENOMEM; | 6651 | return -ENOMEM; |
6652 | 6652 | ||
@@ -6901,7 +6901,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
6901 | } | 6901 | } |
6902 | 6902 | ||
6903 | ret = -ENOMEM; | 6903 | ret = -ENOMEM; |
6904 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6904 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
6905 | if (!data) | 6905 | if (!data) |
6906 | goto e_unpin_memory; | 6906 | goto e_unpin_memory; |
6907 | 6907 | ||
@@ -7007,7 +7007,7 @@ static int svm_register_enc_region(struct kvm *kvm, | |||
7007 | if (range->addr > ULONG_MAX || range->size > ULONG_MAX) | 7007 | if (range->addr > ULONG_MAX || range->size > ULONG_MAX) |
7008 | return -EINVAL; | 7008 | return -EINVAL; |
7009 | 7009 | ||
7010 | region = kzalloc(sizeof(*region), GFP_KERNEL); | 7010 | region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT); |
7011 | if (!region) | 7011 | if (!region) |
7012 | return -ENOMEM; | 7012 | return -ENOMEM; |
7013 | 7013 | ||
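
Note on the GFP_KERNEL -> GFP_KERNEL_ACCOUNT conversions above: allocations whose lifetime is tied to a VM are now charged to the memory cgroup of the allocating task, so a memcg limit covers this per-VM state as well. A minimal sketch of the pattern, with illustrative names (my_vm_state and its fields are not kernel types):

    #include <linux/types.h>
    #include <linux/slab.h>
    #include <linux/gfp.h>

    struct my_vm_state {
            u64 generation;
            unsigned long *bitmap;
    };

    static struct my_vm_state *my_vm_state_alloc(void)
    {
            struct my_vm_state *s;

            /* Charged to the current task's memcg; freed with the VM. */
            s = kzalloc(sizeof(*s), GFP_KERNEL_ACCOUNT);
            if (!s)
                    return NULL;

            /* Page-granular allocations take the same flag. */
            s->bitmap = (unsigned long *)__get_free_page(GFP_KERNEL_ACCOUNT);
            if (!s->bitmap) {
                    kfree(s);
                    return NULL;
            }
            return s;
    }

Allocations that outlive any single VM (see the vmx_bitmap and vmx_l1d_flush_pages comments in the vmx changes below) deliberately stay on plain GFP_KERNEL.
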
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d737a51a53ca..f24a2c225070 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c | |||
@@ -211,7 +211,6 @@ static void free_nested(struct kvm_vcpu *vcpu) | |||
211 | if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) | 211 | if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) |
212 | return; | 212 | return; |
213 | 213 | ||
214 | hrtimer_cancel(&vmx->nested.preemption_timer); | ||
215 | vmx->nested.vmxon = false; | 214 | vmx->nested.vmxon = false; |
216 | vmx->nested.smm.vmxon = false; | 215 | vmx->nested.smm.vmxon = false; |
217 | free_vpid(vmx->nested.vpid02); | 216 | free_vpid(vmx->nested.vpid02); |
@@ -274,6 +273,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) | |||
274 | void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) | 273 | void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) |
275 | { | 274 | { |
276 | vcpu_load(vcpu); | 275 | vcpu_load(vcpu); |
276 | vmx_leave_nested(vcpu); | ||
277 | vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01); | 277 | vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01); |
278 | free_nested(vcpu); | 278 | free_nested(vcpu); |
279 | vcpu_put(vcpu); | 279 | vcpu_put(vcpu); |
@@ -1980,17 +1980,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
1980 | prepare_vmcs02_early_full(vmx, vmcs12); | 1980 | prepare_vmcs02_early_full(vmx, vmcs12); |
1981 | 1981 | ||
1982 | /* | 1982 | /* |
1983 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before | ||
1984 | * entry, but only if the current (host) sp changed from the value | ||
1985 | * we wrote last (vmx->host_rsp). This cache is no longer relevant | ||
1986 | * if we switch vmcs, and rather than hold a separate cache per vmcs, | ||
1987 | * here we just force the write to happen on entry. host_rsp will | ||
1988 | * also be written unconditionally by nested_vmx_check_vmentry_hw() | ||
1989 | * if we are doing early consistency checks via hardware. | ||
1990 | */ | ||
1991 | vmx->host_rsp = 0; | ||
1992 | |||
1993 | /* | ||
1994 | * PIN CONTROLS | 1983 | * PIN CONTROLS |
1995 | */ | 1984 | */ |
1996 | exec_control = vmcs12->pin_based_vm_exec_control; | 1985 | exec_control = vmcs12->pin_based_vm_exec_control; |
@@ -2289,10 +2278,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
2289 | } | 2278 | } |
2290 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); | 2279 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
2291 | 2280 | ||
2292 | vmx->nested.preemption_timer_expired = false; | ||
2293 | if (nested_cpu_has_preemption_timer(vmcs12)) | ||
2294 | vmx_start_preemption_timer(vcpu); | ||
2295 | |||
2296 | /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the | 2281 | /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the |
2297 | * bitwise-or of what L1 wants to trap for L2, and what we want to | 2282 | * bitwise-or of what L1 wants to trap for L2, and what we want to |
2298 | * trap. Note that CR0.TS also needs updating - we do this later. | 2283 | * trap. Note that CR0.TS also needs updating - we do this later. |
@@ -2722,6 +2707,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
2722 | { | 2707 | { |
2723 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2708 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2724 | unsigned long cr3, cr4; | 2709 | unsigned long cr3, cr4; |
2710 | bool vm_fail; | ||
2725 | 2711 | ||
2726 | if (!nested_early_check) | 2712 | if (!nested_early_check) |
2727 | return 0; | 2713 | return 0; |
@@ -2755,29 +2741,34 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
2755 | vmx->loaded_vmcs->host_state.cr4 = cr4; | 2741 | vmx->loaded_vmcs->host_state.cr4 = cr4; |
2756 | } | 2742 | } |
2757 | 2743 | ||
2758 | vmx->__launched = vmx->loaded_vmcs->launched; | ||
2759 | |||
2760 | asm( | 2744 | asm( |
2761 | /* Set HOST_RSP */ | ||
2762 | "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ | 2745 | "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ |
2763 | __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t" | 2746 | "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" |
2764 | "mov %%" _ASM_SP ", %c[host_rsp](%1)\n\t" | 2747 | "je 1f \n\t" |
2748 | __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" | ||
2749 | "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" | ||
2750 | "1: \n\t" | ||
2765 | "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ | 2751 | "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ |
2766 | 2752 | ||
2767 | /* Check if vmlaunch or vmresume is needed */ | 2753 | /* Check if vmlaunch or vmresume is needed */ |
2768 | "cmpl $0, %c[launched](%% " _ASM_CX")\n\t" | 2754 | "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" |
2769 | 2755 | ||
2756 | /* | ||
2757 | * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set | ||
2758 | * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail | ||
2759 | * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the | ||
2760 | * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail. | ||
2761 | */ | ||
2770 | "call vmx_vmenter\n\t" | 2762 | "call vmx_vmenter\n\t" |
2771 | 2763 | ||
2772 | /* Set vmx->fail accordingly */ | 2764 | CC_SET(be) |
2773 | "setbe %c[fail](%% " _ASM_CX")\n\t" | 2765 | : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) |
2774 | : ASM_CALL_CONSTRAINT | 2766 | : [HOST_RSP]"r"((unsigned long)HOST_RSP), |
2775 | : "c"(vmx), "d"((unsigned long)HOST_RSP), | 2767 | [loaded_vmcs]"r"(vmx->loaded_vmcs), |
2776 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), | 2768 | [launched]"i"(offsetof(struct loaded_vmcs, launched)), |
2777 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | 2769 | [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), |
2778 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), | ||
2779 | [wordsize]"i"(sizeof(ulong)) | 2770 | [wordsize]"i"(sizeof(ulong)) |
2780 | : "rax", "cc", "memory" | 2771 | : "cc", "memory" |
2781 | ); | 2772 | ); |
2782 | 2773 | ||
2783 | preempt_enable(); | 2774 | preempt_enable(); |
@@ -2787,10 +2778,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
2787 | if (vmx->msr_autoload.guest.nr) | 2778 | if (vmx->msr_autoload.guest.nr) |
2788 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); | 2779 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); |
2789 | 2780 | ||
2790 | if (vmx->fail) { | 2781 | if (vm_fail) { |
2791 | WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != | 2782 | WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != |
2792 | VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 2783 | VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
2793 | vmx->fail = 0; | ||
2794 | return 1; | 2784 | return 1; |
2795 | } | 2785 | } |
2796 | 2786 | ||
@@ -2813,8 +2803,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
2813 | 2803 | ||
2814 | return 0; | 2804 | return 0; |
2815 | } | 2805 | } |
2816 | STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw); | ||
2817 | |||
2818 | 2806 | ||
2819 | static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, | 2807 | static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, |
2820 | struct vmcs12 *vmcs12); | 2808 | struct vmcs12 *vmcs12); |
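
The rewritten asm above relies on CC_SET()/CC_OUT() from <asm/asm.h>, which (on compilers with flag-output support) move the condition codes left in RFLAGS by VMLAUNCH/VMRESUME straight into a C variable, replacing the old SETBE into vmx->fail. A self-contained sketch of the same flag-output pattern (function and operands are illustrative, not from this patch):

    #include <linux/types.h>
    #include <asm/asm.h>            /* CC_SET(), CC_OUT() */

    /* Returns true if the unsigned subtraction a - b borrows (a < b);
     * CF after SUB is captured directly as the output. */
    static inline bool sub_borrows(unsigned long a, unsigned long b)
    {
            bool cf;

            asm("sub %2, %1" CC_SET(c)
                : CC_OUT(c) (cf), "+r" (a)
                : "r" (b));
            return cf;
    }

nested_vmx_check_vmentry_hw() uses "be" rather than "c" because VM-Fail is signalled by either CF (VM-Fail Invalid) or ZF (VM-Fail Valid).
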
@@ -3031,6 +3019,15 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
3031 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 3019 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
3032 | 3020 | ||
3033 | /* | 3021 | /* |
3022 | * Do not start the preemption timer hrtimer until after we know | ||
3023 | * we are successful, so that only nested_vmx_vmexit needs to cancel | ||
3024 | * the timer. | ||
3025 | */ | ||
3026 | vmx->nested.preemption_timer_expired = false; | ||
3027 | if (nested_cpu_has_preemption_timer(vmcs12)) | ||
3028 | vmx_start_preemption_timer(vcpu); | ||
3029 | |||
3030 | /* | ||
3034 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point | 3031 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point |
3035 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet | 3032 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet |
3036 | * returned as far as L1 is concerned. It will only return (and set | 3033 | * returned as far as L1 is concerned. It will only return (and set |
@@ -3450,13 +3447,10 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
3450 | else | 3447 | else |
3451 | vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; | 3448 | vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; |
3452 | 3449 | ||
3453 | if (nested_cpu_has_preemption_timer(vmcs12)) { | 3450 | if (nested_cpu_has_preemption_timer(vmcs12) && |
3454 | if (vmcs12->vm_exit_controls & | 3451 | vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) |
3455 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) | ||
3456 | vmcs12->vmx_preemption_timer_value = | 3452 | vmcs12->vmx_preemption_timer_value = |
3457 | vmx_get_preemption_timer_value(vcpu); | 3453 | vmx_get_preemption_timer_value(vcpu); |
3458 | hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); | ||
3459 | } | ||
3460 | 3454 | ||
3461 | /* | 3455 | /* |
3462 | * In some cases (usually, nested EPT), L2 is allowed to change its | 3456 | * In some cases (usually, nested EPT), L2 is allowed to change its |
@@ -3864,6 +3858,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
3864 | 3858 | ||
3865 | leave_guest_mode(vcpu); | 3859 | leave_guest_mode(vcpu); |
3866 | 3860 | ||
3861 | if (nested_cpu_has_preemption_timer(vmcs12)) | ||
3862 | hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); | ||
3863 | |||
3867 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) | 3864 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) |
3868 | vcpu->arch.tsc_offset -= vmcs12->tsc_offset; | 3865 | vcpu->arch.tsc_offset -= vmcs12->tsc_offset; |
3869 | 3866 | ||
@@ -3915,9 +3912,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
3915 | vmx_flush_tlb(vcpu, true); | 3912 | vmx_flush_tlb(vcpu, true); |
3916 | } | 3913 | } |
3917 | 3914 | ||
3918 | /* This is needed for the same reason as it was needed in prepare_vmcs02 */ | ||
3919 | vmx->host_rsp = 0; | ||
3920 | |||
3921 | /* Unpin physical memory we referred to in vmcs02 */ | 3915 | /* Unpin physical memory we referred to in vmcs02 */ |
3922 | if (vmx->nested.apic_access_page) { | 3916 | if (vmx->nested.apic_access_page) { |
3923 | kvm_release_page_dirty(vmx->nested.apic_access_page); | 3917 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
@@ -4035,25 +4029,50 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | |||
4035 | /* Addr = segment_base + offset */ | 4029 | /* Addr = segment_base + offset */ |
4036 | /* offset = base + [index * scale] + displacement */ | 4030 | /* offset = base + [index * scale] + displacement */ |
4037 | off = exit_qualification; /* holds the displacement */ | 4031 | off = exit_qualification; /* holds the displacement */ |
4032 | if (addr_size == 1) | ||
4033 | off = (gva_t)sign_extend64(off, 31); | ||
4034 | else if (addr_size == 0) | ||
4035 | off = (gva_t)sign_extend64(off, 15); | ||
4038 | if (base_is_valid) | 4036 | if (base_is_valid) |
4039 | off += kvm_register_read(vcpu, base_reg); | 4037 | off += kvm_register_read(vcpu, base_reg); |
4040 | if (index_is_valid) | 4038 | if (index_is_valid) |
4041 | off += kvm_register_read(vcpu, index_reg)<<scaling; | 4039 | off += kvm_register_read(vcpu, index_reg)<<scaling; |
4042 | vmx_get_segment(vcpu, &s, seg_reg); | 4040 | vmx_get_segment(vcpu, &s, seg_reg); |
4043 | *ret = s.base + off; | ||
4044 | 4041 | ||
4042 | /* | ||
4043 | * The effective address, i.e. @off, of a memory operand is truncated | ||
4044 | * based on the address size of the instruction. Note that this is | ||
4045 | * the *effective address*, i.e. the address prior to accounting for | ||
4046 | * the segment's base. | ||
4047 | */ | ||
4045 | if (addr_size == 1) /* 32 bit */ | 4048 | if (addr_size == 1) /* 32 bit */ |
4046 | *ret &= 0xffffffff; | 4049 | off &= 0xffffffff; |
4050 | else if (addr_size == 0) /* 16 bit */ | ||
4051 | off &= 0xffff; | ||
4047 | 4052 | ||
4048 | /* Checks for #GP/#SS exceptions. */ | 4053 | /* Checks for #GP/#SS exceptions. */ |
4049 | exn = false; | 4054 | exn = false; |
4050 | if (is_long_mode(vcpu)) { | 4055 | if (is_long_mode(vcpu)) { |
4056 | /* | ||
4057 | * The virtual/linear address is never truncated in 64-bit | ||
4058 | * mode, e.g. a 32-bit address size can yield a 64-bit virtual | ||
4059 | * address when using FS/GS with a non-zero base. | ||
4060 | */ | ||
4061 | *ret = s.base + off; | ||
4062 | |||
4051 | /* Long mode: #GP(0)/#SS(0) if the memory address is in a | 4063 | /* Long mode: #GP(0)/#SS(0) if the memory address is in a |
4052 | * non-canonical form. This is the only check on the memory | 4064 | * non-canonical form. This is the only check on the memory |
4053 | * destination for long mode! | 4065 | * destination for long mode! |
4054 | */ | 4066 | */ |
4055 | exn = is_noncanonical_address(*ret, vcpu); | 4067 | exn = is_noncanonical_address(*ret, vcpu); |
4056 | } else if (is_protmode(vcpu)) { | 4068 | } else { |
4069 | /* | ||
4070 | * When not in long mode, the virtual/linear address is | ||
4071 | * unconditionally truncated to 32 bits regardless of the | ||
4072 | * address size. | ||
4073 | */ | ||
4074 | *ret = (s.base + off) & 0xffffffff; | ||
4075 | |||
4057 | /* Protected mode: apply checks for segment validity in the | 4076 | /* Protected mode: apply checks for segment validity in the |
4058 | * following order: | 4077 | * following order: |
4059 | * - segment type check (#GP(0) may be thrown) | 4078 | * - segment type check (#GP(0) may be thrown) |
@@ -4077,10 +4096,16 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | |||
4077 | /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. | 4096 | /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. |
4078 | */ | 4097 | */ |
4079 | exn = (s.unusable != 0); | 4098 | exn = (s.unusable != 0); |
4080 | /* Protected mode: #GP(0)/#SS(0) if the memory | 4099 | |
4081 | * operand is outside the segment limit. | 4100 | /* |
4101 | * Protected mode: #GP(0)/#SS(0) if the memory operand is | ||
4102 | * outside the segment limit. All CPUs that support VMX ignore | ||
4103 | * limit checks for flat segments, i.e. segments with base==0, | ||
4104 | * limit==0xffffffff and of type expand-up data or code. | ||
4082 | */ | 4105 | */ |
4083 | exn = exn || (off + sizeof(u64) > s.limit); | 4106 | if (!(s.base == 0 && s.limit == 0xffffffff && |
4107 | ((s.type & 8) || !(s.type & 4)))) | ||
4108 | exn = exn || (off + sizeof(u64) > s.limit); | ||
4084 | } | 4109 | } |
4085 | if (exn) { | 4110 | if (exn) { |
4086 | kvm_queue_exception_e(vcpu, | 4111 | kvm_queue_exception_e(vcpu, |
@@ -4145,11 +4170,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) | |||
4145 | if (r < 0) | 4170 | if (r < 0) |
4146 | goto out_vmcs02; | 4171 | goto out_vmcs02; |
4147 | 4172 | ||
4148 | vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); | 4173 | vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); |
4149 | if (!vmx->nested.cached_vmcs12) | 4174 | if (!vmx->nested.cached_vmcs12) |
4150 | goto out_cached_vmcs12; | 4175 | goto out_cached_vmcs12; |
4151 | 4176 | ||
4152 | vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); | 4177 | vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); |
4153 | if (!vmx->nested.cached_shadow_vmcs12) | 4178 | if (!vmx->nested.cached_shadow_vmcs12) |
4154 | goto out_cached_shadow_vmcs12; | 4179 | goto out_cached_shadow_vmcs12; |
4155 | 4180 | ||
@@ -5696,6 +5721,10 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) | |||
5696 | enable_shadow_vmcs = 0; | 5721 | enable_shadow_vmcs = 0; |
5697 | if (enable_shadow_vmcs) { | 5722 | if (enable_shadow_vmcs) { |
5698 | for (i = 0; i < VMX_BITMAP_NR; i++) { | 5723 | for (i = 0; i < VMX_BITMAP_NR; i++) { |
5724 | /* | ||
5725 | * The vmx_bitmap is not tied to a VM and so should | ||
5726 | * not be charged to a memcg. | ||
5727 | */ | ||
5699 | vmx_bitmap[i] = (unsigned long *) | 5728 | vmx_bitmap[i] = (unsigned long *) |
5700 | __get_free_page(GFP_KERNEL); | 5729 | __get_free_page(GFP_KERNEL); |
5701 | if (!vmx_bitmap[i]) { | 5730 | if (!vmx_bitmap[i]) { |
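
The get_vmx_mem_address() changes are easiest to read linearly: the displacement is sign-extended to the instruction's address size, the *effective* address (before the segment base) is truncated to that address size, and the *linear* address is truncated to 32 bits only outside long mode. A hedged user-space restatement (the addr_size encoding of 0/1/2 for 16/32/64-bit follows the exit qualification; names are illustrative):

    #include <stdint.h>

    static uint64_t vmx_memop_linear_addr(uint64_t seg_base, uint64_t disp,
                                          uint64_t base, uint64_t index,
                                          unsigned int scale, int addr_size,
                                          int long_mode)
    {
            uint64_t off = disp;

            /* Sign-extend the displacement to the address size. */
            if (addr_size == 1)
                    off = (uint64_t)(int32_t)off;
            else if (addr_size == 0)
                    off = (uint64_t)(int16_t)off;

            off += base + (index << scale);

            /* Truncate the effective address to the address size. */
            if (addr_size == 1)
                    off &= 0xffffffff;
            else if (addr_size == 0)
                    off &= 0xffff;

            /* The linear address is only truncated outside long mode. */
            if (long_mode)
                    return seg_base + off;
            return (seg_base + off) & 0xffffffff;
    }
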
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 6def3ba88e3b..cb6079f8a227 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h | |||
@@ -34,6 +34,7 @@ struct vmcs_host_state { | |||
34 | unsigned long cr4; /* May not match real cr4 */ | 34 | unsigned long cr4; /* May not match real cr4 */ |
35 | unsigned long gs_base; | 35 | unsigned long gs_base; |
36 | unsigned long fs_base; | 36 | unsigned long fs_base; |
37 | unsigned long rsp; | ||
37 | 38 | ||
38 | u16 fs_sel, gs_sel, ldt_sel; | 39 | u16 fs_sel, gs_sel, ldt_sel; |
39 | #ifdef CONFIG_X86_64 | 40 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index bcef2c7e9bc4..7b272738c576 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S | |||
@@ -1,6 +1,30 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
3 | #include <asm/asm.h> | 3 | #include <asm/asm.h> |
4 | #include <asm/bitsperlong.h> | ||
5 | #include <asm/kvm_vcpu_regs.h> | ||
6 | |||
7 | #define WORD_SIZE (BITS_PER_LONG / 8) | ||
8 | |||
9 | #define VCPU_RAX __VCPU_REGS_RAX * WORD_SIZE | ||
10 | #define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE | ||
11 | #define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE | ||
12 | #define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE | ||
13 | /* Intentionally omit RSP as it's context switched by hardware */ | ||
14 | #define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE | ||
15 | #define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE | ||
16 | #define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE | ||
17 | |||
18 | #ifdef CONFIG_X86_64 | ||
19 | #define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE | ||
20 | #define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE | ||
21 | #define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE | ||
22 | #define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE | ||
23 | #define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE | ||
24 | #define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE | ||
25 | #define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE | ||
26 | #define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE | ||
27 | #endif | ||
4 | 28 | ||
5 | .text | 29 | .text |
6 | 30 | ||
@@ -55,3 +79,146 @@ ENDPROC(vmx_vmenter) | |||
55 | ENTRY(vmx_vmexit) | 79 | ENTRY(vmx_vmexit) |
56 | ret | 80 | ret |
57 | ENDPROC(vmx_vmexit) | 81 | ENDPROC(vmx_vmexit) |
82 | |||
83 | /** | ||
84 | * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode | ||
85 | * @vmx: struct vcpu_vmx * | ||
86 | * @regs: unsigned long * (to guest registers) | ||
87 | * @launched: %true if the VMCS has been launched | ||
88 | * | ||
89 | * Returns: | ||
90 | * 0 on VM-Exit, 1 on VM-Fail | ||
91 | */ | ||
92 | ENTRY(__vmx_vcpu_run) | ||
93 | push %_ASM_BP | ||
94 | mov %_ASM_SP, %_ASM_BP | ||
95 | #ifdef CONFIG_X86_64 | ||
96 | push %r15 | ||
97 | push %r14 | ||
98 | push %r13 | ||
99 | push %r12 | ||
100 | #else | ||
101 | push %edi | ||
102 | push %esi | ||
103 | #endif | ||
104 | push %_ASM_BX | ||
105 | |||
106 | /* | ||
107 | * Save @regs, as _ASM_ARG2 may be modified by vmx_update_host_rsp() and | ||
108 | * @regs is needed after VM-Exit to save the guest's register values. | ||
109 | */ | ||
110 | push %_ASM_ARG2 | ||
111 | |||
112 | /* Copy @launched to BL, as _ASM_ARG3 is volatile. */ | ||
113 | mov %_ASM_ARG3B, %bl | ||
114 | |||
115 | /* Adjust RSP to account for the CALL to vmx_vmenter(). */ | ||
116 | lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 | ||
117 | call vmx_update_host_rsp | ||
118 | |||
119 | /* Load @regs to RAX. */ | ||
120 | mov (%_ASM_SP), %_ASM_AX | ||
121 | |||
122 | /* Check if vmlaunch or vmresume is needed */ | ||
123 | cmpb $0, %bl | ||
124 | |||
125 | /* Load guest registers. Don't clobber flags. */ | ||
126 | mov VCPU_RBX(%_ASM_AX), %_ASM_BX | ||
127 | mov VCPU_RCX(%_ASM_AX), %_ASM_CX | ||
128 | mov VCPU_RDX(%_ASM_AX), %_ASM_DX | ||
129 | mov VCPU_RSI(%_ASM_AX), %_ASM_SI | ||
130 | mov VCPU_RDI(%_ASM_AX), %_ASM_DI | ||
131 | mov VCPU_RBP(%_ASM_AX), %_ASM_BP | ||
132 | #ifdef CONFIG_X86_64 | ||
133 | mov VCPU_R8 (%_ASM_AX), %r8 | ||
134 | mov VCPU_R9 (%_ASM_AX), %r9 | ||
135 | mov VCPU_R10(%_ASM_AX), %r10 | ||
136 | mov VCPU_R11(%_ASM_AX), %r11 | ||
137 | mov VCPU_R12(%_ASM_AX), %r12 | ||
138 | mov VCPU_R13(%_ASM_AX), %r13 | ||
139 | mov VCPU_R14(%_ASM_AX), %r14 | ||
140 | mov VCPU_R15(%_ASM_AX), %r15 | ||
141 | #endif | ||
142 | /* Load guest RAX. This kills the vmx_vcpu pointer! */ | ||
143 | mov VCPU_RAX(%_ASM_AX), %_ASM_AX | ||
144 | |||
145 | /* Enter guest mode */ | ||
146 | call vmx_vmenter | ||
147 | |||
148 | /* Jump on VM-Fail. */ | ||
149 | jbe 2f | ||
150 | |||
151 | /* Temporarily save guest's RAX. */ | ||
152 | push %_ASM_AX | ||
153 | |||
154 | /* Reload @regs to RAX. */ | ||
155 | mov WORD_SIZE(%_ASM_SP), %_ASM_AX | ||
156 | |||
157 | /* Save all guest registers, including RAX from the stack */ | ||
158 | __ASM_SIZE(pop) VCPU_RAX(%_ASM_AX) | ||
159 | mov %_ASM_BX, VCPU_RBX(%_ASM_AX) | ||
160 | mov %_ASM_CX, VCPU_RCX(%_ASM_AX) | ||
161 | mov %_ASM_DX, VCPU_RDX(%_ASM_AX) | ||
162 | mov %_ASM_SI, VCPU_RSI(%_ASM_AX) | ||
163 | mov %_ASM_DI, VCPU_RDI(%_ASM_AX) | ||
164 | mov %_ASM_BP, VCPU_RBP(%_ASM_AX) | ||
165 | #ifdef CONFIG_X86_64 | ||
166 | mov %r8, VCPU_R8 (%_ASM_AX) | ||
167 | mov %r9, VCPU_R9 (%_ASM_AX) | ||
168 | mov %r10, VCPU_R10(%_ASM_AX) | ||
169 | mov %r11, VCPU_R11(%_ASM_AX) | ||
170 | mov %r12, VCPU_R12(%_ASM_AX) | ||
171 | mov %r13, VCPU_R13(%_ASM_AX) | ||
172 | mov %r14, VCPU_R14(%_ASM_AX) | ||
173 | mov %r15, VCPU_R15(%_ASM_AX) | ||
174 | #endif | ||
175 | |||
176 | /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ | ||
177 | xor %eax, %eax | ||
178 | |||
179 | /* | ||
180 | * Clear all general purpose registers except RSP and RAX to prevent | ||
181 | * speculative use of the guest's values, even those that are reloaded | ||
182 | * via the stack. In theory, an L1 cache miss when restoring registers | ||
183 | * could lead to speculative execution with the guest's values. | ||
184 | * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially | ||
185 | * free. RSP and RAX are exempt as RSP is restored by hardware during | ||
186 | * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. | ||
187 | */ | ||
188 | 1: xor %ebx, %ebx | ||
189 | xor %ecx, %ecx | ||
190 | xor %edx, %edx | ||
191 | xor %esi, %esi | ||
192 | xor %edi, %edi | ||
193 | xor %ebp, %ebp | ||
194 | #ifdef CONFIG_X86_64 | ||
195 | xor %r8d, %r8d | ||
196 | xor %r9d, %r9d | ||
197 | xor %r10d, %r10d | ||
198 | xor %r11d, %r11d | ||
199 | xor %r12d, %r12d | ||
200 | xor %r13d, %r13d | ||
201 | xor %r14d, %r14d | ||
202 | xor %r15d, %r15d | ||
203 | #endif | ||
204 | |||
205 | /* "POP" @regs. */ | ||
206 | add $WORD_SIZE, %_ASM_SP | ||
207 | pop %_ASM_BX | ||
208 | |||
209 | #ifdef CONFIG_X86_64 | ||
210 | pop %r12 | ||
211 | pop %r13 | ||
212 | pop %r14 | ||
213 | pop %r15 | ||
214 | #else | ||
215 | pop %esi | ||
216 | pop %edi | ||
217 | #endif | ||
218 | pop %_ASM_BP | ||
219 | ret | ||
220 | |||
221 | /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ | ||
222 | 2: mov $1, %eax | ||
223 | jmp 1b | ||
224 | ENDPROC(__vmx_vcpu_run) | ||
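
The VCPU_* macros at the top of this file turn the register indices from asm/kvm_vcpu_regs.h into byte offsets into the regs array passed to __vmx_vcpu_run(), so e.g. "mov VCPU_R12(%_ASM_AX), %r12" loads regs[12]. The layout assumption can be restated as a compile-time check (hypothetical, not part of the patch; it relies on kvm_host.h defining VCPU_REGS_RAX in terms of __VCPU_REGS_RAX, as this series does):

    #include <linux/bug.h>
    #include <linux/kvm_host.h>
    #include <asm/bitsperlong.h>
    #include <asm/kvm_vcpu_regs.h>

    static inline void assert_vcpu_regs_layout(void)
    {
            /* regs[] is indexed by the architectural register number... */
            BUILD_BUG_ON(VCPU_REGS_RAX != __VCPU_REGS_RAX);
            /* ...and WORD_SIZE matches the stride of an unsigned long. */
            BUILD_BUG_ON(sizeof(unsigned long) != BITS_PER_LONG / 8);
    }
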
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 30a6bcd735ec..c73375e01ab8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c | |||
@@ -246,6 +246,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) | |||
246 | 246 | ||
247 | if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && | 247 | if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && |
248 | !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { | 248 | !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { |
249 | /* | ||
250 | * This allocation for vmx_l1d_flush_pages is not tied to a VM | ||
251 | * lifetime and so should not be charged to a memcg. | ||
252 | */ | ||
249 | page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); | 253 | page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); |
250 | if (!page) | 254 | if (!page) |
251 | return -ENOMEM; | 255 | return -ENOMEM; |
@@ -2387,13 +2391,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, | |||
2387 | return 0; | 2391 | return 0; |
2388 | } | 2392 | } |
2389 | 2393 | ||
2390 | struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu) | 2394 | struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) |
2391 | { | 2395 | { |
2392 | int node = cpu_to_node(cpu); | 2396 | int node = cpu_to_node(cpu); |
2393 | struct page *pages; | 2397 | struct page *pages; |
2394 | struct vmcs *vmcs; | 2398 | struct vmcs *vmcs; |
2395 | 2399 | ||
2396 | pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); | 2400 | pages = __alloc_pages_node(node, flags, vmcs_config.order); |
2397 | if (!pages) | 2401 | if (!pages) |
2398 | return NULL; | 2402 | return NULL; |
2399 | vmcs = page_address(pages); | 2403 | vmcs = page_address(pages); |
@@ -2440,7 +2444,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | |||
2440 | loaded_vmcs_init(loaded_vmcs); | 2444 | loaded_vmcs_init(loaded_vmcs); |
2441 | 2445 | ||
2442 | if (cpu_has_vmx_msr_bitmap()) { | 2446 | if (cpu_has_vmx_msr_bitmap()) { |
2443 | loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | 2447 | loaded_vmcs->msr_bitmap = (unsigned long *) |
2448 | __get_free_page(GFP_KERNEL_ACCOUNT); | ||
2444 | if (!loaded_vmcs->msr_bitmap) | 2449 | if (!loaded_vmcs->msr_bitmap) |
2445 | goto out_vmcs; | 2450 | goto out_vmcs; |
2446 | memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); | 2451 | memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); |
@@ -2481,7 +2486,7 @@ static __init int alloc_kvm_area(void) | |||
2481 | for_each_possible_cpu(cpu) { | 2486 | for_each_possible_cpu(cpu) { |
2482 | struct vmcs *vmcs; | 2487 | struct vmcs *vmcs; |
2483 | 2488 | ||
2484 | vmcs = alloc_vmcs_cpu(false, cpu); | 2489 | vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); |
2485 | if (!vmcs) { | 2490 | if (!vmcs) { |
2486 | free_kvm_area(); | 2491 | free_kvm_area(); |
2487 | return -ENOMEM; | 2492 | return -ENOMEM; |
@@ -6360,150 +6365,15 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) | |||
6360 | vmx->loaded_vmcs->hv_timer_armed = false; | 6365 | vmx->loaded_vmcs->hv_timer_armed = false; |
6361 | } | 6366 | } |
6362 | 6367 | ||
6363 | static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) | 6368 | void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) |
6364 | { | 6369 | { |
6365 | unsigned long evmcs_rsp; | 6370 | if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { |
6366 | 6371 | vmx->loaded_vmcs->host_state.rsp = host_rsp; | |
6367 | vmx->__launched = vmx->loaded_vmcs->launched; | 6372 | vmcs_writel(HOST_RSP, host_rsp); |
6368 | 6373 | } | |
6369 | evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? | ||
6370 | (unsigned long)¤t_evmcs->host_rsp : 0; | ||
6371 | |||
6372 | if (static_branch_unlikely(&vmx_l1d_should_flush)) | ||
6373 | vmx_l1d_flush(vcpu); | ||
6374 | |||
6375 | asm( | ||
6376 | /* Store host registers */ | ||
6377 | "push %%" _ASM_DX "; push %%" _ASM_BP ";" | ||
6378 | "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ | ||
6379 | "push %%" _ASM_CX " \n\t" | ||
6380 | "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ | ||
6381 | "cmp %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t" | ||
6382 | "je 1f \n\t" | ||
6383 | "mov %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t" | ||
6384 | /* Avoid VMWRITE when Enlightened VMCS is in use */ | ||
6385 | "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" | ||
6386 | "jz 2f \n\t" | ||
6387 | "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" | ||
6388 | "jmp 1f \n\t" | ||
6389 | "2: \n\t" | ||
6390 | __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t" | ||
6391 | "1: \n\t" | ||
6392 | "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ | ||
6393 | |||
6394 | /* Reload cr2 if changed */ | ||
6395 | "mov %c[cr2](%%" _ASM_CX "), %%" _ASM_AX " \n\t" | ||
6396 | "mov %%cr2, %%" _ASM_DX " \n\t" | ||
6397 | "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" | ||
6398 | "je 3f \n\t" | ||
6399 | "mov %%" _ASM_AX", %%cr2 \n\t" | ||
6400 | "3: \n\t" | ||
6401 | /* Check if vmlaunch or vmresume is needed */ | ||
6402 | "cmpl $0, %c[launched](%%" _ASM_CX ") \n\t" | ||
6403 | /* Load guest registers. Don't clobber flags. */ | ||
6404 | "mov %c[rax](%%" _ASM_CX "), %%" _ASM_AX " \n\t" | ||
6405 | "mov %c[rbx](%%" _ASM_CX "), %%" _ASM_BX " \n\t" | ||
6406 | "mov %c[rdx](%%" _ASM_CX "), %%" _ASM_DX " \n\t" | ||
6407 | "mov %c[rsi](%%" _ASM_CX "), %%" _ASM_SI " \n\t" | ||
6408 | "mov %c[rdi](%%" _ASM_CX "), %%" _ASM_DI " \n\t" | ||
6409 | "mov %c[rbp](%%" _ASM_CX "), %%" _ASM_BP " \n\t" | ||
6410 | #ifdef CONFIG_X86_64 | ||
6411 | "mov %c[r8](%%" _ASM_CX "), %%r8 \n\t" | ||
6412 | "mov %c[r9](%%" _ASM_CX "), %%r9 \n\t" | ||
6413 | "mov %c[r10](%%" _ASM_CX "), %%r10 \n\t" | ||
6414 | "mov %c[r11](%%" _ASM_CX "), %%r11 \n\t" | ||
6415 | "mov %c[r12](%%" _ASM_CX "), %%r12 \n\t" | ||
6416 | "mov %c[r13](%%" _ASM_CX "), %%r13 \n\t" | ||
6417 | "mov %c[r14](%%" _ASM_CX "), %%r14 \n\t" | ||
6418 | "mov %c[r15](%%" _ASM_CX "), %%r15 \n\t" | ||
6419 | #endif | ||
6420 | /* Load guest RCX. This kills the vmx_vcpu pointer! */ | ||
6421 | "mov %c[rcx](%%" _ASM_CX "), %%" _ASM_CX " \n\t" | ||
6422 | |||
6423 | /* Enter guest mode */ | ||
6424 | "call vmx_vmenter\n\t" | ||
6425 | |||
6426 | /* Save guest's RCX to the stack placeholder (see above) */ | ||
6427 | "mov %%" _ASM_CX ", %c[wordsize](%%" _ASM_SP ") \n\t" | ||
6428 | |||
6429 | /* Load host's RCX, i.e. the vmx_vcpu pointer */ | ||
6430 | "pop %%" _ASM_CX " \n\t" | ||
6431 | |||
6432 | /* Set vmx->fail based on EFLAGS.{CF,ZF} */ | ||
6433 | "setbe %c[fail](%%" _ASM_CX ")\n\t" | ||
6434 | |||
6435 | /* Save all guest registers, including RCX from the stack */ | ||
6436 | "mov %%" _ASM_AX ", %c[rax](%%" _ASM_CX ") \n\t" | ||
6437 | "mov %%" _ASM_BX ", %c[rbx](%%" _ASM_CX ") \n\t" | ||
6438 | __ASM_SIZE(pop) " %c[rcx](%%" _ASM_CX ") \n\t" | ||
6439 | "mov %%" _ASM_DX ", %c[rdx](%%" _ASM_CX ") \n\t" | ||
6440 | "mov %%" _ASM_SI ", %c[rsi](%%" _ASM_CX ") \n\t" | ||
6441 | "mov %%" _ASM_DI ", %c[rdi](%%" _ASM_CX ") \n\t" | ||
6442 | "mov %%" _ASM_BP ", %c[rbp](%%" _ASM_CX ") \n\t" | ||
6443 | #ifdef CONFIG_X86_64 | ||
6444 | "mov %%r8, %c[r8](%%" _ASM_CX ") \n\t" | ||
6445 | "mov %%r9, %c[r9](%%" _ASM_CX ") \n\t" | ||
6446 | "mov %%r10, %c[r10](%%" _ASM_CX ") \n\t" | ||
6447 | "mov %%r11, %c[r11](%%" _ASM_CX ") \n\t" | ||
6448 | "mov %%r12, %c[r12](%%" _ASM_CX ") \n\t" | ||
6449 | "mov %%r13, %c[r13](%%" _ASM_CX ") \n\t" | ||
6450 | "mov %%r14, %c[r14](%%" _ASM_CX ") \n\t" | ||
6451 | "mov %%r15, %c[r15](%%" _ASM_CX ") \n\t" | ||
6452 | /* | ||
6453 | * Clear host registers marked as clobbered to prevent | ||
6454 | * speculative use. | ||
6455 | */ | ||
6456 | "xor %%r8d, %%r8d \n\t" | ||
6457 | "xor %%r9d, %%r9d \n\t" | ||
6458 | "xor %%r10d, %%r10d \n\t" | ||
6459 | "xor %%r11d, %%r11d \n\t" | ||
6460 | "xor %%r12d, %%r12d \n\t" | ||
6461 | "xor %%r13d, %%r13d \n\t" | ||
6462 | "xor %%r14d, %%r14d \n\t" | ||
6463 | "xor %%r15d, %%r15d \n\t" | ||
6464 | #endif | ||
6465 | "mov %%cr2, %%" _ASM_AX " \n\t" | ||
6466 | "mov %%" _ASM_AX ", %c[cr2](%%" _ASM_CX ") \n\t" | ||
6467 | |||
6468 | "xor %%eax, %%eax \n\t" | ||
6469 | "xor %%ebx, %%ebx \n\t" | ||
6470 | "xor %%esi, %%esi \n\t" | ||
6471 | "xor %%edi, %%edi \n\t" | ||
6472 | "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" | ||
6473 | : ASM_CALL_CONSTRAINT | ||
6474 | : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), | ||
6475 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), | ||
6476 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | ||
6477 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), | ||
6478 | [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), | ||
6479 | [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), | ||
6480 | [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), | ||
6481 | [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])), | ||
6482 | [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])), | ||
6483 | [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])), | ||
6484 | [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])), | ||
6485 | #ifdef CONFIG_X86_64 | ||
6486 | [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])), | ||
6487 | [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])), | ||
6488 | [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])), | ||
6489 | [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])), | ||
6490 | [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])), | ||
6491 | [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])), | ||
6492 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), | ||
6493 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), | ||
6494 | #endif | ||
6495 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), | ||
6496 | [wordsize]"i"(sizeof(ulong)) | ||
6497 | : "cc", "memory" | ||
6498 | #ifdef CONFIG_X86_64 | ||
6499 | , "rax", "rbx", "rdi" | ||
6500 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" | ||
6501 | #else | ||
6502 | , "eax", "ebx", "edi" | ||
6503 | #endif | ||
6504 | ); | ||
6505 | } | 6374 | } |
6506 | STACK_FRAME_NON_STANDARD(__vmx_vcpu_run); | 6375 | |
6376 | bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); | ||
6507 | 6377 | ||
6508 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | 6378 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
6509 | { | 6379 | { |
@@ -6572,7 +6442,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6572 | */ | 6442 | */ |
6573 | x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); | 6443 | x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); |
6574 | 6444 | ||
6575 | __vmx_vcpu_run(vcpu, vmx); | 6445 | if (static_branch_unlikely(&vmx_l1d_should_flush)) |
6446 | vmx_l1d_flush(vcpu); | ||
6447 | |||
6448 | if (vcpu->arch.cr2 != read_cr2()) | ||
6449 | write_cr2(vcpu->arch.cr2); | ||
6450 | |||
6451 | vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, | ||
6452 | vmx->loaded_vmcs->launched); | ||
6453 | |||
6454 | vcpu->arch.cr2 = read_cr2(); | ||
6576 | 6455 | ||
6577 | /* | 6456 | /* |
6578 | * We do not use IBRS in the kernel. If this vCPU has used the | 6457 | * We do not use IBRS in the kernel. If this vCPU has used the |
@@ -6657,7 +6536,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6657 | 6536 | ||
6658 | static struct kvm *vmx_vm_alloc(void) | 6537 | static struct kvm *vmx_vm_alloc(void) |
6659 | { | 6538 | { |
6660 | struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx)); | 6539 | struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), |
6540 | GFP_KERNEL_ACCOUNT | __GFP_ZERO, | ||
6541 | PAGE_KERNEL); | ||
6661 | return &kvm_vmx->kvm; | 6542 | return &kvm_vmx->kvm; |
6662 | } | 6543 | } |
6663 | 6544 | ||
@@ -6673,7 +6554,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
6673 | if (enable_pml) | 6554 | if (enable_pml) |
6674 | vmx_destroy_pml_buffer(vmx); | 6555 | vmx_destroy_pml_buffer(vmx); |
6675 | free_vpid(vmx->vpid); | 6556 | free_vpid(vmx->vpid); |
6676 | leave_guest_mode(vcpu); | ||
6677 | nested_vmx_free_vcpu(vcpu); | 6557 | nested_vmx_free_vcpu(vcpu); |
6678 | free_loaded_vmcs(vmx->loaded_vmcs); | 6558 | free_loaded_vmcs(vmx->loaded_vmcs); |
6679 | kfree(vmx->guest_msrs); | 6559 | kfree(vmx->guest_msrs); |
@@ -6685,14 +6565,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
6685 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | 6565 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
6686 | { | 6566 | { |
6687 | int err; | 6567 | int err; |
6688 | struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | 6568 | struct vcpu_vmx *vmx; |
6689 | unsigned long *msr_bitmap; | 6569 | unsigned long *msr_bitmap; |
6690 | int cpu; | 6570 | int cpu; |
6691 | 6571 | ||
6572 | vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); | ||
6692 | if (!vmx) | 6573 | if (!vmx) |
6693 | return ERR_PTR(-ENOMEM); | 6574 | return ERR_PTR(-ENOMEM); |
6694 | 6575 | ||
6695 | vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL); | 6576 | vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, |
6577 | GFP_KERNEL_ACCOUNT); | ||
6696 | if (!vmx->vcpu.arch.guest_fpu) { | 6578 | if (!vmx->vcpu.arch.guest_fpu) { |
6697 | printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); | 6579 | printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); |
6698 | err = -ENOMEM; | 6580 | err = -ENOMEM; |
@@ -6714,12 +6596,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
6714 | * for the guest, etc. | 6596 | * for the guest, etc. |
6715 | */ | 6597 | */ |
6716 | if (enable_pml) { | 6598 | if (enable_pml) { |
6717 | vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); | 6599 | vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
6718 | if (!vmx->pml_pg) | 6600 | if (!vmx->pml_pg) |
6719 | goto uninit_vcpu; | 6601 | goto uninit_vcpu; |
6720 | } | 6602 | } |
6721 | 6603 | ||
6722 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | 6604 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); |
6723 | BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) | 6605 | BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) |
6724 | > PAGE_SIZE); | 6606 | > PAGE_SIZE); |
6725 | 6607 | ||
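
Hoisting the CR2 handling from the old asm blob into vmx_vcpu_run() keeps its conditional-write optimization: a MOV to CR2 is relatively expensive, so hardware CR2 is written only when the guest value actually differs, and it is read back immediately after __vmx_vcpu_run() returns, before host code can fault and clobber it. The pattern in isolation (helper name is illustrative):

    #include <asm/special_insns.h>          /* read_cr2()/write_cr2() */

    static inline void sync_guest_cr2(unsigned long guest_cr2)
    {
            /* Skip the costly write when CR2 already holds the value. */
            if (read_cr2() != guest_cr2)
                    write_cr2(guest_cr2);
    }
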
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 0ac0a64c7790..1554cb45b393 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h | |||
@@ -175,7 +175,6 @@ struct nested_vmx { | |||
175 | 175 | ||
176 | struct vcpu_vmx { | 176 | struct vcpu_vmx { |
177 | struct kvm_vcpu vcpu; | 177 | struct kvm_vcpu vcpu; |
178 | unsigned long host_rsp; | ||
179 | u8 fail; | 178 | u8 fail; |
180 | u8 msr_bitmap_mode; | 179 | u8 msr_bitmap_mode; |
181 | u32 exit_intr_info; | 180 | u32 exit_intr_info; |
@@ -209,7 +208,7 @@ struct vcpu_vmx { | |||
209 | struct loaded_vmcs vmcs01; | 208 | struct loaded_vmcs vmcs01; |
210 | struct loaded_vmcs *loaded_vmcs; | 209 | struct loaded_vmcs *loaded_vmcs; |
211 | struct loaded_vmcs *loaded_cpu_state; | 210 | struct loaded_vmcs *loaded_cpu_state; |
212 | bool __launched; /* temporary, used in vmx_vcpu_run */ | 211 | |
213 | struct msr_autoload { | 212 | struct msr_autoload { |
214 | struct vmx_msrs guest; | 213 | struct vmx_msrs guest; |
215 | struct vmx_msrs host; | 214 | struct vmx_msrs host; |
@@ -339,8 +338,8 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | |||
339 | 338 | ||
340 | static inline void pi_set_sn(struct pi_desc *pi_desc) | 339 | static inline void pi_set_sn(struct pi_desc *pi_desc) |
341 | { | 340 | { |
342 | return set_bit(POSTED_INTR_SN, | 341 | set_bit(POSTED_INTR_SN, |
343 | (unsigned long *)&pi_desc->control); | 342 | (unsigned long *)&pi_desc->control); |
344 | } | 343 | } |
345 | 344 | ||
346 | static inline void pi_set_on(struct pi_desc *pi_desc) | 345 | static inline void pi_set_on(struct pi_desc *pi_desc) |
@@ -445,7 +444,8 @@ static inline u32 vmx_vmentry_ctrl(void) | |||
445 | { | 444 | { |
446 | u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; | 445 | u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; |
447 | if (pt_mode == PT_MODE_SYSTEM) | 446 | if (pt_mode == PT_MODE_SYSTEM) |
448 | vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL); | 447 | vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | |
448 | VM_ENTRY_LOAD_IA32_RTIT_CTL); | ||
449 | /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ | 449 | /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ |
450 | return vmentry_ctrl & | 450 | return vmentry_ctrl & |
451 | ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); | 451 | ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); |
@@ -455,9 +455,10 @@ static inline u32 vmx_vmexit_ctrl(void) | |||
455 | { | 455 | { |
456 | u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; | 456 | u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; |
457 | if (pt_mode == PT_MODE_SYSTEM) | 457 | if (pt_mode == PT_MODE_SYSTEM) |
458 | vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL); | 458 | vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | |
459 | VM_EXIT_CLEAR_IA32_RTIT_CTL); | ||
459 | /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ | 460 | /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ |
460 | return vmcs_config.vmexit_ctrl & | 461 | return vmexit_ctrl & |
461 | ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); | 462 | ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); |
462 | } | 463 | } |
463 | 464 | ||
@@ -478,7 +479,7 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) | |||
478 | return &(to_vmx(vcpu)->pi_desc); | 479 | return &(to_vmx(vcpu)->pi_desc); |
479 | } | 480 | } |
480 | 481 | ||
481 | struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu); | 482 | struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags); |
482 | void free_vmcs(struct vmcs *vmcs); | 483 | void free_vmcs(struct vmcs *vmcs); |
483 | int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); | 484 | int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); |
484 | void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); | 485 | void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); |
@@ -487,7 +488,8 @@ void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs); | |||
487 | 488 | ||
488 | static inline struct vmcs *alloc_vmcs(bool shadow) | 489 | static inline struct vmcs *alloc_vmcs(bool shadow) |
489 | { | 490 | { |
490 | return alloc_vmcs_cpu(shadow, raw_smp_processor_id()); | 491 | return alloc_vmcs_cpu(shadow, raw_smp_processor_id(), |
492 | GFP_KERNEL_ACCOUNT); | ||
491 | } | 493 | } |
492 | 494 | ||
493 | u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); | 495 | u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 941f932373d0..65e4559eef2f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -3879,7 +3879,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3879 | r = -EINVAL; | 3879 | r = -EINVAL; |
3880 | if (!lapic_in_kernel(vcpu)) | 3880 | if (!lapic_in_kernel(vcpu)) |
3881 | goto out; | 3881 | goto out; |
3882 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 3882 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), |
3883 | GFP_KERNEL_ACCOUNT); | ||
3883 | 3884 | ||
3884 | r = -ENOMEM; | 3885 | r = -ENOMEM; |
3885 | if (!u.lapic) | 3886 | if (!u.lapic) |
@@ -4066,7 +4067,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
4066 | break; | 4067 | break; |
4067 | } | 4068 | } |
4068 | case KVM_GET_XSAVE: { | 4069 | case KVM_GET_XSAVE: { |
4069 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | 4070 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT); |
4070 | r = -ENOMEM; | 4071 | r = -ENOMEM; |
4071 | if (!u.xsave) | 4072 | if (!u.xsave) |
4072 | break; | 4073 | break; |
@@ -4090,7 +4091,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
4090 | break; | 4091 | break; |
4091 | } | 4092 | } |
4092 | case KVM_GET_XCRS: { | 4093 | case KVM_GET_XCRS: { |
4093 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | 4094 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT); |
4094 | r = -ENOMEM; | 4095 | r = -ENOMEM; |
4095 | if (!u.xcrs) | 4096 | if (!u.xcrs) |
4096 | break; | 4097 | break; |
@@ -7055,6 +7056,13 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | |||
7055 | 7056 | ||
7056 | void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) | 7057 | void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) |
7057 | { | 7058 | { |
7059 | if (!lapic_in_kernel(vcpu)) { | ||
7060 | WARN_ON_ONCE(vcpu->arch.apicv_active); | ||
7061 | return; | ||
7062 | } | ||
7063 | if (!vcpu->arch.apicv_active) | ||
7064 | return; | ||
7065 | |||
7058 | vcpu->arch.apicv_active = false; | 7066 | vcpu->arch.apicv_active = false; |
7059 | kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); | 7067 | kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); |
7060 | } | 7068 | } |
@@ -9005,7 +9013,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
9005 | struct page *page; | 9013 | struct page *page; |
9006 | int r; | 9014 | int r; |
9007 | 9015 | ||
9008 | vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); | ||
9009 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 9016 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
9010 | if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) | 9017 | if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) |
9011 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 9018 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
@@ -9026,6 +9033,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
9026 | goto fail_free_pio_data; | 9033 | goto fail_free_pio_data; |
9027 | 9034 | ||
9028 | if (irqchip_in_kernel(vcpu->kvm)) { | 9035 | if (irqchip_in_kernel(vcpu->kvm)) { |
9036 | vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); | ||
9029 | r = kvm_create_lapic(vcpu); | 9037 | r = kvm_create_lapic(vcpu); |
9030 | if (r < 0) | 9038 | if (r < 0) |
9031 | goto fail_mmu_destroy; | 9039 | goto fail_mmu_destroy; |
@@ -9033,14 +9041,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
9033 | static_key_slow_inc(&kvm_no_apic_vcpu); | 9041 | static_key_slow_inc(&kvm_no_apic_vcpu); |
9034 | 9042 | ||
9035 | vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, | 9043 | vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, |
9036 | GFP_KERNEL); | 9044 | GFP_KERNEL_ACCOUNT); |
9037 | if (!vcpu->arch.mce_banks) { | 9045 | if (!vcpu->arch.mce_banks) { |
9038 | r = -ENOMEM; | 9046 | r = -ENOMEM; |
9039 | goto fail_free_lapic; | 9047 | goto fail_free_lapic; |
9040 | } | 9048 | } |
9041 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 9049 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
9042 | 9050 | ||
9043 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { | 9051 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, |
9052 | GFP_KERNEL_ACCOUNT)) { | ||
9044 | r = -ENOMEM; | 9053 | r = -ENOMEM; |
9045 | goto fail_free_mce_banks; | 9054 | goto fail_free_mce_banks; |
9046 | } | 9055 | } |
@@ -9104,7 +9113,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
9104 | 9113 | ||
9105 | INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); | 9114 | INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); |
9106 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 9115 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
9107 | INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); | ||
9108 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 9116 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
9109 | atomic_set(&kvm->arch.noncoherent_dma_count, 0); | 9117 | atomic_set(&kvm->arch.noncoherent_dma_count, 0); |
9110 | 9118 | ||
@@ -9299,13 +9307,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
9299 | 9307 | ||
9300 | slot->arch.rmap[i] = | 9308 | slot->arch.rmap[i] = |
9301 | kvcalloc(lpages, sizeof(*slot->arch.rmap[i]), | 9309 | kvcalloc(lpages, sizeof(*slot->arch.rmap[i]), |
9302 | GFP_KERNEL); | 9310 | GFP_KERNEL_ACCOUNT); |
9303 | if (!slot->arch.rmap[i]) | 9311 | if (!slot->arch.rmap[i]) |
9304 | goto out_free; | 9312 | goto out_free; |
9305 | if (i == 0) | 9313 | if (i == 0) |
9306 | continue; | 9314 | continue; |
9307 | 9315 | ||
9308 | linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL); | 9316 | linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); |
9309 | if (!linfo) | 9317 | if (!linfo) |
9310 | goto out_free; | 9318 | goto out_free; |
9311 | 9319 | ||
@@ -9348,13 +9356,13 @@ out_free: | |||
9348 | return -ENOMEM; | 9356 | return -ENOMEM; |
9349 | } | 9357 | } |
9350 | 9358 | ||
9351 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) | 9359 | void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) |
9352 | { | 9360 | { |
9353 | /* | 9361 | /* |
9354 | * memslots->generation has been incremented. | 9362 | * memslots->generation has been incremented. |
9355 | * mmio generation may have reached its maximum value. | 9363 | * mmio generation may have reached its maximum value. |
9356 | */ | 9364 | */ |
9357 | kvm_mmu_invalidate_mmio_sptes(kvm, slots); | 9365 | kvm_mmu_invalidate_mmio_sptes(kvm, gen); |
9358 | } | 9366 | } |
9359 | 9367 | ||
9360 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 9368 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
@@ -9462,7 +9470,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
9462 | 9470 | ||
9463 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 9471 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
9464 | { | 9472 | { |
9465 | kvm_mmu_invalidate_zap_all_pages(kvm); | 9473 | kvm_mmu_zap_all(kvm); |
9466 | } | 9474 | } |
9467 | 9475 | ||
9468 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 9476 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 224cd0a47568..28406aa1136d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -181,6 +181,11 @@ static inline bool emul_is_noncanonical_address(u64 la, | |||
181 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, | 181 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, |
182 | gva_t gva, gfn_t gfn, unsigned access) | 182 | gva_t gva, gfn_t gfn, unsigned access) |
183 | { | 183 | { |
184 | u64 gen = kvm_memslots(vcpu->kvm)->generation; | ||
185 | |||
186 | if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) | ||
187 | return; | ||
188 | |||
184 | /* | 189 | /* |
185 | * If this is a shadow nested page table, the "GVA" is | 190 | * If this is a shadow nested page table, the "GVA" is |
186 | * actually a nGPA. | 191 | * actually a nGPA. |
@@ -188,7 +193,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, | |||
188 | vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; | 193 | vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; |
189 | vcpu->arch.access = access; | 194 | vcpu->arch.access = access; |
190 | vcpu->arch.mmio_gfn = gfn; | 195 | vcpu->arch.mmio_gfn = gfn; |
191 | vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; | 196 | vcpu->arch.mmio_gen = gen; |
192 | } | 197 | } |
193 | 198 | ||
194 | static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) | 199 | static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) |
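
The early return added to vcpu_cache_mmio_info() closes a race: while a memslot update is in flight the generation carries an update-in-progress flag, and MMIO info cached against such a transient generation could otherwise be mistaken for valid once the update completes. The scheme, sketched under the assumption that KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS is a single high flag bit in the generation:

    #include <linux/types.h>
    #include <linux/bits.h>

    /* Illustrative stand-in for KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS. */
    #define GEN_UPDATE_IN_PROGRESS  BIT_ULL(63)

    static inline bool gen_is_cacheable(u64 gen)
    {
            /* Never create caches keyed to an in-flux generation. */
            return !(gen & GEN_UPDATE_IN_PROGRESS);
    }
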
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a8b20b65bd4b..aa4ec53281ce 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c | |||
@@ -1261,6 +1261,13 @@ static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void) | |||
1261 | return ARCH_TIMER_PHYS_SECURE_PPI; | 1261 | return ARCH_TIMER_PHYS_SECURE_PPI; |
1262 | } | 1262 | } |
1263 | 1263 | ||
1264 | static void __init arch_timer_populate_kvm_info(void) | ||
1265 | { | ||
1266 | arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; | ||
1267 | if (is_kernel_in_hyp_mode()) | ||
1268 | arch_timer_kvm_info.physical_irq = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]; | ||
1269 | } | ||
1270 | |||
1264 | static int __init arch_timer_of_init(struct device_node *np) | 1271 | static int __init arch_timer_of_init(struct device_node *np) |
1265 | { | 1272 | { |
1266 | int i, ret; | 1273 | int i, ret; |
@@ -1275,7 +1282,7 @@ static int __init arch_timer_of_init(struct device_node *np) | |||
1275 | for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) | 1282 | for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) |
1276 | arch_timer_ppi[i] = irq_of_parse_and_map(np, i); | 1283 | arch_timer_ppi[i] = irq_of_parse_and_map(np, i); |
1277 | 1284 | ||
1278 | arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; | 1285 | arch_timer_populate_kvm_info(); |
1279 | 1286 | ||
1280 | rate = arch_timer_get_cntfrq(); | 1287 | rate = arch_timer_get_cntfrq(); |
1281 | arch_timer_of_configure_rate(rate, np); | 1288 | arch_timer_of_configure_rate(rate, np); |
@@ -1605,7 +1612,7 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table) | |||
1605 | arch_timer_ppi[ARCH_TIMER_HYP_PPI] = | 1612 | arch_timer_ppi[ARCH_TIMER_HYP_PPI] = |
1606 | acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI); | 1613 | acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI); |
1607 | 1614 | ||
1608 | arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; | 1615 | arch_timer_populate_kvm_info(); |
1609 | 1616 | ||
1610 | /* | 1617 | /* |
1611 | * When probing via ACPI, we have no mechanism to override the sysreg | 1618 | * When probing via ACPI, we have no mechanism to override the sysreg |
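[Editor's note] arch_timer_populate_kvm_info() now also records the non-secure physical timer PPI when the kernel runs at EL2 (VHE), where the guest's physical timer can be backed by hardware. A sketch of the consumer side, assuming KVM fetches the same structure via arch_timer_get_kvm_info() (the helper name below is hypothetical):

	static int timer_irqs_usable(void)
	{
		struct arch_timer_kvm_info *info = arch_timer_get_kvm_info();

		if (info->virtual_irq <= 0)
			return -ENODEV;		/* no usable vtimer PPI */

		/* Assumption drawn from the KVM-side changes later in this
		 * diff: VHE needs the ptimer PPI for direct assignment;
		 * without VHE the guest's ptimer remains emulated. */
		if (has_vhe() && info->physical_irq <= 0)
			return -ENODEV;

		return 0;
	}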
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index a0baee25134c..4159c63a5fd2 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c | |||
@@ -1382,3 +1382,40 @@ int chsc_pnso_brinfo(struct subchannel_id schid, | |||
1382 | return chsc_error_from_response(brinfo_area->response.code); | 1382 | return chsc_error_from_response(brinfo_area->response.code); |
1383 | } | 1383 | } |
1384 | EXPORT_SYMBOL_GPL(chsc_pnso_brinfo); | 1384 | EXPORT_SYMBOL_GPL(chsc_pnso_brinfo); |
1385 | |||
1386 | int chsc_sgib(u32 origin) | ||
1387 | { | ||
1388 | struct { | ||
1389 | struct chsc_header request; | ||
1390 | u16 op; | ||
1391 | u8 reserved01[2]; | ||
1392 | u8 reserved02:4; | ||
1393 | u8 fmt:4; | ||
1394 | u8 reserved03[7]; | ||
1395 | /* operation data area begin */ | ||
1396 | u8 reserved04[4]; | ||
1397 | u32 gib_origin; | ||
1398 | u8 reserved05[10]; | ||
1399 | u8 aix; | ||
1400 | u8 reserved06[4029]; | ||
1401 | struct chsc_header response; | ||
1402 | u8 reserved07[4]; | ||
1403 | } *sgib_area; | ||
1404 | int ret; | ||
1405 | |||
1406 | spin_lock_irq(&chsc_page_lock); | ||
1407 | memset(chsc_page, 0, PAGE_SIZE); | ||
1408 | sgib_area = chsc_page; | ||
1409 | sgib_area->request.length = 0x0fe0; | ||
1410 | sgib_area->request.code = 0x0021; | ||
1411 | sgib_area->op = 0x1; | ||
1412 | sgib_area->gib_origin = origin; | ||
1413 | |||
1414 | ret = chsc(sgib_area); | ||
1415 | if (ret == 0) | ||
1416 | ret = chsc_error_from_response(sgib_area->response.code); | ||
1417 | spin_unlock_irq(&chsc_page_lock); | ||
1418 | |||
1419 | return ret; | ||
1420 | } | ||
1421 | EXPORT_SYMBOL_GPL(chsc_sgib); | ||
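[Editor's note] chsc_sgib() wraps CHSC command 0x0021 ("set guest information block"): the operation code and GIB origin go into the request area, and the response code is translated by chsc_error_from_response(). A hypothetical caller, for illustration only (the helper name and the alignment/addressing constraints are assumptions):

	/* Register a Guest Information Block with the channel subsystem.
	 * Assumed: gib is page-aligned and addressable as a u32 origin. */
	static int register_gib(void *gib)
	{
		return chsc_sgib((u32)(unsigned long)gib);
	}

Deregistration would presumably pass a zero origin, i.e. chsc_sgib(0).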
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 78aba8d94eec..e57d68e325a3 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h | |||
@@ -164,6 +164,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp); | |||
164 | int chsc_ssqd(struct subchannel_id schid, struct chsc_ssqd_area *ssqd); | 164 | int chsc_ssqd(struct subchannel_id schid, struct chsc_ssqd_area *ssqd); |
165 | int chsc_sadc(struct subchannel_id schid, struct chsc_scssc_area *scssc, | 165 | int chsc_sadc(struct subchannel_id schid, struct chsc_scssc_area *scssc, |
166 | u64 summary_indicator_addr, u64 subchannel_indicator_addr); | 166 | u64 summary_indicator_addr, u64 subchannel_indicator_addr); |
167 | int chsc_sgib(u32 origin); | ||
167 | int chsc_error_from_response(int response); | 168 | int chsc_error_from_response(int response); |
168 | 169 | ||
169 | int chsc_siosl(struct subchannel_id schid); | 170 | int chsc_siosl(struct subchannel_id schid); |
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 349e5957c949..702967d996bb 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h | |||
@@ -74,6 +74,7 @@ enum arch_timer_spi_nr { | |||
74 | struct arch_timer_kvm_info { | 74 | struct arch_timer_kvm_info { |
75 | struct timecounter timecounter; | 75 | struct timecounter timecounter; |
76 | int virtual_irq; | 76 | int virtual_irq; |
77 | int physical_irq; | ||
77 | }; | 78 | }; |
78 | 79 | ||
79 | struct arch_timer_mem_frame { | 80 | struct arch_timer_mem_frame { |
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 33771352dcd6..05a18dd265b5 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h | |||
@@ -22,7 +22,22 @@ | |||
22 | #include <linux/clocksource.h> | 22 | #include <linux/clocksource.h> |
23 | #include <linux/hrtimer.h> | 23 | #include <linux/hrtimer.h> |
24 | 24 | ||
25 | enum kvm_arch_timers { | ||
26 | TIMER_PTIMER, | ||
27 | TIMER_VTIMER, | ||
28 | NR_KVM_TIMERS | ||
29 | }; | ||
30 | |||
31 | enum kvm_arch_timer_regs { | ||
32 | TIMER_REG_CNT, | ||
33 | TIMER_REG_CVAL, | ||
34 | TIMER_REG_TVAL, | ||
35 | TIMER_REG_CTL, | ||
36 | }; | ||
37 | |||
25 | struct arch_timer_context { | 38 | struct arch_timer_context { |
39 | struct kvm_vcpu *vcpu; | ||
40 | |||
26 | /* Registers: control register, timer value */ | 41 | /* Registers: control register, timer value */ |
27 | u32 cnt_ctl; | 42 | u32 cnt_ctl; |
28 | u64 cnt_cval; | 43 | u64 cnt_cval; |
@@ -30,30 +45,36 @@ struct arch_timer_context { | |||
30 | /* Timer IRQ */ | 45 | /* Timer IRQ */ |
31 | struct kvm_irq_level irq; | 46 | struct kvm_irq_level irq; |
32 | 47 | ||
48 | /* Virtual offset */ | ||
49 | u64 cntvoff; | ||
50 | |||
51 | /* Emulated Timer (may be unused) */ | ||
52 | struct hrtimer hrtimer; | ||
53 | |||
33 | /* | 54 | /* |
34 | * We have multiple paths which can save/restore the timer state | 55 | * We have multiple paths which can save/restore the timer state onto |
35 | * onto the hardware, so we need some way of keeping track of | 56 | * the hardware, so we need some way of keeping track of where the |
36 | * where the latest state is. | 57 | * latest state is. |
37 | * | ||
38 | * loaded == true: State is loaded on the hardware registers. | ||
39 | * loaded == false: State is stored in memory. | ||
40 | */ | 58 | */ |
41 | bool loaded; | 59 | bool loaded; |
42 | 60 | ||
43 | /* Virtual offset */ | 61 | /* Duplicated state from arch_timer.c for convenience */ |
44 | u64 cntvoff; | 62 | u32 host_timer_irq; |
63 | u32 host_timer_irq_flags; | ||
64 | }; | ||
65 | |||
66 | struct timer_map { | ||
67 | struct arch_timer_context *direct_vtimer; | ||
68 | struct arch_timer_context *direct_ptimer; | ||
69 | struct arch_timer_context *emul_ptimer; | ||
45 | }; | 70 | }; |
46 | 71 | ||
47 | struct arch_timer_cpu { | 72 | struct arch_timer_cpu { |
48 | struct arch_timer_context vtimer; | 73 | struct arch_timer_context timers[NR_KVM_TIMERS]; |
49 | struct arch_timer_context ptimer; | ||
50 | 74 | ||
51 | /* Background timer used when the guest is not running */ | 75 | /* Background timer used when the guest is not running */ |
52 | struct hrtimer bg_timer; | 76 | struct hrtimer bg_timer; |
53 | 77 | ||
54 | /* Physical timer emulation */ | ||
55 | struct hrtimer phys_timer; | ||
56 | |||
57 | /* Is the timer enabled */ | 78 | /* Is the timer enabled */ |
58 | bool enabled; | 79 | bool enabled; |
59 | }; | 80 | }; |
@@ -76,9 +97,6 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); | |||
76 | 97 | ||
77 | bool kvm_timer_is_pending(struct kvm_vcpu *vcpu); | 98 | bool kvm_timer_is_pending(struct kvm_vcpu *vcpu); |
78 | 99 | ||
79 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); | ||
80 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); | ||
81 | |||
82 | u64 kvm_phys_timer_read(void); | 100 | u64 kvm_phys_timer_read(void); |
83 | 101 | ||
84 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu); | 102 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu); |
@@ -88,7 +106,19 @@ void kvm_timer_init_vhe(void); | |||
88 | 106 | ||
89 | bool kvm_arch_timer_get_input_level(int vintid); | 107 | bool kvm_arch_timer_get_input_level(int vintid); |
90 | 108 | ||
91 | #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) | 109 | #define vcpu_timer(v) (&(v)->arch.timer_cpu) |
92 | #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) | 110 | #define vcpu_get_timer(v,t) (&vcpu_timer(v)->timers[(t)]) |
111 | #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER]) | ||
112 | #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER]) | ||
113 | |||
114 | #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) | ||
115 | |||
116 | u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, | ||
117 | enum kvm_arch_timers tmr, | ||
118 | enum kvm_arch_timer_regs treg); | ||
119 | void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, | ||
120 | enum kvm_arch_timers tmr, | ||
121 | enum kvm_arch_timer_regs treg, | ||
122 | u64 val); | ||
93 | 123 | ||
94 | #endif | 124 | #endif |
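[Editor's note] The new accessors in arm_arch_timer.h are inverses of one another: vcpu_get_timer() indexes the embedded timers[] array, while arch_timer_ctx_index() recovers the index from a context pointer by pointer subtraction against that array. A small sketch (hypothetical helper):

	static inline bool ctx_is_vtimer(struct arch_timer_context *ctx)
	{
		/* Pointer arithmetic against the embedding timers[] array. */
		return arch_timer_ctx_index(ctx) == TIMER_VTIMER;
	}

By construction, vcpu_get_timer(vcpu, TIMER_PTIMER) and vcpu_ptimer(vcpu) yield the same pointer.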
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c38cc5eb7e73..9d55c63db09b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -48,6 +48,27 @@ | |||
48 | */ | 48 | */ |
49 | #define KVM_MEMSLOT_INVALID (1UL << 16) | 49 | #define KVM_MEMSLOT_INVALID (1UL << 16) |
50 | 50 | ||
51 | /* | ||
52 | * Bit 63 of the memslot generation number is an "update in-progress flag", | ||
53 | * e.g. is temporarily set for the duration of install_new_memslots(). | ||
54 | * This flag effectively creates a unique generation number that is used to | ||
55 | * mark cached memslot data, e.g. MMIO accesses, as potentially being stale, | ||
56 | * i.e. may (or may not) have come from the previous memslots generation. | ||
57 | * | ||
58 | * This is necessary because the actual memslots update is not atomic with | ||
59 | * respect to the generation number update. Updating the generation number | ||
60 | * first would allow a vCPU to cache a spte from the old memslots using the | ||
61 | * new generation number, and updating the generation number after switching | ||
62 | * to the new memslots would allow cache hits using the old generation number | ||
63 | * to reference the defunct memslots. | ||
64 | * | ||
65 | * This mechanism is used to prevent getting hits in KVM's caches while a | ||
66 | * memslot update is in-progress, and to prevent cache hits *after* updating | ||
67 | * the actual generation number against accesses that were inserted into the | ||
68 | * cache *before* the memslots were updated. | ||
69 | */ | ||
70 | #define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(63) | ||
71 | |||
51 | /* Two fragments for cross MMIO pages. */ | 72 | /* Two fragments for cross MMIO pages. */ |
52 | #define KVM_MAX_MMIO_FRAGMENTS 2 | 73 | #define KVM_MAX_MMIO_FRAGMENTS 2 |
53 | 74 | ||
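[Editor's note] The comment block above pins down an ordering, not an implementation. A condensed sketch of the sequence it implies (the function name and the exact generation arithmetic are assumptions; install_new_memslots() is not quoted here):

	static void publish_memslots_sketch(struct kvm *kvm, int as_id,
					    struct kvm_memslots *slots,
					    u64 old_gen)
	{
		/* 1. New slots start life flagged, so any lookup racing with
		 *    the switch caches a generation that can never match. */
		slots->generation = old_gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;

		/* 2. Publish; readers may now see old or new slots. */
		rcu_assign_pointer(kvm->memslots[as_id], slots);
		synchronize_srcu_expedited(&kvm->srcu);

		/* 3. Only now install the real new generation, invalidating
		 *    anything cached against the old one. */
		slots->generation = (old_gen + 1) &
				    ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
	}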
@@ -634,7 +655,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | |||
634 | struct kvm_memory_slot *dont); | 655 | struct kvm_memory_slot *dont); |
635 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | 656 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
636 | unsigned long npages); | 657 | unsigned long npages); |
637 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots); | 658 | void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); |
638 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 659 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
639 | struct kvm_memory_slot *memslot, | 660 | struct kvm_memory_slot *memslot, |
640 | const struct kvm_userspace_memory_region *mem, | 661 | const struct kvm_userspace_memory_region *mem, |
@@ -1182,6 +1203,7 @@ extern bool kvm_rebooting; | |||
1182 | 1203 | ||
1183 | extern unsigned int halt_poll_ns; | 1204 | extern unsigned int halt_poll_ns; |
1184 | extern unsigned int halt_poll_ns_grow; | 1205 | extern unsigned int halt_poll_ns_grow; |
1206 | extern unsigned int halt_poll_ns_grow_start; | ||
1185 | extern unsigned int halt_poll_ns_shrink; | 1207 | extern unsigned int halt_poll_ns_shrink; |
1186 | 1208 | ||
1187 | struct kvm_device { | 1209 | struct kvm_device { |
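[Editor's note] halt_poll_ns_grow_start joins the existing grow/shrink module parameters above. A sketch of a growth policy consistent with the new knob (an assumption about kvm_main.c's behavior, not a quote of it): polling windows grow multiplicatively but never start below grow_start.

	static void grow_halt_poll_ns_sketch(struct kvm_vcpu *vcpu)
	{
		unsigned int val = vcpu->halt_poll_ns;
		unsigned int grow = READ_ONCE(halt_poll_ns_grow);
		unsigned int start = READ_ONCE(halt_poll_ns_grow_start);

		if (!grow)
			return;

		val *= grow;
		if (val < start)
			val = start;	/* never begin below the floor */

		vcpu->halt_poll_ns = val;
	}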
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6210ba41c29e..2689d1ea6d7a 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore | |||
@@ -3,6 +3,7 @@ | |||
3 | /x86_64/platform_info_test | 3 | /x86_64/platform_info_test |
4 | /x86_64/set_sregs_test | 4 | /x86_64/set_sregs_test |
5 | /x86_64/sync_regs_test | 5 | /x86_64/sync_regs_test |
6 | /x86_64/vmx_close_while_nested_test | ||
6 | /x86_64/vmx_tsc_adjust_test | 7 | /x86_64/vmx_tsc_adjust_test |
7 | /x86_64/state_test | 8 | /x86_64/state_test |
8 | /dirty_log_test | 9 | /dirty_log_test |
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index f9a0e9938480..3c1f4bdf9000 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile | |||
@@ -16,6 +16,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test | |||
16 | TEST_GEN_PROGS_x86_64 += x86_64/state_test | 16 | TEST_GEN_PROGS_x86_64 += x86_64/state_test |
17 | TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test | 17 | TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test |
18 | TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid | 18 | TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid |
19 | TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test | ||
19 | TEST_GEN_PROGS_x86_64 += dirty_log_test | 20 | TEST_GEN_PROGS_x86_64 += dirty_log_test |
20 | TEST_GEN_PROGS_x86_64 += clear_dirty_log_test | 21 | TEST_GEN_PROGS_x86_64 += clear_dirty_log_test |
21 | 22 | ||
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c new file mode 100644 index 000000000000..6edec6fd790b --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * vmx_close_while_nested | ||
3 | * | ||
4 | * Copyright (C) 2019, Red Hat, Inc. | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
7 | * | ||
8 | * Verify that nothing bad happens if a KVM user exits with open | ||
9 | * file descriptors while executing a nested guest. | ||
10 | */ | ||
11 | |||
12 | #include "test_util.h" | ||
13 | #include "kvm_util.h" | ||
14 | #include "processor.h" | ||
15 | #include "vmx.h" | ||
16 | |||
17 | #include <string.h> | ||
18 | #include <sys/ioctl.h> | ||
19 | |||
20 | #include "kselftest.h" | ||
21 | |||
22 | #define VCPU_ID 5 | ||
23 | |||
24 | enum { | ||
25 | PORT_L0_EXIT = 0x2000, | ||
26 | }; | ||
27 | |||
28 | /* The virtual machine object. */ | ||
29 | static struct kvm_vm *vm; | ||
30 | |||
31 | static void l2_guest_code(void) | ||
32 | { | ||
33 | /* Exit to L0 */ | ||
34 | asm volatile("inb %%dx, %%al" | ||
35 | : : [port] "d" (PORT_L0_EXIT) : "rax"); | ||
36 | } | ||
37 | |||
38 | static void l1_guest_code(struct vmx_pages *vmx_pages) | ||
39 | { | ||
40 | #define L2_GUEST_STACK_SIZE 64 | ||
41 | unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; | ||
42 | uint32_t control; | ||
43 | uintptr_t save_cr3; | ||
44 | |||
45 | GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); | ||
46 | GUEST_ASSERT(load_vmcs(vmx_pages)); | ||
47 | |||
48 | /* Prepare the VMCS for L2 execution. */ | ||
49 | prepare_vmcs(vmx_pages, l2_guest_code, | ||
50 | &l2_guest_stack[L2_GUEST_STACK_SIZE]); | ||
51 | |||
52 | GUEST_ASSERT(!vmlaunch()); | ||
53 | GUEST_ASSERT(0); | ||
54 | } | ||
55 | |||
56 | int main(int argc, char *argv[]) | ||
57 | { | ||
58 | struct vmx_pages *vmx_pages; | ||
59 | vm_vaddr_t vmx_pages_gva; | ||
60 | struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); | ||
61 | |||
62 | if (!(entry->ecx & CPUID_VMX)) { | ||
63 | fprintf(stderr, "nested VMX not enabled, skipping test\n"); | ||
64 | exit(KSFT_SKIP); | ||
65 | } | ||
66 | |||
67 | vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); | ||
68 | vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); | ||
69 | |||
70 | /* Allocate VMX pages and shared descriptors (vmx_pages). */ | ||
71 | vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); | ||
72 | vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); | ||
73 | |||
74 | for (;;) { | ||
75 | volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); | ||
76 | struct ucall uc; | ||
77 | |||
78 | vcpu_run(vm, VCPU_ID); | ||
79 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, | ||
80 | "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", | ||
81 | run->exit_reason, | ||
82 | exit_reason_str(run->exit_reason)); | ||
83 | |||
84 | if (run->io.port == PORT_L0_EXIT) | ||
85 | break; | ||
86 | |||
87 | switch (get_ucall(vm, VCPU_ID, &uc)) { | ||
88 | case UCALL_ABORT: | ||
89 | TEST_ASSERT(false, "%s", (const char *)uc.args[0]); | ||
90 | /* NOT REACHED */ | ||
91 | default: | ||
92 | TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); | ||
93 | } | ||
94 | } | ||
95 | } | ||
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index b07ac4614e1c..3417f2dbc366 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #include <clocksource/arm_arch_timer.h> | 26 | #include <clocksource/arm_arch_timer.h> |
27 | #include <asm/arch_timer.h> | 27 | #include <asm/arch_timer.h> |
28 | #include <asm/kvm_emulate.h> | ||
28 | #include <asm/kvm_hyp.h> | 29 | #include <asm/kvm_hyp.h> |
29 | 30 | ||
30 | #include <kvm/arm_vgic.h> | 31 | #include <kvm/arm_vgic.h> |
@@ -34,7 +35,9 @@ | |||
34 | 35 | ||
35 | static struct timecounter *timecounter; | 36 | static struct timecounter *timecounter; |
36 | static unsigned int host_vtimer_irq; | 37 | static unsigned int host_vtimer_irq; |
38 | static unsigned int host_ptimer_irq; | ||
37 | static u32 host_vtimer_irq_flags; | 39 | static u32 host_vtimer_irq_flags; |
40 | static u32 host_ptimer_irq_flags; | ||
38 | 41 | ||
39 | static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); | 42 | static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); |
40 | 43 | ||
@@ -52,12 +55,34 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); | |||
52 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, | 55 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, |
53 | struct arch_timer_context *timer_ctx); | 56 | struct arch_timer_context *timer_ctx); |
54 | static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); | 57 | static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); |
58 | static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, | ||
59 | struct arch_timer_context *timer, | ||
60 | enum kvm_arch_timer_regs treg, | ||
61 | u64 val); | ||
62 | static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, | ||
63 | struct arch_timer_context *timer, | ||
64 | enum kvm_arch_timer_regs treg); | ||
55 | 65 | ||
56 | u64 kvm_phys_timer_read(void) | 66 | u64 kvm_phys_timer_read(void) |
57 | { | 67 | { |
58 | return timecounter->cc->read(timecounter->cc); | 68 | return timecounter->cc->read(timecounter->cc); |
59 | } | 69 | } |
60 | 70 | ||
71 | static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) | ||
72 | { | ||
73 | if (has_vhe()) { | ||
74 | map->direct_vtimer = vcpu_vtimer(vcpu); | ||
75 | map->direct_ptimer = vcpu_ptimer(vcpu); | ||
76 | map->emul_ptimer = NULL; | ||
77 | } else { | ||
78 | map->direct_vtimer = vcpu_vtimer(vcpu); | ||
79 | map->direct_ptimer = NULL; | ||
80 | map->emul_ptimer = vcpu_ptimer(vcpu); | ||
81 | } | ||
82 | |||
83 | trace_kvm_get_timer_map(vcpu->vcpu_id, map); | ||
84 | } | ||
85 | |||
61 | static inline bool userspace_irqchip(struct kvm *kvm) | 86 | static inline bool userspace_irqchip(struct kvm *kvm) |
62 | { | 87 | { |
63 | return static_branch_unlikely(&userspace_irqchip_in_use) && | 88 | return static_branch_unlikely(&userspace_irqchip_in_use) && |
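[Editor's note] get_timer_map() above encodes the central design point of the series: with VHE the host owns EL2, so both of the guest's EL1 timers can be assigned directly to hardware; without VHE the host itself still uses the physical timer, so the guest's ptimer must be emulated. A trivial sketch built on the map (hypothetical helper):

	/* True exactly when the guest's EL1 physical timer is emulated in
	 * software, i.e. on non-VHE hosts per get_timer_map(). */
	static bool ptimer_is_emulated(struct kvm_vcpu *vcpu)
	{
		struct timer_map map;

		get_timer_map(vcpu, &map);
		return map.emul_ptimer != NULL;
	}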
@@ -78,20 +103,27 @@ static void soft_timer_cancel(struct hrtimer *hrt) | |||
78 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | 103 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) |
79 | { | 104 | { |
80 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; | 105 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; |
81 | struct arch_timer_context *vtimer; | 106 | struct arch_timer_context *ctx; |
107 | struct timer_map map; | ||
82 | 108 | ||
83 | /* | 109 | /* |
84 | * We may see a timer interrupt after vcpu_put() has been called which | 110 | * We may see a timer interrupt after vcpu_put() has been called which |
85 | * sets the CPU's vcpu pointer to NULL, because even though the timer | 111 | * sets the CPU's vcpu pointer to NULL, because even though the timer |
86 | * has been disabled in vtimer_save_state(), the hardware interrupt | 112 | * has been disabled in timer_save_state(), the hardware interrupt |
87 | * signal may not have been retired from the interrupt controller yet. | 113 | * signal may not have been retired from the interrupt controller yet. |
88 | */ | 114 | */ |
89 | if (!vcpu) | 115 | if (!vcpu) |
90 | return IRQ_HANDLED; | 116 | return IRQ_HANDLED; |
91 | 117 | ||
92 | vtimer = vcpu_vtimer(vcpu); | 118 | get_timer_map(vcpu, &map); |
93 | if (kvm_timer_should_fire(vtimer)) | 119 | |
94 | kvm_timer_update_irq(vcpu, true, vtimer); | 120 | if (irq == host_vtimer_irq) |
121 | ctx = map.direct_vtimer; | ||
122 | else | ||
123 | ctx = map.direct_ptimer; | ||
124 | |||
125 | if (kvm_timer_should_fire(ctx)) | ||
126 | kvm_timer_update_irq(vcpu, true, ctx); | ||
95 | 127 | ||
96 | if (userspace_irqchip(vcpu->kvm) && | 128 | if (userspace_irqchip(vcpu->kvm) && |
97 | !static_branch_unlikely(&has_gic_active_state)) | 129 | !static_branch_unlikely(&has_gic_active_state)) |
@@ -122,7 +154,9 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) | |||
122 | 154 | ||
123 | static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) | 155 | static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) |
124 | { | 156 | { |
125 | return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && | 157 | WARN_ON(timer_ctx && timer_ctx->loaded); |
158 | return timer_ctx && | ||
159 | !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && | ||
126 | (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); | 160 | (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); |
127 | } | 161 | } |
128 | 162 | ||
@@ -132,21 +166,22 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) | |||
132 | */ | 166 | */ |
133 | static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) | 167 | static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) |
134 | { | 168 | { |
135 | u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX; | 169 | u64 min_delta = ULLONG_MAX; |
136 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 170 | int i; |
137 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
138 | 171 | ||
139 | if (kvm_timer_irq_can_fire(vtimer)) | 172 | for (i = 0; i < NR_KVM_TIMERS; i++) { |
140 | min_virt = kvm_timer_compute_delta(vtimer); | 173 | struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; |
141 | 174 | ||
142 | if (kvm_timer_irq_can_fire(ptimer)) | 175 | WARN(ctx->loaded, "timer %d loaded\n", i); |
143 | min_phys = kvm_timer_compute_delta(ptimer); | 176 | if (kvm_timer_irq_can_fire(ctx)) |
177 | min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); | ||
178 | } | ||
144 | 179 | ||
145 | /* If none of the timers can fire, return 0 */ | 180 | /* If none of the timers can fire, return 0 */ |
146 | if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX)) | 181 | if (min_delta == ULLONG_MAX) |
147 | return 0; | 182 | return 0; |
148 | 183 | ||
149 | return min(min_virt, min_phys); | 184 | return min_delta; |
150 | } | 185 | } |
151 | 186 | ||
152 | static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) | 187 | static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) |
@@ -173,41 +208,58 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) | |||
173 | return HRTIMER_NORESTART; | 208 | return HRTIMER_NORESTART; |
174 | } | 209 | } |
175 | 210 | ||
176 | static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) | 211 | static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) |
177 | { | 212 | { |
178 | struct arch_timer_context *ptimer; | 213 | struct arch_timer_context *ctx; |
179 | struct arch_timer_cpu *timer; | ||
180 | struct kvm_vcpu *vcpu; | 214 | struct kvm_vcpu *vcpu; |
181 | u64 ns; | 215 | u64 ns; |
182 | 216 | ||
183 | timer = container_of(hrt, struct arch_timer_cpu, phys_timer); | 217 | ctx = container_of(hrt, struct arch_timer_context, hrtimer); |
184 | vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); | 218 | vcpu = ctx->vcpu; |
185 | ptimer = vcpu_ptimer(vcpu); | 219 | |
220 | trace_kvm_timer_hrtimer_expire(ctx); | ||
186 | 221 | ||
187 | /* | 222 | /* |
188 | * Check that the timer has really expired from the guest's | 223 | * Check that the timer has really expired from the guest's |
189 | * PoV (NTP on the host may have forced it to expire | 224 | * PoV (NTP on the host may have forced it to expire |
190 | * early). If not ready, schedule for a later time. | 225 | * early). If not ready, schedule for a later time. |
191 | */ | 226 | */ |
192 | ns = kvm_timer_compute_delta(ptimer); | 227 | ns = kvm_timer_compute_delta(ctx); |
193 | if (unlikely(ns)) { | 228 | if (unlikely(ns)) { |
194 | hrtimer_forward_now(hrt, ns_to_ktime(ns)); | 229 | hrtimer_forward_now(hrt, ns_to_ktime(ns)); |
195 | return HRTIMER_RESTART; | 230 | return HRTIMER_RESTART; |
196 | } | 231 | } |
197 | 232 | ||
198 | kvm_timer_update_irq(vcpu, true, ptimer); | 233 | kvm_timer_update_irq(vcpu, true, ctx); |
199 | return HRTIMER_NORESTART; | 234 | return HRTIMER_NORESTART; |
200 | } | 235 | } |
201 | 236 | ||
202 | static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) | 237 | static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) |
203 | { | 238 | { |
239 | enum kvm_arch_timers index; | ||
204 | u64 cval, now; | 240 | u64 cval, now; |
205 | 241 | ||
242 | if (!timer_ctx) | ||
243 | return false; | ||
244 | |||
245 | index = arch_timer_ctx_index(timer_ctx); | ||
246 | |||
206 | if (timer_ctx->loaded) { | 247 | if (timer_ctx->loaded) { |
207 | u32 cnt_ctl; | 248 | u32 cnt_ctl = 0; |
249 | |||
250 | switch (index) { | ||
251 | case TIMER_VTIMER: | ||
252 | cnt_ctl = read_sysreg_el0(cntv_ctl); | ||
253 | break; | ||
254 | case TIMER_PTIMER: | ||
255 | cnt_ctl = read_sysreg_el0(cntp_ctl); | ||
256 | break; | ||
257 | case NR_KVM_TIMERS: | ||
258 | /* GCC is braindead */ | ||
259 | cnt_ctl = 0; | ||
260 | break; | ||
261 | } | ||
208 | 262 | ||
209 | /* Only the virtual timer can be loaded so far */ | ||
210 | cnt_ctl = read_sysreg_el0(cntv_ctl); | ||
211 | return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && | 263 | return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && |
212 | (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && | 264 | (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && |
213 | !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); | 265 | !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); |
@@ -224,13 +276,13 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) | |||
224 | 276 | ||
225 | bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) | 277 | bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) |
226 | { | 278 | { |
227 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 279 | struct timer_map map; |
228 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
229 | 280 | ||
230 | if (kvm_timer_should_fire(vtimer)) | 281 | get_timer_map(vcpu, &map); |
231 | return true; | ||
232 | 282 | ||
233 | return kvm_timer_should_fire(ptimer); | 283 | return kvm_timer_should_fire(map.direct_vtimer) || |
284 | kvm_timer_should_fire(map.direct_ptimer) || | ||
285 | kvm_timer_should_fire(map.emul_ptimer); | ||
234 | } | 286 | } |
235 | 287 | ||
236 | /* | 288 | /* |
@@ -269,77 +321,70 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, | |||
269 | } | 321 | } |
270 | } | 322 | } |
271 | 323 | ||
272 | /* Schedule the background timer for the emulated timer. */ | 324 | static void timer_emulate(struct arch_timer_context *ctx) |
273 | static void phys_timer_emulate(struct kvm_vcpu *vcpu) | ||
274 | { | 325 | { |
275 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 326 | bool should_fire = kvm_timer_should_fire(ctx); |
276 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 327 | |
328 | trace_kvm_timer_emulate(ctx, should_fire); | ||
329 | |||
330 | if (should_fire) { | ||
331 | kvm_timer_update_irq(ctx->vcpu, true, ctx); | ||
332 | return; | ||
333 | } | ||
277 | 334 | ||
278 | /* | 335 | /* |
279 | * If the timer can fire now, we don't need to have a soft timer | 336 | * If the timer can fire now, we don't need to have a soft timer |
280 | * scheduled for the future. If the timer cannot fire at all, | 337 | * scheduled for the future. If the timer cannot fire at all, |
281 | * then we also don't need a soft timer. | 338 | * then we also don't need a soft timer. |
282 | */ | 339 | */ |
283 | if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { | 340 | if (!kvm_timer_irq_can_fire(ctx)) { |
284 | soft_timer_cancel(&timer->phys_timer); | 341 | soft_timer_cancel(&ctx->hrtimer); |
285 | return; | 342 | return; |
286 | } | 343 | } |
287 | 344 | ||
288 | soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer)); | 345 | soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx)); |
289 | } | 346 | } |
290 | 347 | ||
291 | /* | 348 | static void timer_save_state(struct arch_timer_context *ctx) |
292 | * Check if there was a change in the timer state, so that we should either | ||
293 | * raise or lower the line level to the GIC or schedule a background timer to | ||
294 | * emulate the physical timer. | ||
295 | */ | ||
296 | static void kvm_timer_update_state(struct kvm_vcpu *vcpu) | ||
297 | { | 349 | { |
298 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 350 | struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); |
299 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 351 | enum kvm_arch_timers index = arch_timer_ctx_index(ctx); |
300 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 352 | unsigned long flags; |
301 | bool level; | ||
302 | 353 | ||
303 | if (unlikely(!timer->enabled)) | 354 | if (!timer->enabled) |
304 | return; | 355 | return; |
305 | 356 | ||
306 | /* | 357 | local_irq_save(flags); |
307 | * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part | ||
308 | * of its lifecycle is offloaded to the hardware, and we therefore may | ||
309 | * not have lowered the irq.level value before having to signal a new | ||
310 | * interrupt, but have to signal an interrupt every time the level is | ||
311 | * asserted. | ||
312 | */ | ||
313 | level = kvm_timer_should_fire(vtimer); | ||
314 | kvm_timer_update_irq(vcpu, level, vtimer); | ||
315 | 358 | ||
316 | phys_timer_emulate(vcpu); | 359 | if (!ctx->loaded) |
360 | goto out; | ||
317 | 361 | ||
318 | if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) | 362 | switch (index) { |
319 | kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); | 363 | case TIMER_VTIMER: |
320 | } | 364 | ctx->cnt_ctl = read_sysreg_el0(cntv_ctl); |
365 | ctx->cnt_cval = read_sysreg_el0(cntv_cval); | ||
321 | 366 | ||
322 | static void vtimer_save_state(struct kvm_vcpu *vcpu) | 367 | /* Disable the timer */ |
323 | { | 368 | write_sysreg_el0(0, cntv_ctl); |
324 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 369 | isb(); |
325 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
326 | unsigned long flags; | ||
327 | 370 | ||
328 | local_irq_save(flags); | 371 | break; |
372 | case TIMER_PTIMER: | ||
373 | ctx->cnt_ctl = read_sysreg_el0(cntp_ctl); | ||
374 | ctx->cnt_cval = read_sysreg_el0(cntp_cval); | ||
329 | 375 | ||
330 | if (!vtimer->loaded) | 376 | /* Disable the timer */ |
331 | goto out; | 377 | write_sysreg_el0(0, cntp_ctl); |
378 | isb(); | ||
332 | 379 | ||
333 | if (timer->enabled) { | 380 | break; |
334 | vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); | 381 | case NR_KVM_TIMERS: |
335 | vtimer->cnt_cval = read_sysreg_el0(cntv_cval); | 382 | BUG(); |
336 | } | 383 | } |
337 | 384 | ||
338 | /* Disable the virtual timer */ | 385 | trace_kvm_timer_save_state(ctx); |
339 | write_sysreg_el0(0, cntv_ctl); | ||
340 | isb(); | ||
341 | 386 | ||
342 | vtimer->loaded = false; | 387 | ctx->loaded = false; |
343 | out: | 388 | out: |
344 | local_irq_restore(flags); | 389 | local_irq_restore(flags); |
345 | } | 390 | } |
@@ -349,67 +394,72 @@ out: | |||
349 | * thread is removed from its waitqueue and made runnable when there's a timer | 394 | * thread is removed from its waitqueue and made runnable when there's a timer |
350 | * interrupt to handle. | 395 | * interrupt to handle. |
351 | */ | 396 | */ |
352 | void kvm_timer_schedule(struct kvm_vcpu *vcpu) | 397 | static void kvm_timer_blocking(struct kvm_vcpu *vcpu) |
353 | { | 398 | { |
354 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 399 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
355 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 400 | struct timer_map map; |
356 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
357 | |||
358 | vtimer_save_state(vcpu); | ||
359 | 401 | ||
360 | /* | 402 | get_timer_map(vcpu, &map); |
361 | * No need to schedule a background timer if any guest timer has | ||
362 | * already expired, because kvm_vcpu_block will return before putting | ||
363 | * the thread to sleep. | ||
364 | */ | ||
365 | if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer)) | ||
366 | return; | ||
367 | 403 | ||
368 | /* | 404 | /* |
369 | * If both timers are not capable of raising interrupts (disabled or | 405 | * If no timers are capable of raising interrupts (disabled or |
370 | * masked), then there's no more work for us to do. | 406 | * masked), then there's no more work for us to do. |
371 | */ | 407 | */ |
372 | if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer)) | 408 | if (!kvm_timer_irq_can_fire(map.direct_vtimer) && |
409 | !kvm_timer_irq_can_fire(map.direct_ptimer) && | ||
410 | !kvm_timer_irq_can_fire(map.emul_ptimer)) | ||
373 | return; | 411 | return; |
374 | 412 | ||
375 | /* | 413 | /* |
376 | * The guest timers have not yet expired, schedule a background timer. | 414 | * At least one guest timer will expire. Schedule a background timer. |
377 | * Set the earliest expiration time among the guest timers. | 415 | * Set the earliest expiration time among the guest timers. |
378 | */ | 416 | */ |
379 | soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); | 417 | soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); |
380 | } | 418 | } |
381 | 419 | ||
382 | static void vtimer_restore_state(struct kvm_vcpu *vcpu) | 420 | static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) |
383 | { | 421 | { |
384 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 422 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
385 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 423 | |
424 | soft_timer_cancel(&timer->bg_timer); | ||
425 | } | ||
426 | |||
427 | static void timer_restore_state(struct arch_timer_context *ctx) | ||
428 | { | ||
429 | struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); | ||
430 | enum kvm_arch_timers index = arch_timer_ctx_index(ctx); | ||
386 | unsigned long flags; | 431 | unsigned long flags; |
387 | 432 | ||
433 | if (!timer->enabled) | ||
434 | return; | ||
435 | |||
388 | local_irq_save(flags); | 436 | local_irq_save(flags); |
389 | 437 | ||
390 | if (vtimer->loaded) | 438 | if (ctx->loaded) |
391 | goto out; | 439 | goto out; |
392 | 440 | ||
393 | if (timer->enabled) { | 441 | switch (index) { |
394 | write_sysreg_el0(vtimer->cnt_cval, cntv_cval); | 442 | case TIMER_VTIMER: |
443 | write_sysreg_el0(ctx->cnt_cval, cntv_cval); | ||
395 | isb(); | 444 | isb(); |
396 | write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); | 445 | write_sysreg_el0(ctx->cnt_ctl, cntv_ctl); |
446 | break; | ||
447 | case TIMER_PTIMER: | ||
448 | write_sysreg_el0(ctx->cnt_cval, cntp_cval); | ||
449 | isb(); | ||
450 | write_sysreg_el0(ctx->cnt_ctl, cntp_ctl); | ||
451 | break; | ||
452 | case NR_KVM_TIMERS: | ||
453 | BUG(); | ||
397 | } | 454 | } |
398 | 455 | ||
399 | vtimer->loaded = true; | 456 | trace_kvm_timer_restore_state(ctx); |
457 | |||
458 | ctx->loaded = true; | ||
400 | out: | 459 | out: |
401 | local_irq_restore(flags); | 460 | local_irq_restore(flags); |
402 | } | 461 | } |
403 | 462 | ||
404 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu) | ||
405 | { | ||
406 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
407 | |||
408 | vtimer_restore_state(vcpu); | ||
409 | |||
410 | soft_timer_cancel(&timer->bg_timer); | ||
411 | } | ||
412 | |||
413 | static void set_cntvoff(u64 cntvoff) | 463 | static void set_cntvoff(u64 cntvoff) |
414 | { | 464 | { |
415 | u32 low = lower_32_bits(cntvoff); | 465 | u32 low = lower_32_bits(cntvoff); |
@@ -425,23 +475,32 @@ static void set_cntvoff(u64 cntvoff) | |||
425 | kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); | 475 | kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); |
426 | } | 476 | } |
427 | 477 | ||
428 | static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active) | 478 | static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active) |
429 | { | 479 | { |
430 | int r; | 480 | int r; |
431 | r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active); | 481 | r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active); |
432 | WARN_ON(r); | 482 | WARN_ON(r); |
433 | } | 483 | } |
434 | 484 | ||
435 | static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu) | 485 | static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) |
436 | { | 486 | { |
437 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 487 | struct kvm_vcpu *vcpu = ctx->vcpu; |
438 | bool phys_active; | 488 | bool phys_active = false; |
489 | |||
490 | /* | ||
491 | * Update the timer output so that it is likely to match the | ||
492 | * state we're about to restore. If the timer expires between | ||
493 | * this point and the register restoration, we'll take the | ||
494 | * interrupt anyway. | ||
495 | */ | ||
496 | kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); | ||
439 | 497 | ||
440 | if (irqchip_in_kernel(vcpu->kvm)) | 498 | if (irqchip_in_kernel(vcpu->kvm)) |
441 | phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); | 499 | phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); |
442 | else | 500 | |
443 | phys_active = vtimer->irq.level; | 501 | phys_active |= ctx->irq.level; |
444 | set_vtimer_irq_phys_active(vcpu, phys_active); | 502 | |
503 | set_timer_irq_phys_active(ctx, phys_active); | ||
445 | } | 504 | } |
446 | 505 | ||
447 | static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) | 506 | static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) |
@@ -466,28 +525,32 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) | |||
466 | 525 | ||
467 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) | 526 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) |
468 | { | 527 | { |
469 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 528 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
470 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 529 | struct timer_map map; |
471 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
472 | 530 | ||
473 | if (unlikely(!timer->enabled)) | 531 | if (unlikely(!timer->enabled)) |
474 | return; | 532 | return; |
475 | 533 | ||
476 | if (static_branch_likely(&has_gic_active_state)) | 534 | get_timer_map(vcpu, &map); |
477 | kvm_timer_vcpu_load_gic(vcpu); | 535 | |
478 | else | 536 | if (static_branch_likely(&has_gic_active_state)) { |
537 | kvm_timer_vcpu_load_gic(map.direct_vtimer); | ||
538 | if (map.direct_ptimer) | ||
539 | kvm_timer_vcpu_load_gic(map.direct_ptimer); | ||
540 | } else { | ||
479 | kvm_timer_vcpu_load_nogic(vcpu); | 541 | kvm_timer_vcpu_load_nogic(vcpu); |
542 | } | ||
480 | 543 | ||
481 | set_cntvoff(vtimer->cntvoff); | 544 | set_cntvoff(map.direct_vtimer->cntvoff); |
482 | 545 | ||
483 | vtimer_restore_state(vcpu); | 546 | kvm_timer_unblocking(vcpu); |
484 | 547 | ||
485 | /* Set the background timer for the physical timer emulation. */ | 548 | timer_restore_state(map.direct_vtimer); |
486 | phys_timer_emulate(vcpu); | 549 | if (map.direct_ptimer) |
550 | timer_restore_state(map.direct_ptimer); | ||
487 | 551 | ||
488 | /* If the timer fired while we weren't running, inject it now */ | 552 | if (map.emul_ptimer) |
489 | if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) | 553 | timer_emulate(map.emul_ptimer); |
490 | kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); | ||
491 | } | 554 | } |
492 | 555 | ||
493 | bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) | 556 | bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) |
@@ -509,15 +572,20 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) | |||
509 | 572 | ||
510 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | 573 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) |
511 | { | 574 | { |
512 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 575 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
576 | struct timer_map map; | ||
513 | 577 | ||
514 | if (unlikely(!timer->enabled)) | 578 | if (unlikely(!timer->enabled)) |
515 | return; | 579 | return; |
516 | 580 | ||
517 | vtimer_save_state(vcpu); | 581 | get_timer_map(vcpu, &map); |
582 | |||
583 | timer_save_state(map.direct_vtimer); | ||
584 | if (map.direct_ptimer) | ||
585 | timer_save_state(map.direct_ptimer); | ||
518 | 586 | ||
519 | /* | 587 | /* |
520 | * Cancel the physical timer emulation, because the only case where we | 588 | * Cancel the soft timer emulation, because the only case where we |
521 | * need it after a vcpu_put is in the context of a sleeping VCPU, and | 589 | * need it after a vcpu_put is in the context of a sleeping VCPU, and |
522 | * in that case we already factor in the deadline for the physical | 590 | * in that case we already factor in the deadline for the physical |
523 | * timer when scheduling the bg_timer. | 591 | * timer when scheduling the bg_timer. |
@@ -525,7 +593,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | |||
525 | * In any case, we re-schedule the hrtimer for the physical timer when | 593 | * In any case, we re-schedule the hrtimer for the physical timer when |
526 | * coming back to the VCPU thread in kvm_timer_vcpu_load(). | 594 | * coming back to the VCPU thread in kvm_timer_vcpu_load(). |
527 | */ | 595 | */ |
528 | soft_timer_cancel(&timer->phys_timer); | 596 | if (map.emul_ptimer) |
597 | soft_timer_cancel(&map.emul_ptimer->hrtimer); | ||
598 | |||
599 | if (swait_active(kvm_arch_vcpu_wq(vcpu))) | ||
600 | kvm_timer_blocking(vcpu); | ||
529 | 601 | ||
530 | /* | 602 | /* |
531 | * The kernel may decide to run userspace after calling vcpu_put, so | 603 | * The kernel may decide to run userspace after calling vcpu_put, so |
@@ -534,8 +606,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | |||
534 | * counter of non-VHE case. For VHE, the virtual counter uses a fixed | 606 | * counter of non-VHE case. For VHE, the virtual counter uses a fixed |
535 | * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. | 607 | * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. |
536 | */ | 608 | */ |
537 | if (!has_vhe()) | 609 | set_cntvoff(0); |
538 | set_cntvoff(0); | ||
539 | } | 610 | } |
540 | 611 | ||
541 | /* | 612 | /* |
@@ -550,7 +621,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) | |||
550 | if (!kvm_timer_should_fire(vtimer)) { | 621 | if (!kvm_timer_should_fire(vtimer)) { |
551 | kvm_timer_update_irq(vcpu, false, vtimer); | 622 | kvm_timer_update_irq(vcpu, false, vtimer); |
552 | if (static_branch_likely(&has_gic_active_state)) | 623 | if (static_branch_likely(&has_gic_active_state)) |
553 | set_vtimer_irq_phys_active(vcpu, false); | 624 | set_timer_irq_phys_active(vtimer, false); |
554 | else | 625 | else |
555 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); | 626 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); |
556 | } | 627 | } |
@@ -558,7 +629,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) | |||
558 | 629 | ||
559 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | 630 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) |
560 | { | 631 | { |
561 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 632 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
562 | 633 | ||
563 | if (unlikely(!timer->enabled)) | 634 | if (unlikely(!timer->enabled)) |
564 | return; | 635 | return; |
@@ -569,9 +640,10 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | |||
569 | 640 | ||
570 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) | 641 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) |
571 | { | 642 | { |
572 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 643 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
573 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 644 | struct timer_map map; |
574 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 645 | |
646 | get_timer_map(vcpu, &map); | ||
575 | 647 | ||
576 | /* | 648 | /* |
577 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 | 649 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 |
@@ -579,12 +651,22 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) | |||
579 | * resets the timer to be disabled and unmasked and is compliant with | 651 | * resets the timer to be disabled and unmasked and is compliant with |
580 | * the ARMv7 architecture. | 652 | * the ARMv7 architecture. |
581 | */ | 653 | */ |
582 | vtimer->cnt_ctl = 0; | 654 | vcpu_vtimer(vcpu)->cnt_ctl = 0; |
583 | ptimer->cnt_ctl = 0; | 655 | vcpu_ptimer(vcpu)->cnt_ctl = 0; |
584 | kvm_timer_update_state(vcpu); | ||
585 | 656 | ||
586 | if (timer->enabled && irqchip_in_kernel(vcpu->kvm)) | 657 | if (timer->enabled) { |
587 | kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq); | 658 | kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); |
659 | kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); | ||
660 | |||
661 | if (irqchip_in_kernel(vcpu->kvm)) { | ||
662 | kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); | ||
663 | if (map.direct_ptimer) | ||
664 | kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); | ||
665 | } | ||
666 | } | ||
667 | |||
668 | if (map.emul_ptimer) | ||
669 | soft_timer_cancel(&map.emul_ptimer->hrtimer); | ||
588 | 670 | ||
589 | return 0; | 671 | return 0; |
590 | } | 672 | } |
@@ -610,56 +692,76 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) | |||
610 | 692 | ||
611 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) | 693 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) |
612 | { | 694 | { |
613 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 695 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
614 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 696 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
615 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 697 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); |
616 | 698 | ||
617 | /* Synchronize cntvoff across all vtimers of a VM. */ | 699 | /* Synchronize cntvoff across all vtimers of a VM. */ |
618 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); | 700 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); |
619 | vcpu_ptimer(vcpu)->cntvoff = 0; | 701 | ptimer->cntvoff = 0; |
620 | 702 | ||
621 | hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 703 | hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
622 | timer->bg_timer.function = kvm_bg_timer_expire; | 704 | timer->bg_timer.function = kvm_bg_timer_expire; |
623 | 705 | ||
624 | hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 706 | hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
625 | timer->phys_timer.function = kvm_phys_timer_expire; | 707 | hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
708 | vtimer->hrtimer.function = kvm_hrtimer_expire; | ||
709 | ptimer->hrtimer.function = kvm_hrtimer_expire; | ||
626 | 710 | ||
627 | vtimer->irq.irq = default_vtimer_irq.irq; | 711 | vtimer->irq.irq = default_vtimer_irq.irq; |
628 | ptimer->irq.irq = default_ptimer_irq.irq; | 712 | ptimer->irq.irq = default_ptimer_irq.irq; |
713 | |||
714 | vtimer->host_timer_irq = host_vtimer_irq; | ||
715 | ptimer->host_timer_irq = host_ptimer_irq; | ||
716 | |||
717 | vtimer->host_timer_irq_flags = host_vtimer_irq_flags; | ||
718 | ptimer->host_timer_irq_flags = host_ptimer_irq_flags; | ||
719 | |||
720 | vtimer->vcpu = vcpu; | ||
721 | ptimer->vcpu = vcpu; | ||
629 | } | 722 | } |
630 | 723 | ||
631 | static void kvm_timer_init_interrupt(void *info) | 724 | static void kvm_timer_init_interrupt(void *info) |
632 | { | 725 | { |
633 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); | 726 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); |
727 | enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); | ||
634 | } | 728 | } |
635 | 729 | ||
636 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | 730 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) |
637 | { | 731 | { |
638 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 732 | struct arch_timer_context *timer; |
639 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 733 | bool level; |
640 | 734 | ||
641 | switch (regid) { | 735 | switch (regid) { |
642 | case KVM_REG_ARM_TIMER_CTL: | 736 | case KVM_REG_ARM_TIMER_CTL: |
643 | vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; | 737 | timer = vcpu_vtimer(vcpu); |
738 | kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); | ||
644 | break; | 739 | break; |
645 | case KVM_REG_ARM_TIMER_CNT: | 740 | case KVM_REG_ARM_TIMER_CNT: |
741 | timer = vcpu_vtimer(vcpu); | ||
646 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); | 742 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); |
647 | break; | 743 | break; |
648 | case KVM_REG_ARM_TIMER_CVAL: | 744 | case KVM_REG_ARM_TIMER_CVAL: |
649 | vtimer->cnt_cval = value; | 745 | timer = vcpu_vtimer(vcpu); |
746 | kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); | ||
650 | break; | 747 | break; |
651 | case KVM_REG_ARM_PTIMER_CTL: | 748 | case KVM_REG_ARM_PTIMER_CTL: |
652 | ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; | 749 | timer = vcpu_ptimer(vcpu); |
750 | kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); | ||
653 | break; | 751 | break; |
654 | case KVM_REG_ARM_PTIMER_CVAL: | 752 | case KVM_REG_ARM_PTIMER_CVAL: |
655 | ptimer->cnt_cval = value; | 753 | timer = vcpu_ptimer(vcpu); |
754 | kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); | ||
656 | break; | 755 | break; |
657 | 756 | ||
658 | default: | 757 | default: |
659 | return -1; | 758 | return -1; |
660 | } | 759 | } |
661 | 760 | ||
662 | kvm_timer_update_state(vcpu); | 761 | level = kvm_timer_should_fire(timer); |
762 | kvm_timer_update_irq(vcpu, level, timer); | ||
763 | timer_emulate(timer); | ||
764 | |||
663 | return 0; | 765 | return 0; |
664 | } | 766 | } |
665 | 767 | ||
@@ -679,26 +781,113 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) | |||
679 | 781 | ||
680 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) | 782 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) |
681 | { | 783 | { |
682 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
683 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
684 | |||
685 | switch (regid) { | 784 | switch (regid) { |
686 | case KVM_REG_ARM_TIMER_CTL: | 785 | case KVM_REG_ARM_TIMER_CTL: |
687 | return read_timer_ctl(vtimer); | 786 | return kvm_arm_timer_read(vcpu, |
787 | vcpu_vtimer(vcpu), TIMER_REG_CTL); | ||
688 | case KVM_REG_ARM_TIMER_CNT: | 788 | case KVM_REG_ARM_TIMER_CNT: |
689 | return kvm_phys_timer_read() - vtimer->cntvoff; | 789 | return kvm_arm_timer_read(vcpu, |
790 | vcpu_vtimer(vcpu), TIMER_REG_CNT); | ||
690 | case KVM_REG_ARM_TIMER_CVAL: | 791 | case KVM_REG_ARM_TIMER_CVAL: |
691 | return vtimer->cnt_cval; | 792 | return kvm_arm_timer_read(vcpu, |
793 | vcpu_vtimer(vcpu), TIMER_REG_CVAL); | ||
692 | case KVM_REG_ARM_PTIMER_CTL: | 794 | case KVM_REG_ARM_PTIMER_CTL: |
693 | return read_timer_ctl(ptimer); | 795 | return kvm_arm_timer_read(vcpu, |
694 | case KVM_REG_ARM_PTIMER_CVAL: | 796 | vcpu_ptimer(vcpu), TIMER_REG_CTL); |
695 | return ptimer->cnt_cval; | ||
696 | case KVM_REG_ARM_PTIMER_CNT: | 797 | case KVM_REG_ARM_PTIMER_CNT: |
697 | return kvm_phys_timer_read(); | 798 | return kvm_arm_timer_read(vcpu, |
799 | vcpu_ptimer(vcpu), TIMER_REG_CNT); | ||
800 | case KVM_REG_ARM_PTIMER_CVAL: | ||
801 | return kvm_arm_timer_read(vcpu, | ||
802 | vcpu_ptimer(vcpu), TIMER_REG_CVAL); | ||
698 | } | 803 | } |
699 | return (u64)-1; | 804 | return (u64)-1; |
700 | } | 805 | } |
701 | 806 | ||
807 | static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, | ||
808 | struct arch_timer_context *timer, | ||
809 | enum kvm_arch_timer_regs treg) | ||
810 | { | ||
811 | u64 val; | ||
812 | |||
813 | switch (treg) { | ||
814 | case TIMER_REG_TVAL: | ||
815 | val = timer->cnt_cval - (kvm_phys_timer_read() - timer->cntvoff); | ||
816 | break; | ||
817 | |||
818 | case TIMER_REG_CTL: | ||
819 | val = read_timer_ctl(timer); | ||
820 | break; | ||
821 | |||
822 | case TIMER_REG_CVAL: | ||
823 | val = timer->cnt_cval; | ||
824 | break; | ||
825 | |||
826 | case TIMER_REG_CNT: | ||
827 | val = kvm_phys_timer_read() - timer->cntvoff; | ||
828 | break; | ||
829 | |||
830 | default: | ||
831 | BUG(); | ||
832 | } | ||
833 | |||
834 | return val; | ||
835 | } | ||
836 | |||
837 | u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, | ||
838 | enum kvm_arch_timers tmr, | ||
839 | enum kvm_arch_timer_regs treg) | ||
840 | { | ||
841 | u64 val; | ||
842 | |||
843 | preempt_disable(); | ||
844 | kvm_timer_vcpu_put(vcpu); | ||
845 | |||
846 | val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg); | ||
847 | |||
848 | kvm_timer_vcpu_load(vcpu); | ||
849 | preempt_enable(); | ||
850 | |||
851 | return val; | ||
852 | } | ||
853 | |||
854 | static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, | ||
855 | struct arch_timer_context *timer, | ||
856 | enum kvm_arch_timer_regs treg, | ||
857 | u64 val) | ||
858 | { | ||
859 | switch (treg) { | ||
860 | case TIMER_REG_TVAL: | ||
861 | timer->cnt_cval = (kvm_phys_timer_read() - timer->cntvoff) + val; | ||
862 | break; | ||
863 | |||
864 | case TIMER_REG_CTL: | ||
865 | timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; | ||
866 | break; | ||
867 | |||
868 | case TIMER_REG_CVAL: | ||
869 | timer->cnt_cval = val; | ||
870 | break; | ||
871 | |||
872 | default: | ||
873 | BUG(); | ||
874 | } | ||
875 | } | ||
876 | |||
877 | void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, | ||
878 | enum kvm_arch_timers tmr, | ||
879 | enum kvm_arch_timer_regs treg, | ||
880 | u64 val) | ||
881 | { | ||
882 | preempt_disable(); | ||
883 | kvm_timer_vcpu_put(vcpu); | ||
884 | |||
885 | kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val); | ||
886 | |||
887 | kvm_timer_vcpu_load(vcpu); | ||
888 | preempt_enable(); | ||
889 | } | ||
890 | |||
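[Editor's note] kvm_arm_timer_read_sysreg() and kvm_arm_timer_write_sysreg() bracket the register access with a vcpu put/load pair, so the in-memory view is synced from hardware before the access and the hardware is re-armed afterwards. A hypothetical trap handler using them (the handler name and the sys_reg_params usage are illustrative; the real handlers live outside this excerpt):

	static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
				    struct sys_reg_params *p)
	{
		if (p->is_write)
			kvm_arm_timer_write_sysreg(vcpu, TIMER_PTIMER,
						   TIMER_REG_CTL, p->regval);
		else
			p->regval = kvm_arm_timer_read_sysreg(vcpu, TIMER_PTIMER,
							      TIMER_REG_CTL);
		return true;
	}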
702 | static int kvm_timer_starting_cpu(unsigned int cpu) | 891 | static int kvm_timer_starting_cpu(unsigned int cpu) |
703 | { | 892 | { |
704 | kvm_timer_init_interrupt(NULL); | 893 | kvm_timer_init_interrupt(NULL); |
@@ -724,6 +913,8 @@ int kvm_timer_hyp_init(bool has_gic) | |||
724 | return -ENODEV; | 913 | return -ENODEV; |
725 | } | 914 | } |
726 | 915 | ||
916 | /* First, do the virtual EL1 timer irq */ | ||
917 | |||
727 | if (info->virtual_irq <= 0) { | 918 | if (info->virtual_irq <= 0) { |
728 | kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", | 919 | kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", |
729 | info->virtual_irq); | 920 | info->virtual_irq); |
@@ -734,15 +925,15 @@ int kvm_timer_hyp_init(bool has_gic) | |||
734 | host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); | 925 | host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); |
735 | if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && | 926 | if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && |
736 | host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { | 927 | host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { |
737 | kvm_err("Invalid trigger for IRQ%d, assuming level low\n", | 928 | kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", |
738 | host_vtimer_irq); | 929 | host_vtimer_irq); |
739 | host_vtimer_irq_flags = IRQF_TRIGGER_LOW; | 930 | host_vtimer_irq_flags = IRQF_TRIGGER_LOW; |
740 | } | 931 | } |
741 | 932 | ||
742 | err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, | 933 | err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, |
743 | "kvm guest timer", kvm_get_running_vcpus()); | 934 | "kvm guest vtimer", kvm_get_running_vcpus()); |
744 | if (err) { | 935 | if (err) { |
745 | kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", | 936 | kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n", |
746 | host_vtimer_irq, err); | 937 | host_vtimer_irq, err); |
747 | return err; | 938 | return err; |
748 | } | 939 | } |
@@ -760,6 +951,43 @@ int kvm_timer_hyp_init(bool has_gic) | |||
760 | 951 | ||
761 | kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); | 952 | kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); |
762 | 953 | ||
954 | /* Now let's do the physical EL1 timer irq */ | ||
955 | |||
956 | if (info->physical_irq > 0) { | ||
957 | host_ptimer_irq = info->physical_irq; | ||
958 | host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); | ||
959 | if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && | ||
960 | host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { | ||
961 | kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", | ||
962 | host_ptimer_irq); | ||
963 | host_ptimer_irq_flags = IRQF_TRIGGER_LOW; | ||
964 | } | ||
965 | |||
966 | err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, | ||
967 | "kvm guest ptimer", kvm_get_running_vcpus()); | ||
968 | if (err) { | ||
969 | kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n", | ||
970 | host_ptimer_irq, err); | ||
971 | return err; | ||
972 | } | ||
973 | |||
974 | if (has_gic) { | ||
975 | err = irq_set_vcpu_affinity(host_ptimer_irq, | ||
976 | kvm_get_running_vcpus()); | ||
977 | if (err) { | ||
978 | kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); | ||
979 | goto out_free_irq; | ||
980 | } | ||
981 | } | ||
982 | |||
983 | kvm_debug("physical timer IRQ%d\n", host_ptimer_irq); | ||
984 | } else if (has_vhe()) { | ||
985 | kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n", | ||
986 | info->physical_irq); | ||
987 | err = -ENODEV; | ||
988 | goto out_free_irq; | ||
989 | } | ||
990 | |||
763 | cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, | 991 | cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, |
764 | "kvm/arm/timer:starting", kvm_timer_starting_cpu, | 992 | "kvm/arm/timer:starting", kvm_timer_starting_cpu, |
765 | kvm_timer_dying_cpu); | 993 | kvm_timer_dying_cpu); |
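The ptimer block repeats the vtimer's validate-and-fall-back handling of the firmware-described trigger type. A standalone sketch of that fallback logic, with made-up flag values (only the shape of the check matches the kernel's):

/* Model of the trigger-type fallback above: anything other than
 * level-high or level-low from DT/ACPI is treated as level-low.
 */
#include <stdio.h>

enum trigger { TRIGGER_NONE, TRIGGER_HIGH, TRIGGER_LOW, TRIGGER_EDGE };

static enum trigger sanitize_trigger(enum trigger t, int irq)
{
	if (t != TRIGGER_HIGH && t != TRIGGER_LOW) {
		fprintf(stderr,
			"Invalid trigger for IRQ%d, assuming level low\n", irq);
		return TRIGGER_LOW;
	}
	return t;
}

int main(void)
{
	printf("%d\n", sanitize_trigger(TRIGGER_EDGE, 27)); /* falls back: 2 */
	printf("%d\n", sanitize_trigger(TRIGGER_HIGH, 26)); /* kept: 1      */
	return 0;
}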
@@ -771,7 +999,7 @@ out_free_irq: | |||
771 | 999 | ||
772 | void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) | 1000 | void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) |
773 | { | 1001 | { |
774 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 1002 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
775 | 1003 | ||
776 | soft_timer_cancel(&timer->bg_timer); | 1004 | soft_timer_cancel(&timer->bg_timer); |
777 | } | 1005 | } |
@@ -807,16 +1035,18 @@ bool kvm_arch_timer_get_input_level(int vintid) | |||
807 | 1035 | ||
808 | if (vintid == vcpu_vtimer(vcpu)->irq.irq) | 1036 | if (vintid == vcpu_vtimer(vcpu)->irq.irq) |
809 | timer = vcpu_vtimer(vcpu); | 1037 | timer = vcpu_vtimer(vcpu); |
1038 | else if (vintid == vcpu_ptimer(vcpu)->irq.irq) | ||
1039 | timer = vcpu_ptimer(vcpu); | ||
810 | else | 1040 | else |
811 | BUG(); /* We only map the vtimer so far */ | 1041 | BUG(); |
812 | 1042 | ||
813 | return kvm_timer_should_fire(timer); | 1043 | return kvm_timer_should_fire(timer); |
814 | } | 1044 | } |
815 | 1045 | ||
816 | int kvm_timer_enable(struct kvm_vcpu *vcpu) | 1046 | int kvm_timer_enable(struct kvm_vcpu *vcpu) |
817 | { | 1047 | { |
818 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 1048 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
819 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 1049 | struct timer_map map; |
820 | int ret; | 1050 | int ret; |
821 | 1051 | ||
822 | if (timer->enabled) | 1052 | if (timer->enabled) |
@@ -834,19 +1064,33 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) | |||
834 | return -EINVAL; | 1064 | return -EINVAL; |
835 | } | 1065 | } |
836 | 1066 | ||
837 | ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq, | 1067 | get_timer_map(vcpu, &map); |
1068 | |||
1069 | ret = kvm_vgic_map_phys_irq(vcpu, | ||
1070 | map.direct_vtimer->host_timer_irq, | ||
1071 | map.direct_vtimer->irq.irq, | ||
838 | kvm_arch_timer_get_input_level); | 1072 | kvm_arch_timer_get_input_level); |
839 | if (ret) | 1073 | if (ret) |
840 | return ret; | 1074 | return ret; |
841 | 1075 | ||
1076 | if (map.direct_ptimer) { | ||
1077 | ret = kvm_vgic_map_phys_irq(vcpu, | ||
1078 | map.direct_ptimer->host_timer_irq, | ||
1079 | map.direct_ptimer->irq.irq, | ||
1080 | kvm_arch_timer_get_input_level); | ||
1081 | } | ||
1082 | |||
1083 | if (ret) | ||
1084 | return ret; | ||
1085 | |||
842 | no_vgic: | 1086 | no_vgic: |
843 | timer->enabled = 1; | 1087 | timer->enabled = 1; |
844 | return 0; | 1088 | return 0; |
845 | } | 1089 | } |
846 | 1090 | ||
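kvm_timer_enable() now derives the vgic wiring from a timer_map rather than assuming only the vtimer is mapped. A rough model of that descriptor (field names follow the kernel's; get_timer_map() itself is defined earlier in the commit, and the VHE/non-VHE split shown is an assumption of this sketch):

/* Rough model of the timer_map used above: the virtual timer is always
 * handled directly in hardware, while the physical timer is either
 * direct too (VHE) or emulated (non-VHE). NULL marks an unused slot,
 * just as the kvm_get_timer_map tracepoint logs -1.
 */
#include <stdio.h>

struct timer_ctx { const char *name; };

struct timer_map {
	struct timer_ctx *direct_vtimer; /* always present */
	struct timer_ctx *direct_ptimer; /* VHE only       */
	struct timer_ctx *emul_ptimer;   /* non-VHE only   */
};

static struct timer_ctx vtimer = { "vtimer" }, ptimer = { "ptimer" };

static void get_timer_map(struct timer_map *map, int has_vhe)
{
	map->direct_vtimer = &vtimer;
	map->direct_ptimer = has_vhe ? &ptimer : NULL;
	map->emul_ptimer   = has_vhe ? NULL : &ptimer;
}

int main(void)
{
	struct timer_map map;

	get_timer_map(&map, 0);
	printf("direct_ptimer: %s\n",
	       map.direct_ptimer ? map.direct_ptimer->name : "(emulated)");
	return 0;
}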
847 | /* | 1091 | /* |
848 | * On VHE system, we only need to configure trap on physical timer and counter | 1092 | * On VHE system, we only need to configure the EL2 timer trap register once, |
849 | * accesses in EL0 and EL1 once, not for every world switch. | 1093 | * not for every world switch. |
850 | * The host kernel runs at EL2 with HCR_EL2.TGE == 1, | 1094 | * The host kernel runs at EL2 with HCR_EL2.TGE == 1, |
851 | * and this makes those bits have no effect for the host kernel execution. | 1095 | * and this makes those bits have no effect for the host kernel execution. |
852 | */ | 1096 | */ |
@@ -857,11 +1101,11 @@ void kvm_timer_init_vhe(void) | |||
857 | u64 val; | 1101 | u64 val; |
858 | 1102 | ||
859 | /* | 1103 | /* |
860 | * Disallow physical timer access for the guest. | 1104 | * VHE systems allow the guest direct access to the EL1 physical |
861 | * Physical counter access is allowed. | 1105 | * timer/counter. |
862 | */ | 1106 | */ |
863 | val = read_sysreg(cnthctl_el2); | 1107 | val = read_sysreg(cnthctl_el2); |
864 | val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift); | 1108 | val |= (CNTHCTL_EL1PCEN << cnthctl_shift); |
865 | val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); | 1109 | val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); |
866 | write_sysreg(val, cnthctl_el2); | 1110 | write_sysreg(val, cnthctl_el2); |
867 | } | 1111 | } |
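With both EL1PCEN and EL1PCTEN set, a VHE host now hands the guest direct access to the EL1 physical timer as well as the counter, instead of trapping timer accesses. A bit-level sketch of the new register value (the bit positions and the VHE shift below are assumptions of the demo):

/* Bit-level model of the kvm_timer_init_vhe() change above: both
 * CNTHCTL_EL1PCEN (timer) and CNTHCTL_EL1PCTEN (counter) end up set,
 * where the old code cleared EL1PCEN.
 */
#include <stdint.h>
#include <stdio.h>

#define CNTHCTL_EL1PCTEN (1u << 0)
#define CNTHCTL_EL1PCEN  (1u << 1)

int main(void)
{
	unsigned int cnthctl_shift = 10;   /* assumed VHE field relocation */
	uint64_t val = 0;

	val |= (uint64_t)CNTHCTL_EL1PCEN  << cnthctl_shift; /* timer access   */
	val |= (uint64_t)CNTHCTL_EL1PCTEN << cnthctl_shift; /* counter access */
	printf("cnthctl_el2 = %#llx\n", (unsigned long long)val);
	return 0;
}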
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9c486fad3f9f..99c37384ba7b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
@@ -65,7 +65,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); | |||
65 | /* The VMID used in the VTTBR */ | 65 | /* The VMID used in the VTTBR */ |
66 | static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); | 66 | static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); |
67 | static u32 kvm_next_vmid; | 67 | static u32 kvm_next_vmid; |
68 | static unsigned int kvm_vmid_bits __read_mostly; | ||
69 | static DEFINE_SPINLOCK(kvm_vmid_lock); | 68 | static DEFINE_SPINLOCK(kvm_vmid_lock); |
70 | 69 | ||
71 | static bool vgic_present; | 70 | static bool vgic_present; |
@@ -142,7 +141,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
142 | kvm_vgic_early_init(kvm); | 141 | kvm_vgic_early_init(kvm); |
143 | 142 | ||
144 | /* Mark the initial VMID generation invalid */ | 143 | /* Mark the initial VMID generation invalid */ |
145 | kvm->arch.vmid_gen = 0; | 144 | kvm->arch.vmid.vmid_gen = 0; |
146 | 145 | ||
147 | /* The maximum number of VCPUs is limited by the host's GIC model */ | 146 | /* The maximum number of VCPUs is limited by the host's GIC model */ |
148 | kvm->arch.max_vcpus = vgic_present ? | 147 | kvm->arch.max_vcpus = vgic_present ? |
@@ -336,13 +335,11 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
336 | 335 | ||
337 | void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) | 336 | void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) |
338 | { | 337 | { |
339 | kvm_timer_schedule(vcpu); | ||
340 | kvm_vgic_v4_enable_doorbell(vcpu); | 338 | kvm_vgic_v4_enable_doorbell(vcpu); |
341 | } | 339 | } |
342 | 340 | ||
343 | void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) | 341 | void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) |
344 | { | 342 | { |
345 | kvm_timer_unschedule(vcpu); | ||
346 | kvm_vgic_v4_disable_doorbell(vcpu); | 343 | kvm_vgic_v4_disable_doorbell(vcpu); |
347 | } | 344 | } |
348 | 345 | ||
@@ -472,37 +469,31 @@ void force_vm_exit(const cpumask_t *mask) | |||
472 | 469 | ||
473 | /** | 470 | /** |
474 | * need_new_vmid_gen - check that the VMID is still valid | 471 | * need_new_vmid_gen - check that the VMID is still valid |
475 | * @kvm: The VM's VMID to check | 472 | * @vmid: The VMID to check |
476 | * | 473 | * |
477 | * return true if there is a new generation of VMIDs being used | 474 | * return true if there is a new generation of VMIDs being used |
478 | * | 475 | * |
479 | * The hardware supports only 256 values with the value zero reserved for the | 476 | * The hardware supports a limited set of values with the value zero reserved |
480 | * host, so we check if an assigned value belongs to a previous generation, | 477 | * for the host, so we check if an assigned value belongs to a previous |
481 | * which requires us to assign a new value. If we're the first to use a | 478 | * generation, which requires us to assign a new value. If we're the |
482 | * VMID for the new generation, we must flush necessary caches and TLBs on all | 479 | * first to use a VMID for the new generation, we must flush necessary caches |
483 | * CPUs. | 480 | * and TLBs on all CPUs. |
484 | */ | 481 | */ |
485 | static bool need_new_vmid_gen(struct kvm *kvm) | 482 | static bool need_new_vmid_gen(struct kvm_vmid *vmid) |
486 | { | 483 | { |
487 | u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); | 484 | u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); |
488 | smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ | 485 | smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ |
489 | return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen); | 486 | return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen); |
490 | } | 487 | } |
491 | 488 | ||
492 | /** | 489 | /** |
493 | * update_vttbr - Update the VTTBR with a valid VMID before the guest runs | 490 | * update_vmid - Update the vmid with a valid VMID for the current generation |
494 | * @kvm The guest that we are about to run | 491 | * @kvm: The guest that struct vmid belongs to |
495 | * | 492 | * @vmid: The stage-2 VMID information struct |
496 | * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the | ||
497 | * VM has a valid VMID, otherwise assigns a new one and flushes corresponding | ||
498 | * caches and TLBs. | ||
499 | */ | 493 | */ |
500 | static void update_vttbr(struct kvm *kvm) | 494 | static void update_vmid(struct kvm_vmid *vmid) |
501 | { | 495 | { |
502 | phys_addr_t pgd_phys; | 496 | if (!need_new_vmid_gen(vmid)) |
503 | u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0; | ||
504 | |||
505 | if (!need_new_vmid_gen(kvm)) | ||
506 | return; | 497 | return; |
507 | 498 | ||
508 | spin_lock(&kvm_vmid_lock); | 499 | spin_lock(&kvm_vmid_lock); |
@@ -512,7 +503,7 @@ static void update_vttbr(struct kvm *kvm) | |||
512 | * already allocated a valid vmid for this vm, then this vcpu should | 503 | * already allocated a valid vmid for this vm, then this vcpu should |
513 | * use the same vmid. | 504 | * use the same vmid. |
514 | */ | 505 | */ |
515 | if (!need_new_vmid_gen(kvm)) { | 506 | if (!need_new_vmid_gen(vmid)) { |
516 | spin_unlock(&kvm_vmid_lock); | 507 | spin_unlock(&kvm_vmid_lock); |
517 | return; | 508 | return; |
518 | } | 509 | } |
@@ -536,18 +527,12 @@ static void update_vttbr(struct kvm *kvm) | |||
536 | kvm_call_hyp(__kvm_flush_vm_context); | 527 | kvm_call_hyp(__kvm_flush_vm_context); |
537 | } | 528 | } |
538 | 529 | ||
539 | kvm->arch.vmid = kvm_next_vmid; | 530 | vmid->vmid = kvm_next_vmid; |
540 | kvm_next_vmid++; | 531 | kvm_next_vmid++; |
541 | kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; | 532 | kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; |
542 | |||
543 | /* update vttbr to be used with the new vmid */ | ||
544 | pgd_phys = virt_to_phys(kvm->arch.pgd); | ||
545 | BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)); | ||
546 | vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); | ||
547 | kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp; | ||
548 | 533 | ||
549 | smp_wmb(); | 534 | smp_wmb(); |
550 | WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen)); | 535 | WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen)); |
551 | 536 | ||
552 | spin_unlock(&kvm_vmid_lock); | 537 | spin_unlock(&kvm_vmid_lock); |
553 | } | 538 | } |
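The VMID logic itself is unchanged by this refactoring: identifiers come from a small hardware-limited space, and when the allocator wraps, a global generation bump lazily invalidates every VM's VMID on its next guest entry. A standalone model with the locking and TLB maintenance elided (4 VMID bits keep the wrap visible; the real width comes from kvm_get_vmid_bits()):

/* Model of the generation scheme in update_vmid() above. Zero is
 * reserved for the host, so a wrap to zero starts a new generation,
 * at which point a real host flushes the VM context on all CPUs.
 */
#include <stdint.h>
#include <stdio.h>

#define VMID_BITS 4

static uint64_t kvm_vmid_gen = 1;
static uint32_t kvm_next_vmid = 1;

struct kvm_vmid { uint64_t vmid_gen; uint32_t vmid; };

static int need_new_vmid_gen(const struct kvm_vmid *v)
{
	return v->vmid_gen != kvm_vmid_gen;
}

static void update_vmid(struct kvm_vmid *v)
{
	if (!need_new_vmid_gen(v))
		return;

	if (kvm_next_vmid == 0) {       /* wrapped: new generation */
		kvm_vmid_gen++;
		kvm_next_vmid = 1;
	}

	v->vmid = kvm_next_vmid++;
	kvm_next_vmid &= (1u << VMID_BITS) - 1;
	v->vmid_gen = kvm_vmid_gen;
}

int main(void)
{
	struct kvm_vmid vm = { 0 };
	int i;

	for (i = 0; i < 20; i++) {      /* force a wrap */
		vm.vmid_gen = 0;        /* pretend other VMs consumed VMIDs */
		update_vmid(&vm);
	}
	printf("gen=%llu vmid=%u\n",
	       (unsigned long long)kvm_vmid_gen, vm.vmid);
	return 0;
}

Splitting the VMID into its own struct is what lets the VTTBR value be composed at world-switch time from kvm->arch.pgd_phys plus the VMID, instead of being cached in kvm->arch.vttbr.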
@@ -700,7 +685,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
700 | */ | 685 | */ |
701 | cond_resched(); | 686 | cond_resched(); |
702 | 687 | ||
703 | update_vttbr(vcpu->kvm); | 688 | update_vmid(&vcpu->kvm->arch.vmid); |
704 | 689 | ||
705 | check_vcpu_requests(vcpu); | 690 | check_vcpu_requests(vcpu); |
706 | 691 | ||
@@ -749,7 +734,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
749 | */ | 734 | */ |
750 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); | 735 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); |
751 | 736 | ||
752 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || | 737 | if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || |
753 | kvm_request_pending(vcpu)) { | 738 | kvm_request_pending(vcpu)) { |
754 | vcpu->mode = OUTSIDE_GUEST_MODE; | 739 | vcpu->mode = OUTSIDE_GUEST_MODE; |
755 | isb(); /* Ensure work in x_flush_hwstate is committed */ | 740 | isb(); /* Ensure work in x_flush_hwstate is committed */ |
@@ -775,7 +760,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
775 | ret = kvm_vcpu_run_vhe(vcpu); | 760 | ret = kvm_vcpu_run_vhe(vcpu); |
776 | kvm_arm_vhe_guest_exit(); | 761 | kvm_arm_vhe_guest_exit(); |
777 | } else { | 762 | } else { |
778 | ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); | 763 | ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); |
779 | } | 764 | } |
780 | 765 | ||
781 | vcpu->mode = OUTSIDE_GUEST_MODE; | 766 | vcpu->mode = OUTSIDE_GUEST_MODE; |
@@ -1427,10 +1412,6 @@ static inline void hyp_cpu_pm_exit(void) | |||
1427 | 1412 | ||
1428 | static int init_common_resources(void) | 1413 | static int init_common_resources(void) |
1429 | { | 1414 | { |
1430 | /* set size of VMID supported by CPU */ | ||
1431 | kvm_vmid_bits = kvm_get_vmid_bits(); | ||
1432 | kvm_info("%d-bit VMID\n", kvm_vmid_bits); | ||
1433 | |||
1434 | kvm_set_ipa_limit(); | 1415 | kvm_set_ipa_limit(); |
1435 | 1416 | ||
1436 | return 0; | 1417 | return 0; |
@@ -1571,6 +1552,7 @@ static int init_hyp_mode(void) | |||
1571 | kvm_cpu_context_t *cpu_ctxt; | 1552 | kvm_cpu_context_t *cpu_ctxt; |
1572 | 1553 | ||
1573 | cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); | 1554 | cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); |
1555 | kvm_init_host_cpu_context(cpu_ctxt, cpu); | ||
1574 | err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); | 1556 | err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); |
1575 | 1557 | ||
1576 | if (err) { | 1558 | if (err) { |
@@ -1581,7 +1563,7 @@ static int init_hyp_mode(void) | |||
1581 | 1563 | ||
1582 | err = hyp_map_aux_data(); | 1564 | err = hyp_map_aux_data(); |
1583 | if (err) | 1565 | if (err) |
1584 | kvm_err("Cannot map host auxilary data: %d\n", err); | 1566 | kvm_err("Cannot map host auxiliary data: %d\n", err); |
1585 | 1567 | ||
1586 | return 0; | 1568 | return 0; |
1587 | 1569 | ||
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 9652c453480f..264d92da3240 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c | |||
@@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | |||
226 | int i; | 226 | int i; |
227 | u32 elrsr; | 227 | u32 elrsr; |
228 | 228 | ||
229 | elrsr = read_gicreg(ICH_ELSR_EL2); | 229 | elrsr = read_gicreg(ICH_ELRSR_EL2); |
230 | 230 | ||
231 | write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); | 231 | write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); |
232 | 232 | ||
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index e9d28a7ca673..ffd7acdceac7 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
@@ -908,6 +908,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, | |||
908 | */ | 908 | */ |
909 | int kvm_alloc_stage2_pgd(struct kvm *kvm) | 909 | int kvm_alloc_stage2_pgd(struct kvm *kvm) |
910 | { | 910 | { |
911 | phys_addr_t pgd_phys; | ||
911 | pgd_t *pgd; | 912 | pgd_t *pgd; |
912 | 913 | ||
913 | if (kvm->arch.pgd != NULL) { | 914 | if (kvm->arch.pgd != NULL) { |
@@ -920,7 +921,12 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) | |||
920 | if (!pgd) | 921 | if (!pgd) |
921 | return -ENOMEM; | 922 | return -ENOMEM; |
922 | 923 | ||
924 | pgd_phys = virt_to_phys(pgd); | ||
925 | if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) | ||
926 | return -EINVAL; | ||
927 | |||
923 | kvm->arch.pgd = pgd; | 928 | kvm->arch.pgd = pgd; |
929 | kvm->arch.pgd_phys = pgd_phys; | ||
924 | return 0; | 930 | return 0; |
925 | } | 931 | } |
926 | 932 | ||
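Checking pgd_phys against kvm_vttbr_baddr_mask() once at allocation time replaces the BUG_ON() that update_vttbr() used to perform on every VMID update. A minimal sketch of the check, assuming an illustrative 64KiB alignment mask:

/* Model of the kvm_alloc_stage2_pgd() check above: the pgd's physical
 * address must have no bits set outside the VTTBR BADDR field.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t vttbr_baddr_mask(void)
{
	return ~0xffffULL;              /* assume 64KiB alignment required */
}

static int check_pgd_phys(uint64_t pgd_phys)
{
	if (pgd_phys & ~vttbr_baddr_mask())
		return -22;             /* -EINVAL: misaligned allocation  */
	return 0;
}

int main(void)
{
	printf("%d\n", check_pgd_phys(0x40000000));  /* aligned:    0   */
	printf("%d\n", check_pgd_phys(0x40000800));  /* misaligned: -22 */
	return 0;
}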
@@ -1008,6 +1014,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) | |||
1008 | unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); | 1014 | unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); |
1009 | pgd = READ_ONCE(kvm->arch.pgd); | 1015 | pgd = READ_ONCE(kvm->arch.pgd); |
1010 | kvm->arch.pgd = NULL; | 1016 | kvm->arch.pgd = NULL; |
1017 | kvm->arch.pgd_phys = 0; | ||
1011 | } | 1018 | } |
1012 | spin_unlock(&kvm->mmu_lock); | 1019 | spin_unlock(&kvm->mmu_lock); |
1013 | 1020 | ||
@@ -1396,14 +1403,6 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) | |||
1396 | return false; | 1403 | return false; |
1397 | } | 1404 | } |
1398 | 1405 | ||
1399 | static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) | ||
1400 | { | ||
1401 | if (kvm_vcpu_trap_is_iabt(vcpu)) | ||
1402 | return false; | ||
1403 | |||
1404 | return kvm_vcpu_dabt_iswrite(vcpu); | ||
1405 | } | ||
1406 | |||
1407 | /** | 1406 | /** |
1408 | * stage2_wp_ptes - write protect PMD range | 1407 | * stage2_wp_ptes - write protect PMD range |
1409 | * @pmd: pointer to pmd entry | 1408 | * @pmd: pointer to pmd entry |
@@ -1598,14 +1597,13 @@ static void kvm_send_hwpoison_signal(unsigned long address, | |||
1598 | static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, | 1597 | static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, |
1599 | unsigned long hva) | 1598 | unsigned long hva) |
1600 | { | 1599 | { |
1601 | gpa_t gpa_start, gpa_end; | 1600 | gpa_t gpa_start; |
1602 | hva_t uaddr_start, uaddr_end; | 1601 | hva_t uaddr_start, uaddr_end; |
1603 | size_t size; | 1602 | size_t size; |
1604 | 1603 | ||
1605 | size = memslot->npages * PAGE_SIZE; | 1604 | size = memslot->npages * PAGE_SIZE; |
1606 | 1605 | ||
1607 | gpa_start = memslot->base_gfn << PAGE_SHIFT; | 1606 | gpa_start = memslot->base_gfn << PAGE_SHIFT; |
1608 | gpa_end = gpa_start + size; | ||
1609 | 1607 | ||
1610 | uaddr_start = memslot->userspace_addr; | 1608 | uaddr_start = memslot->userspace_addr; |
1611 | uaddr_end = uaddr_start + size; | 1609 | uaddr_end = uaddr_start + size; |
@@ -2353,7 +2351,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
2353 | return 0; | 2351 | return 0; |
2354 | } | 2352 | } |
2355 | 2353 | ||
2356 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) | 2354 | void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) |
2357 | { | 2355 | { |
2358 | } | 2356 | } |
2359 | 2357 | ||
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 3828beab93f2..204d210d01c2 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) | 2 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) |
3 | #define _TRACE_KVM_H | 3 | #define _TRACE_KVM_H |
4 | 4 | ||
5 | #include <kvm/arm_arch_timer.h> | ||
5 | #include <linux/tracepoint.h> | 6 | #include <linux/tracepoint.h> |
6 | 7 | ||
7 | #undef TRACE_SYSTEM | 8 | #undef TRACE_SYSTEM |
@@ -262,10 +263,114 @@ TRACE_EVENT(kvm_timer_update_irq, | |||
262 | __entry->vcpu_id, __entry->irq, __entry->level) | 263 | __entry->vcpu_id, __entry->irq, __entry->level) |
263 | ); | 264 | ); |
264 | 265 | ||
266 | TRACE_EVENT(kvm_get_timer_map, | ||
267 | TP_PROTO(unsigned long vcpu_id, struct timer_map *map), | ||
268 | TP_ARGS(vcpu_id, map), | ||
269 | |||
270 | TP_STRUCT__entry( | ||
271 | __field( unsigned long, vcpu_id ) | ||
272 | __field( int, direct_vtimer ) | ||
273 | __field( int, direct_ptimer ) | ||
274 | __field( int, emul_ptimer ) | ||
275 | ), | ||
276 | |||
277 | TP_fast_assign( | ||
278 | __entry->vcpu_id = vcpu_id; | ||
279 | __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); | ||
280 | __entry->direct_ptimer = | ||
281 | (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; | ||
282 | __entry->emul_ptimer = | ||
283 | (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; | ||
284 | ), | ||
285 | |||
286 | TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", | ||
287 | __entry->vcpu_id, | ||
288 | __entry->direct_vtimer, | ||
289 | __entry->direct_ptimer, | ||
290 | __entry->emul_ptimer) | ||
291 | ); | ||
292 | |||
293 | TRACE_EVENT(kvm_timer_save_state, | ||
294 | TP_PROTO(struct arch_timer_context *ctx), | ||
295 | TP_ARGS(ctx), | ||
296 | |||
297 | TP_STRUCT__entry( | ||
298 | __field( unsigned long, ctl ) | ||
299 | __field( unsigned long long, cval ) | ||
300 | __field( int, timer_idx ) | ||
301 | ), | ||
302 | |||
303 | TP_fast_assign( | ||
304 | __entry->ctl = ctx->cnt_ctl; | ||
305 | __entry->cval = ctx->cnt_cval; | ||
306 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
307 | ), | ||
308 | |||
309 | TP_printk(" CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", | ||
310 | __entry->ctl, | ||
311 | __entry->cval, | ||
312 | __entry->timer_idx) | ||
313 | ); | ||
314 | |||
315 | TRACE_EVENT(kvm_timer_restore_state, | ||
316 | TP_PROTO(struct arch_timer_context *ctx), | ||
317 | TP_ARGS(ctx), | ||
318 | |||
319 | TP_STRUCT__entry( | ||
320 | __field( unsigned long, ctl ) | ||
321 | __field( unsigned long long, cval ) | ||
322 | __field( int, timer_idx ) | ||
323 | ), | ||
324 | |||
325 | TP_fast_assign( | ||
326 | __entry->ctl = ctx->cnt_ctl; | ||
327 | __entry->cval = ctx->cnt_cval; | ||
328 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
329 | ), | ||
330 | |||
331 | TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", | ||
332 | __entry->ctl, | ||
333 | __entry->cval, | ||
334 | __entry->timer_idx) | ||
335 | ); | ||
336 | |||
337 | TRACE_EVENT(kvm_timer_hrtimer_expire, | ||
338 | TP_PROTO(struct arch_timer_context *ctx), | ||
339 | TP_ARGS(ctx), | ||
340 | |||
341 | TP_STRUCT__entry( | ||
342 | __field( int, timer_idx ) | ||
343 | ), | ||
344 | |||
345 | TP_fast_assign( | ||
346 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
347 | ), | ||
348 | |||
349 | TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx) | ||
350 | ); | ||
351 | |||
352 | TRACE_EVENT(kvm_timer_emulate, | ||
353 | TP_PROTO(struct arch_timer_context *ctx, bool should_fire), | ||
354 | TP_ARGS(ctx, should_fire), | ||
355 | |||
356 | TP_STRUCT__entry( | ||
357 | __field( int, timer_idx ) | ||
358 | __field( bool, should_fire ) | ||
359 | ), | ||
360 | |||
361 | TP_fast_assign( | ||
362 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
363 | __entry->should_fire = should_fire; | ||
364 | ), | ||
365 | |||
366 | TP_printk("arch_timer_ctx_index: %d (should_fire: %d)", | ||
367 | __entry->timer_idx, __entry->should_fire) | ||
368 | ); | ||
369 | |||
265 | #endif /* _TRACE_KVM_H */ | 370 | #endif /* _TRACE_KVM_H */ |
266 | 371 | ||
267 | #undef TRACE_INCLUDE_PATH | 372 | #undef TRACE_INCLUDE_PATH |
268 | #define TRACE_INCLUDE_PATH ../../../virt/kvm/arm | 373 | #define TRACE_INCLUDE_PATH ../../virt/kvm/arm |
269 | #undef TRACE_INCLUDE_FILE | 374 | #undef TRACE_INCLUDE_FILE |
270 | #define TRACE_INCLUDE_FILE trace | 375 | #define TRACE_INCLUDE_FILE trace |
271 | 376 | ||
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 4ee0aeb9a905..408a78eb6a97 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c | |||
@@ -589,7 +589,7 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); | |||
589 | */ | 589 | */ |
590 | int vgic_v3_probe(const struct gic_kvm_info *info) | 590 | int vgic_v3_probe(const struct gic_kvm_info *info) |
591 | { | 591 | { |
592 | u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); | 592 | u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); |
593 | int ret; | 593 | int ret; |
594 | 594 | ||
595 | /* | 595 | /* |
@@ -679,7 +679,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) | |||
679 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | 679 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
680 | 680 | ||
681 | if (likely(cpu_if->vgic_sre)) | 681 | if (likely(cpu_if->vgic_sre)) |
682 | cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); | 682 | cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); |
683 | 683 | ||
684 | kvm_call_hyp(__vgic_v3_save_aprs, vcpu); | 684 | kvm_call_hyp(__vgic_v3_save_aprs, vcpu); |
685 | 685 | ||
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 6855cce3e528..5294abb3f178 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
@@ -144,7 +144,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | |||
144 | if (zone->pio != 1 && zone->pio != 0) | 144 | if (zone->pio != 1 && zone->pio != 0) |
145 | return -EINVAL; | 145 | return -EINVAL; |
146 | 146 | ||
147 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | 147 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), |
148 | GFP_KERNEL_ACCOUNT); | ||
148 | if (!dev) | 149 | if (!dev) |
149 | return -ENOMEM; | 150 | return -ENOMEM; |
150 | 151 | ||
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b20b751286fc..4325250afd72 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -297,7 +297,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
297 | if (!kvm_arch_intc_initialized(kvm)) | 297 | if (!kvm_arch_intc_initialized(kvm)) |
298 | return -EAGAIN; | 298 | return -EAGAIN; |
299 | 299 | ||
300 | irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); | 300 | irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT); |
301 | if (!irqfd) | 301 | if (!irqfd) |
302 | return -ENOMEM; | 302 | return -ENOMEM; |
303 | 303 | ||
@@ -345,7 +345,8 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
345 | } | 345 | } |
346 | 346 | ||
347 | if (!irqfd->resampler) { | 347 | if (!irqfd->resampler) { |
348 | resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); | 348 | resampler = kzalloc(sizeof(*resampler), |
349 | GFP_KERNEL_ACCOUNT); | ||
349 | if (!resampler) { | 350 | if (!resampler) { |
350 | ret = -ENOMEM; | 351 | ret = -ENOMEM; |
351 | mutex_unlock(&kvm->irqfds.resampler_lock); | 352 | mutex_unlock(&kvm->irqfds.resampler_lock); |
@@ -797,7 +798,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, | |||
797 | if (IS_ERR(eventfd)) | 798 | if (IS_ERR(eventfd)) |
798 | return PTR_ERR(eventfd); | 799 | return PTR_ERR(eventfd); |
799 | 800 | ||
800 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 801 | p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT); |
801 | if (!p) { | 802 | if (!p) { |
802 | ret = -ENOMEM; | 803 | ret = -ENOMEM; |
803 | goto fail; | 804 | goto fail; |
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b1286c4e0712..3547b0d8c91e 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c | |||
@@ -196,7 +196,7 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
196 | nr_rt_entries += 1; | 196 | nr_rt_entries += 1; |
197 | 197 | ||
198 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), | 198 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), |
199 | GFP_KERNEL); | 199 | GFP_KERNEL_ACCOUNT); |
200 | 200 | ||
201 | if (!new) | 201 | if (!new) |
202 | return -ENOMEM; | 202 | return -ENOMEM; |
@@ -208,7 +208,7 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
208 | 208 | ||
209 | for (i = 0; i < nr; ++i) { | 209 | for (i = 0; i < nr; ++i) { |
210 | r = -ENOMEM; | 210 | r = -ENOMEM; |
211 | e = kzalloc(sizeof(*e), GFP_KERNEL); | 211 | e = kzalloc(sizeof(*e), GFP_KERNEL_ACCOUNT); |
212 | if (!e) | 212 | if (!e) |
213 | goto out; | 213 | goto out; |
214 | 214 | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d237d3350a99..f25aa98a94df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -81,6 +81,11 @@ unsigned int halt_poll_ns_grow = 2; | |||
81 | module_param(halt_poll_ns_grow, uint, 0644); | 81 | module_param(halt_poll_ns_grow, uint, 0644); |
82 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); | 82 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); |
83 | 83 | ||
84 | /* The start value to grow halt_poll_ns from */ | ||
85 | unsigned int halt_poll_ns_grow_start = 10000; /* 10us */ | ||
86 | module_param(halt_poll_ns_grow_start, uint, 0644); | ||
87 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start); | ||
88 | |||
84 | /* Default resets per-vcpu halt_poll_ns. */ | 89 | /* Default resets per-vcpu halt_poll_ns. */ |
85 | unsigned int halt_poll_ns_shrink; | 90 | unsigned int halt_poll_ns_shrink; |
86 | module_param(halt_poll_ns_shrink, uint, 0644); | 91 | module_param(halt_poll_ns_shrink, uint, 0644); |
@@ -525,7 +530,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) | |||
525 | int i; | 530 | int i; |
526 | struct kvm_memslots *slots; | 531 | struct kvm_memslots *slots; |
527 | 532 | ||
528 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 533 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); |
529 | if (!slots) | 534 | if (!slots) |
530 | return NULL; | 535 | return NULL; |
531 | 536 | ||
@@ -601,12 +606,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) | |||
601 | 606 | ||
602 | kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, | 607 | kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, |
603 | sizeof(*kvm->debugfs_stat_data), | 608 | sizeof(*kvm->debugfs_stat_data), |
604 | GFP_KERNEL); | 609 | GFP_KERNEL_ACCOUNT); |
605 | if (!kvm->debugfs_stat_data) | 610 | if (!kvm->debugfs_stat_data) |
606 | return -ENOMEM; | 611 | return -ENOMEM; |
607 | 612 | ||
608 | for (p = debugfs_entries; p->name; p++) { | 613 | for (p = debugfs_entries; p->name; p++) { |
609 | stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); | 614 | stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); |
610 | if (!stat_data) | 615 | if (!stat_data) |
611 | return -ENOMEM; | 616 | return -ENOMEM; |
612 | 617 | ||
@@ -656,12 +661,8 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
656 | struct kvm_memslots *slots = kvm_alloc_memslots(); | 661 | struct kvm_memslots *slots = kvm_alloc_memslots(); |
657 | if (!slots) | 662 | if (!slots) |
658 | goto out_err_no_srcu; | 663 | goto out_err_no_srcu; |
659 | /* | 664 | /* Generations must be different for each address space. */ |
660 | * Generations must be different for each address space. | 665 | slots->generation = i; |
661 | * Init kvm generation close to the maximum to easily test the | ||
662 | * code of handling generation number wrap-around. | ||
663 | */ | ||
664 | slots->generation = i * 2 - 150; | ||
665 | rcu_assign_pointer(kvm->memslots[i], slots); | 666 | rcu_assign_pointer(kvm->memslots[i], slots); |
666 | } | 667 | } |
667 | 668 | ||
@@ -671,7 +672,7 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
671 | goto out_err_no_irq_srcu; | 672 | goto out_err_no_irq_srcu; |
672 | for (i = 0; i < KVM_NR_BUSES; i++) { | 673 | for (i = 0; i < KVM_NR_BUSES; i++) { |
673 | rcu_assign_pointer(kvm->buses[i], | 674 | rcu_assign_pointer(kvm->buses[i], |
674 | kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); | 675 | kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); |
675 | if (!kvm->buses[i]) | 676 | if (!kvm->buses[i]) |
676 | goto out_err; | 677 | goto out_err; |
677 | } | 678 | } |
@@ -789,7 +790,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
789 | { | 790 | { |
790 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); | 791 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); |
791 | 792 | ||
792 | memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); | 793 | memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT); |
793 | if (!memslot->dirty_bitmap) | 794 | if (!memslot->dirty_bitmap) |
794 | return -ENOMEM; | 795 | return -ENOMEM; |
795 | 796 | ||
@@ -874,31 +875,34 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
874 | int as_id, struct kvm_memslots *slots) | 875 | int as_id, struct kvm_memslots *slots) |
875 | { | 876 | { |
876 | struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); | 877 | struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); |
878 | u64 gen = old_memslots->generation; | ||
877 | 879 | ||
878 | /* | 880 | WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); |
879 | * Set the low bit in the generation, which disables SPTE caching | 881 | slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; |
880 | * until the end of synchronize_srcu_expedited. | ||
881 | */ | ||
882 | WARN_ON(old_memslots->generation & 1); | ||
883 | slots->generation = old_memslots->generation + 1; | ||
884 | 882 | ||
885 | rcu_assign_pointer(kvm->memslots[as_id], slots); | 883 | rcu_assign_pointer(kvm->memslots[as_id], slots); |
886 | synchronize_srcu_expedited(&kvm->srcu); | 884 | synchronize_srcu_expedited(&kvm->srcu); |
887 | 885 | ||
888 | /* | 886 | /* |
889 | * Increment the new memslot generation a second time. This prevents | 887 | * Increment the new memslot generation a second time, dropping the |
890 | * vm exits that race with memslot updates from caching a memslot | 888 | * update in-progress flag and incrementing the generation based on |
891 | * generation that will (potentially) be valid forever. | 889 | * the number of address spaces. This provides a unique and easily |
892 | * | 890 | * identifiable generation number while the memslots are in flux. |
891 | */ | ||
892 | gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; | ||
893 | |||
894 | /* | ||
893 | * Generations must be unique even across address spaces. We do not need | 895 | * Generations must be unique even across address spaces. We do not need |
894 | * a global counter for that, instead the generation space is evenly split | 896 | * a global counter for that, instead the generation space is evenly split |
895 | * across address spaces. For example, with two address spaces, address | 897 | * across address spaces. For example, with two address spaces, address |
896 | * space 0 will use generations 0, 4, 8, ... while * address space 1 will | 898 | * space 0 will use generations 0, 2, 4, ... while address space 1 will |
897 | * use generations 2, 6, 10, 14, ... | 899 | * use generations 1, 3, 5, ... |
898 | */ | 900 | */ |
899 | slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; | 901 | gen += KVM_ADDRESS_SPACE_NUM; |
902 | |||
903 | kvm_arch_memslots_updated(kvm, gen); | ||
900 | 904 | ||
901 | kvm_arch_memslots_updated(kvm, slots); | 905 | slots->generation = gen; |
902 | 906 | ||
903 | return old_memslots; | 907 | return old_memslots; |
904 | } | 908 | } |
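The rework replaces the old low "generation bit" with a dedicated update-in-progress flag and a per-address-space stride. A standalone model of the new encoding (the flag living in bit 63 is an assumption of this sketch):

/* Model of the generation handling in install_new_memslots() above: a
 * high flag bit marks the window where readers may still see stale
 * slots; once the SRCU switch-over completes, the flag is dropped and
 * the counter advances by the number of address spaces, keeping each
 * space's generation sequence disjoint.
 */
#include <stdint.h>
#include <stdio.h>

#define GEN_IN_PROGRESS   (1ULL << 63) /* assumed flag position */
#define NR_ADDRESS_SPACES 2

static uint64_t begin_update(uint64_t old_gen)
{
	return old_gen | GEN_IN_PROGRESS;
}

static uint64_t finish_update(uint64_t gen)
{
	gen &= ~GEN_IN_PROGRESS;       /* drop the in-progress flag         */
	gen += NR_ADDRESS_SPACES;      /* keep per-space sequences disjoint */
	return gen;
}

int main(void)
{
	uint64_t gen = 0;              /* address space 0: 0, 2, 4, ... */

	gen = begin_update(gen);
	printf("in flux: %#llx\n", (unsigned long long)gen);
	gen = finish_update(gen);
	printf("settled: %llu\n", (unsigned long long)gen);
	return 0;
}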
@@ -1018,7 +1022,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
1018 | goto out_free; | 1022 | goto out_free; |
1019 | } | 1023 | } |
1020 | 1024 | ||
1021 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 1025 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); |
1022 | if (!slots) | 1026 | if (!slots) |
1023 | goto out_free; | 1027 | goto out_free; |
1024 | memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); | 1028 | memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); |
@@ -1201,11 +1205,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, | |||
1201 | mask = xchg(&dirty_bitmap[i], 0); | 1205 | mask = xchg(&dirty_bitmap[i], 0); |
1202 | dirty_bitmap_buffer[i] = mask; | 1206 | dirty_bitmap_buffer[i] = mask; |
1203 | 1207 | ||
1204 | if (mask) { | 1208 | offset = i * BITS_PER_LONG; |
1205 | offset = i * BITS_PER_LONG; | 1209 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, |
1206 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, | 1210 | offset, mask); |
1207 | offset, mask); | ||
1208 | } | ||
1209 | } | 1211 | } |
1210 | spin_unlock(&kvm->mmu_lock); | 1212 | spin_unlock(&kvm->mmu_lock); |
1211 | } | 1213 | } |
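This hunk restores the unconditional call into the arch hook for every bitmap word, reverting the micro-optimization listed in the merge summary. A userspace model of the xchg-based harvest, with GCC/Clang builtins standing in for the kernel's xchg():

/* Model of the harvest loop above: each long of the dirty bitmap is
 * atomically exchanged with zero, copied to the user-visible snapshot,
 * and the per-word hook now runs even when the word is zero.
 */
#include <stdio.h>

#define WORDS 4
#define BITS_PER_LONG (8 * sizeof(unsigned long))

static unsigned long dirty_bitmap[WORDS] = { 0x5, 0, 0x80, 0 };
static unsigned long snapshot[WORDS];

static void enable_log_dirty_masked(unsigned long offset, unsigned long mask)
{
	printf("offset %lu mask %#lx\n", offset, mask);
}

int main(void)
{
	unsigned long i, mask;

	for (i = 0; i < WORDS; i++) {
		mask = __atomic_exchange_n(&dirty_bitmap[i], 0,
					   __ATOMIC_SEQ_CST);
		snapshot[i] = mask;
		/* called unconditionally, mirroring the reverted branch */
		enable_log_dirty_masked(i * BITS_PER_LONG, mask);
	}
	return 0;
}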
@@ -2185,20 +2187,23 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) | |||
2185 | 2187 | ||
2186 | static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) | 2188 | static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) |
2187 | { | 2189 | { |
2188 | unsigned int old, val, grow; | 2190 | unsigned int old, val, grow, grow_start; |
2189 | 2191 | ||
2190 | old = val = vcpu->halt_poll_ns; | 2192 | old = val = vcpu->halt_poll_ns; |
2193 | grow_start = READ_ONCE(halt_poll_ns_grow_start); | ||
2191 | grow = READ_ONCE(halt_poll_ns_grow); | 2194 | grow = READ_ONCE(halt_poll_ns_grow); |
2192 | /* 10us base */ | 2195 | if (!grow) |
2193 | if (val == 0 && grow) | 2196 | goto out; |
2194 | val = 10000; | 2197 | |
2195 | else | 2198 | val *= grow; |
2196 | val *= grow; | 2199 | if (val < grow_start) |
2200 | val = grow_start; | ||
2197 | 2201 | ||
2198 | if (val > halt_poll_ns) | 2202 | if (val > halt_poll_ns) |
2199 | val = halt_poll_ns; | 2203 | val = halt_poll_ns; |
2200 | 2204 | ||
2201 | vcpu->halt_poll_ns = val; | 2205 | vcpu->halt_poll_ns = val; |
2206 | out: | ||
2202 | trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); | 2207 | trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); |
2203 | } | 2208 | } |
2204 | 2209 | ||
@@ -2683,7 +2688,7 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
2683 | struct kvm_regs *kvm_regs; | 2688 | struct kvm_regs *kvm_regs; |
2684 | 2689 | ||
2685 | r = -ENOMEM; | 2690 | r = -ENOMEM; |
2686 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); | 2691 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT); |
2687 | if (!kvm_regs) | 2692 | if (!kvm_regs) |
2688 | goto out; | 2693 | goto out; |
2689 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); | 2694 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); |
@@ -2711,7 +2716,8 @@ out_free1: | |||
2711 | break; | 2716 | break; |
2712 | } | 2717 | } |
2713 | case KVM_GET_SREGS: { | 2718 | case KVM_GET_SREGS: { |
2714 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); | 2719 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), |
2720 | GFP_KERNEL_ACCOUNT); | ||
2715 | r = -ENOMEM; | 2721 | r = -ENOMEM; |
2716 | if (!kvm_sregs) | 2722 | if (!kvm_sregs) |
2717 | goto out; | 2723 | goto out; |
@@ -2803,7 +2809,7 @@ out_free1: | |||
2803 | break; | 2809 | break; |
2804 | } | 2810 | } |
2805 | case KVM_GET_FPU: { | 2811 | case KVM_GET_FPU: { |
2806 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); | 2812 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT); |
2807 | r = -ENOMEM; | 2813 | r = -ENOMEM; |
2808 | if (!fpu) | 2814 | if (!fpu) |
2809 | goto out; | 2815 | goto out; |
@@ -2980,7 +2986,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, | |||
2980 | if (test) | 2986 | if (test) |
2981 | return 0; | 2987 | return 0; |
2982 | 2988 | ||
2983 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | 2989 | dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT); |
2984 | if (!dev) | 2990 | if (!dev) |
2985 | return -ENOMEM; | 2991 | return -ENOMEM; |
2986 | 2992 | ||
@@ -3625,6 +3631,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
3625 | r = __kvm_io_bus_write(vcpu, bus, &range, val); | 3631 | r = __kvm_io_bus_write(vcpu, bus, &range, val); |
3626 | return r < 0 ? r : 0; | 3632 | return r < 0 ? r : 0; |
3627 | } | 3633 | } |
3634 | EXPORT_SYMBOL_GPL(kvm_io_bus_write); | ||
3628 | 3635 | ||
3629 | /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ | 3636 | /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ |
3630 | int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, | 3637 | int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, |
@@ -3675,7 +3682,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, | |||
3675 | 3682 | ||
3676 | return -EOPNOTSUPP; | 3683 | return -EOPNOTSUPP; |
3677 | } | 3684 | } |
3678 | EXPORT_SYMBOL_GPL(kvm_io_bus_write); | ||
3679 | 3685 | ||
3680 | /* kvm_io_bus_read - called under kvm->slots_lock */ | 3686 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
3681 | int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | 3687 | int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, |
@@ -3697,7 +3703,6 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
3697 | return r < 0 ? r : 0; | 3703 | return r < 0 ? r : 0; |
3698 | } | 3704 | } |
3699 | 3705 | ||
3700 | |||
3701 | /* Caller must hold slots_lock. */ | 3706 | /* Caller must hold slots_lock. */ |
3702 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 3707 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
3703 | int len, struct kvm_io_device *dev) | 3708 | int len, struct kvm_io_device *dev) |
@@ -3714,8 +3719,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
3714 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | 3719 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) |
3715 | return -ENOSPC; | 3720 | return -ENOSPC; |
3716 | 3721 | ||
3717 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * | 3722 | new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), |
3718 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3723 | GFP_KERNEL_ACCOUNT); |
3719 | if (!new_bus) | 3724 | if (!new_bus) |
3720 | return -ENOMEM; | 3725 | return -ENOMEM; |
3721 | 3726 | ||
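struct_size() folds the header-plus-flexible-array size computation into a single overflow-checked expression. A sketch with a simplified, non-checking macro (the real kernel macro also saturates on overflow):

/* Model of the struct_size() conversion above: for a struct ending in a
 * flexible array, struct_size(p, member, n) is the header size plus n
 * trailing elements. The macro here is a simplified stand-in.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct io_range { unsigned long addr; int len; };

struct io_bus {
	int dev_count;
	struct io_range range[];       /* flexible array member */
};

#define struct_size(p, member, n) \
	(sizeof(*(p)) + (n) * sizeof((p)->member[0]))

int main(void)
{
	struct io_bus *bus, *new_bus;

	bus = calloc(1, struct_size(bus, range, 2));
	bus->dev_count = 2;

	/* grow by one device, as kvm_io_bus_register_dev() does */
	new_bus = malloc(struct_size(bus, range, bus->dev_count + 1));
	memcpy(new_bus, bus, struct_size(bus, range, bus->dev_count));
	new_bus->dev_count++;

	printf("%zu -> %zu bytes\n",
	       struct_size(bus, range, 2), struct_size(bus, range, 3));
	free(bus);
	free(new_bus);
	return 0;
}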
@@ -3760,8 +3765,8 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
3760 | if (i == bus->dev_count) | 3765 | if (i == bus->dev_count) |
3761 | return; | 3766 | return; |
3762 | 3767 | ||
3763 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * | 3768 | new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), |
3764 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3769 | GFP_KERNEL_ACCOUNT); |
3765 | if (!new_bus) { | 3770 | if (!new_bus) { |
3766 | pr_err("kvm: failed to shrink bus, removing it completely\n"); | 3771 | pr_err("kvm: failed to shrink bus, removing it completely\n"); |
3767 | goto broken; | 3772 | goto broken; |
@@ -4029,7 +4034,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) | |||
4029 | active = kvm_active_vms; | 4034 | active = kvm_active_vms; |
4030 | spin_unlock(&kvm_lock); | 4035 | spin_unlock(&kvm_lock); |
4031 | 4036 | ||
4032 | env = kzalloc(sizeof(*env), GFP_KERNEL); | 4037 | env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); |
4033 | if (!env) | 4038 | if (!env) |
4034 | return; | 4039 | return; |
4035 | 4040 | ||
@@ -4045,7 +4050,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) | |||
4045 | add_uevent_var(env, "PID=%d", kvm->userspace_pid); | 4050 | add_uevent_var(env, "PID=%d", kvm->userspace_pid); |
4046 | 4051 | ||
4047 | if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { | 4052 | if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { |
4048 | char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); | 4053 | char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); |
4049 | 4054 | ||
4050 | if (p) { | 4055 | if (p) { |
4051 | tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); | 4056 | tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); |
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d99850c462a1..524cbd20379f 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c | |||
@@ -219,7 +219,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) | |||
219 | } | 219 | } |
220 | } | 220 | } |
221 | 221 | ||
222 | kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); | 222 | kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT); |
223 | if (!kvg) { | 223 | if (!kvg) { |
224 | mutex_unlock(&kv->lock); | 224 | mutex_unlock(&kv->lock); |
225 | kvm_vfio_group_put_external_user(vfio_group); | 225 | kvm_vfio_group_put_external_user(vfio_group); |
@@ -405,7 +405,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) | |||
405 | if (tmp->ops == &kvm_vfio_ops) | 405 | if (tmp->ops == &kvm_vfio_ops) |
406 | return -EBUSY; | 406 | return -EBUSY; |
407 | 407 | ||
408 | kv = kzalloc(sizeof(*kv), GFP_KERNEL); | 408 | kv = kzalloc(sizeof(*kv), GFP_KERNEL_ACCOUNT); |
409 | if (!kv) | 409 | if (!kv) |
410 | return -ENOMEM; | 410 | return -ENOMEM; |
411 | 411 | ||