93 files changed, 2623 insertions, 1199 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 356156f5c52d..7de9eee73fcd 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
| @@ -45,6 +45,23 @@ the API. The only supported use is one virtual machine per process, | |||
| 45 | and one vcpu per thread. | 45 | and one vcpu per thread. |
| 46 | 46 | ||
| 47 | 47 | ||
| 48 | It is important to note that although VM ioctls may only be issued from | ||
| 49 | the process that created the VM, a VM's lifecycle is associated with its | ||
| 50 | file descriptor, not its creator (process). In other words, the VM and | ||
| 51 | its resources, *including the associated address space*, are not freed | ||
| 52 | until the last reference to the VM's file descriptor has been released. | ||
| 53 | For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will | ||
| 54 | not be freed until both the parent (original) process and its child have | ||
| 55 | put their references to the VM's file descriptor. | ||
| 56 | |||
| 57 | Because a VM's resources are not freed until the last reference to its | ||
| 58 | file descriptor is released, creating additional references to a VM via | ||
| 59 | fork(), dup(), etc... without careful consideration is strongly | ||
| 60 | discouraged and may have unwanted side effects, e.g. memory allocated | ||
| 61 | by and on behalf of the VM's process may not be freed/unaccounted when | ||
| 62 | the VM is shut down. | ||
| 63 | |||
| 64 | |||
| 48 | 3. Extensions | 65 | 3. Extensions |
| 49 | ------------- | 66 | ------------- |
| 50 | 67 | ||
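To illustrate the lifecycle rule described above, here is a minimal userspace sketch (error handling omitted; it assumes only the standard /dev/kvm ioctl interface). The VM created below is not freed until both the parent and the forked child have closed their copies of the VM file descriptor:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm_fd = open("/dev/kvm", O_RDWR);
        int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, 0); /* VM ioctls may only be issued from this process */

        if (fork() == 0) {
            /* The child inherits a reference to vm_fd across fork(). */
            close(vm_fd);      /* drops only the child's reference */
            _exit(0);
        }

        close(vm_fd);          /* the VM, including its address space, is freed
                                  only once both references have been put */
        return 0;
    }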
diff --git a/Documentation/virtual/kvm/halt-polling.txt b/Documentation/virtual/kvm/halt-polling.txt index 4a8418318769..4f791b128dd2 100644 --- a/Documentation/virtual/kvm/halt-polling.txt +++ b/Documentation/virtual/kvm/halt-polling.txt | |||
| @@ -53,7 +53,8 @@ the global max polling interval then the polling interval can be increased in | |||
| 53 | the hope that next time during the longer polling interval the wake up source | 53 | the hope that next time during the longer polling interval the wake up source |
| 54 | will be received while the host is polling and the latency benefits will be | 54 | will be received while the host is polling and the latency benefits will be |
| 55 | received. The polling interval is grown in the function grow_halt_poll_ns() and | 55 | received. The polling interval is grown in the function grow_halt_poll_ns() and |
| 56 | is multiplied by the module parameter halt_poll_ns_grow. | 56 | is multiplied by the module parameters halt_poll_ns_grow and |
| 57 | halt_poll_ns_grow_start. | ||
| 57 | 58 | ||
| 58 | In the event that the total block time was greater than the global max polling | 59 | In the event that the total block time was greater than the global max polling |
| 59 | interval then the host will never poll for long enough (limited by the global | 60 | interval then the host will never poll for long enough (limited by the global |
| @@ -80,22 +81,30 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module | |||
| 80 | parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the | 81 | parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the |
| 81 | powerpc kvm-hv case. | 82 | powerpc kvm-hv case. |
| 82 | 83 | ||
| 83 | Module Parameter | Description | Default Value | 84 | Module Parameter | Description | Default Value |
| 84 | -------------------------------------------------------------------------------- | 85 | -------------------------------------------------------------------------------- |
| 85 | halt_poll_ns | The global max polling interval | KVM_HALT_POLL_NS_DEFAULT | 86 | halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT |
| 86 | | which defines the ceiling value | | 87 | | interval which defines | |
| 87 | | of the polling interval for | (per arch value) | 88 | | the ceiling value of the | |
| 88 | | each vcpu. | | 89 | | polling interval for | (per arch value) |
| 90 | | each vcpu. | | ||
| 89 | -------------------------------------------------------------------------------- | 91 | -------------------------------------------------------------------------------- |
| 90 | halt_poll_ns_grow | The value by which the halt | 2 | 92 | halt_poll_ns_grow | The value by which the | 2 |
| 91 | | polling interval is multiplied | | 93 | | halt polling interval is | |
| 92 | | in the grow_halt_poll_ns() | | 94 | | multiplied in the | |
| 93 | | function. | | 95 | | grow_halt_poll_ns() | |
| 96 | | function. | | ||
| 94 | -------------------------------------------------------------------------------- | 97 | -------------------------------------------------------------------------------- |
| 95 | halt_poll_ns_shrink | The value by which the halt | 0 | 98 | halt_poll_ns_grow_start | The initial value to grow | 10000 |
| 96 | | polling interval is divided in | | 99 | | to from zero in the | |
| 97 | | the shrink_halt_poll_ns() | | 100 | | grow_halt_poll_ns() | |
| 98 | | function. | | 101 | | function. | |
| 102 | -------------------------------------------------------------------------------- | ||
| 103 | halt_poll_ns_shrink | The value by which the | 0 | ||
| 104 | | halt polling interval is | | ||
| 105 | | divided in the | | ||
| 106 | | shrink_halt_poll_ns() | | ||
| 107 | | function. | | ||
| 99 | -------------------------------------------------------------------------------- | 108 | -------------------------------------------------------------------------------- |
| 100 | 109 | ||
| 101 | These module parameters can be set from the debugfs files in: | 110 | These module parameters can be set from the debugfs files in: |
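As a worked example of how the growth parameters combine, here is a simplified sketch of the step performed in grow_halt_poll_ns() (illustrative only, not the kernel's implementation; halt_poll_ns still acts as the global ceiling as described above):

    static unsigned int grow_poll_interval(unsigned int cur_ns,
                                           unsigned int grow,       /* halt_poll_ns_grow, default 2 */
                                           unsigned int grow_start) /* halt_poll_ns_grow_start, default 10000 */
    {
        unsigned int val = cur_ns * grow;

        /* A vcpu that was not polling at all (0 ns) starts at grow_start
           instead of being stuck at zero forever. */
        if (val < grow_start)
            val = grow_start;

        return val;
    }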
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index e507a9e0421e..f365102c80f5 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt | |||
| @@ -224,10 +224,6 @@ Shadow pages contain the following information: | |||
| 224 | A bitmap indicating which sptes in spt point (directly or indirectly) at | 224 | A bitmap indicating which sptes in spt point (directly or indirectly) at |
| 225 | pages that may be unsynchronized. Used to quickly locate all unsynchronized | 225 | pages that may be unsynchronized. Used to quickly locate all unsynchronized |
| 226 | pages reachable from a given page. | 226 | pages reachable from a given page. |
| 227 | mmu_valid_gen: | ||
| 228 | Generation number of the page. It is compared with kvm->arch.mmu_valid_gen | ||
| 229 | during hash table lookup, and used to skip invalidated shadow pages (see | ||
| 230 | "Zapping all pages" below.) | ||
| 231 | clear_spte_count: | 227 | clear_spte_count: |
| 232 | Only present on 32-bit hosts, where a 64-bit spte cannot be written | 228 | Only present on 32-bit hosts, where a 64-bit spte cannot be written |
| 233 | atomically. The reader uses this while running out of the MMU lock | 229 | atomically. The reader uses this while running out of the MMU lock |
| @@ -402,27 +398,6 @@ causes its disallow_lpage to be incremented, thus preventing instantiation of | |||
| 402 | a large spte. The frames at the end of an unaligned memory slot have | 398 | a large spte. The frames at the end of an unaligned memory slot have |
| 403 | artificially inflated ->disallow_lpages so they can never be instantiated. | 399 | artificially inflated ->disallow_lpages so they can never be instantiated. |
| 404 | 400 | ||
| 405 | Zapping all pages (page generation count) | ||
| 406 | ========================================= | ||
| 407 | |||
| 408 | For the large memory guests, walking and zapping all pages is really slow | ||
| 409 | (because there are a lot of pages), and also blocks memory accesses of | ||
| 410 | all VCPUs because it needs to hold the MMU lock. | ||
| 411 | |||
| 412 | To make it be more scalable, kvm maintains a global generation number | ||
| 413 | which is stored in kvm->arch.mmu_valid_gen. Every shadow page stores | ||
| 414 | the current global generation-number into sp->mmu_valid_gen when it | ||
| 415 | is created. Pages with a mismatching generation number are "obsolete". | ||
| 416 | |||
| 417 | When KVM need zap all shadow pages sptes, it just simply increases the global | ||
| 418 | generation-number then reload root shadow pages on all vcpus. As the VCPUs | ||
| 419 | create new shadow page tables, the old pages are not used because of the | ||
| 420 | mismatching generation number. | ||
| 421 | |||
| 422 | KVM then walks through all pages and zaps obsolete pages. While the zap | ||
| 423 | operation needs to take the MMU lock, the lock can be released periodically | ||
| 424 | so that the VCPUs can make progress. | ||
| 425 | |||
| 426 | Fast invalidation of MMIO sptes | 401 | Fast invalidation of MMIO sptes |
| 427 | =============================== | 402 | =============================== |
| 428 | 403 | ||
| @@ -435,8 +410,7 @@ shadow pages, and is made more scalable with a similar technique. | |||
| 435 | MMIO sptes have a few spare bits, which are used to store a | 410 | MMIO sptes have a few spare bits, which are used to store a |
| 436 | generation number. The global generation number is stored in | 411 | generation number. The global generation number is stored in |
| 437 | kvm_memslots(kvm)->generation, and increased whenever guest memory info | 412 | kvm_memslots(kvm)->generation, and increased whenever guest memory info |
| 438 | changes. This generation number is distinct from the one described in | 413 | changes. |
| 439 | the previous section. | ||
| 440 | 414 | ||
| 441 | When KVM finds an MMIO spte, it checks the generation number of the spte. | 415 | When KVM finds an MMIO spte, it checks the generation number of the spte. |
| 442 | If the generation number of the spte does not equal the global generation | 416 | If the generation number of the spte does not equal the global generation |
| @@ -452,13 +426,16 @@ stored into the MMIO spte. Thus, the MMIO spte might be created based on | |||
| 452 | out-of-date information, but with an up-to-date generation number. | 426 | out-of-date information, but with an up-to-date generation number. |
| 453 | 427 | ||
| 454 | To avoid this, the generation number is incremented again after synchronize_srcu | 428 | To avoid this, the generation number is incremented again after synchronize_srcu |
| 455 | returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a | 429 | returns; thus, bit 63 of kvm_memslots(kvm)->generation is set to 1 only during a |
| 456 | memslot update, while some SRCU readers might be using the old copy. We do not | 430 | memslot update, while some SRCU readers might be using the old copy. We do not |
| 457 | want to use an MMIO spte created with an odd generation number, and we can do | 431 | want to use an MMIO spte created with an odd generation number, and we can do |
| 458 | this without losing a bit in the MMIO spte. The low bit of the generation | 432 | this without losing a bit in the MMIO spte. The "update in-progress" bit of the |
| 459 | is not stored in MMIO spte, and presumed zero when it is extracted out of the | 433 | generation is not stored in the MMIO spte, and so is implicitly zero when the |
| 460 | spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1, | 434 | generation is extracted out of the spte. If KVM is unlucky and creates an MMIO |
| 461 | the next access to the spte will always be a cache miss. | 435 | spte while an update is in-progress, the next access to the spte will always be |
| 436 | a cache miss. For example, a subsequent access during the update window will | ||
| 437 | miss due to the in-progress flag diverging, while an access after the update | ||
| 438 | window closes will have a higher generation number (as compared to the spte). | ||
| 462 | 439 | ||
| 463 | 440 | ||
| 464 | Further reading | 441 | Further reading |
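A rough sketch of the generation check described above (the helper and bit position here are illustrative; the authoritative logic lives in arch/x86/kvm/mmu.c):

    /* Illustrative only: bit 63 marks "memslot update in progress". */
    #define MMIO_GEN_IN_PROGRESS (1ULL << 63)

    static bool mmio_spte_is_stale(u64 spte_gen, u64 memslots_gen)
    {
        /*
         * The in-progress bit is never stored in the spte, so a spte created
         * while an update is in flight compares against a generation that
         * still has the bit set and therefore misses; once the update
         * completes, the generation has been bumped again, so it misses on
         * that access too.
         */
        return spte_gen != memslots_gen;
    }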
diff --git a/MAINTAINERS b/MAINTAINERS index c009ad17ae64..e17ebf70b548 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -8461,6 +8461,7 @@ F: include/linux/kvm* | |||
| 8461 | F: include/kvm/iodev.h | 8461 | F: include/kvm/iodev.h |
| 8462 | F: virt/kvm/* | 8462 | F: virt/kvm/* |
| 8463 | F: tools/kvm/ | 8463 | F: tools/kvm/ |
| 8464 | F: tools/testing/selftests/kvm/ | ||
| 8464 | 8465 | ||
| 8465 | KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd) | 8466 | KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd) |
| 8466 | M: Joerg Roedel <joro@8bytes.org> | 8467 | M: Joerg Roedel <joro@8bytes.org> |
| @@ -8470,29 +8471,25 @@ S: Maintained | |||
| 8470 | F: arch/x86/include/asm/svm.h | 8471 | F: arch/x86/include/asm/svm.h |
| 8471 | F: arch/x86/kvm/svm.c | 8472 | F: arch/x86/kvm/svm.c |
| 8472 | 8473 | ||
| 8473 | KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm) | 8474 | KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64) |
| 8474 | M: Christoffer Dall <christoffer.dall@arm.com> | 8475 | M: Christoffer Dall <christoffer.dall@arm.com> |
| 8475 | M: Marc Zyngier <marc.zyngier@arm.com> | 8476 | M: Marc Zyngier <marc.zyngier@arm.com> |
| 8477 | R: James Morse <james.morse@arm.com> | ||
| 8478 | R: Julien Thierry <julien.thierry@arm.com> | ||
| 8479 | R: Suzuki K Poulose <suzuki.poulose@arm.com> | ||
| 8476 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | 8480 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) |
| 8477 | L: kvmarm@lists.cs.columbia.edu | 8481 | L: kvmarm@lists.cs.columbia.edu |
| 8478 | W: http://systems.cs.columbia.edu/projects/kvm-arm | 8482 | W: http://systems.cs.columbia.edu/projects/kvm-arm |
| 8479 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git | 8483 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git |
| 8480 | S: Supported | 8484 | S: Maintained |
| 8481 | F: arch/arm/include/uapi/asm/kvm* | 8485 | F: arch/arm/include/uapi/asm/kvm* |
| 8482 | F: arch/arm/include/asm/kvm* | 8486 | F: arch/arm/include/asm/kvm* |
| 8483 | F: arch/arm/kvm/ | 8487 | F: arch/arm/kvm/ |
| 8484 | F: virt/kvm/arm/ | ||
| 8485 | F: include/kvm/arm_* | ||
| 8486 | |||
| 8487 | KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) | ||
| 8488 | M: Christoffer Dall <christoffer.dall@arm.com> | ||
| 8489 | M: Marc Zyngier <marc.zyngier@arm.com> | ||
| 8490 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | ||
| 8491 | L: kvmarm@lists.cs.columbia.edu | ||
| 8492 | S: Maintained | ||
| 8493 | F: arch/arm64/include/uapi/asm/kvm* | 8488 | F: arch/arm64/include/uapi/asm/kvm* |
| 8494 | F: arch/arm64/include/asm/kvm* | 8489 | F: arch/arm64/include/asm/kvm* |
| 8495 | F: arch/arm64/kvm/ | 8490 | F: arch/arm64/kvm/ |
| 8491 | F: virt/kvm/arm/ | ||
| 8492 | F: include/kvm/arm_* | ||
| 8496 | 8493 | ||
| 8497 | KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips) | 8494 | KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips) |
| 8498 | M: James Hogan <jhogan@kernel.org> | 8495 | M: James Hogan <jhogan@kernel.org> |
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index f6f485f4744e..d15b8c99f1b3 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h | |||
| @@ -55,7 +55,7 @@ | |||
| 55 | #define ICH_VTR __ACCESS_CP15(c12, 4, c11, 1) | 55 | #define ICH_VTR __ACCESS_CP15(c12, 4, c11, 1) |
| 56 | #define ICH_MISR __ACCESS_CP15(c12, 4, c11, 2) | 56 | #define ICH_MISR __ACCESS_CP15(c12, 4, c11, 2) |
| 57 | #define ICH_EISR __ACCESS_CP15(c12, 4, c11, 3) | 57 | #define ICH_EISR __ACCESS_CP15(c12, 4, c11, 3) |
| 58 | #define ICH_ELSR __ACCESS_CP15(c12, 4, c11, 5) | 58 | #define ICH_ELRSR __ACCESS_CP15(c12, 4, c11, 5) |
| 59 | #define ICH_VMCR __ACCESS_CP15(c12, 4, c11, 7) | 59 | #define ICH_VMCR __ACCESS_CP15(c12, 4, c11, 7) |
| 60 | 60 | ||
| 61 | #define __LR0(x) __ACCESS_CP15(c12, 4, c12, x) | 61 | #define __LR0(x) __ACCESS_CP15(c12, 4, c12, x) |
| @@ -152,7 +152,7 @@ CPUIF_MAP(ICH_HCR, ICH_HCR_EL2) | |||
| 152 | CPUIF_MAP(ICH_VTR, ICH_VTR_EL2) | 152 | CPUIF_MAP(ICH_VTR, ICH_VTR_EL2) |
| 153 | CPUIF_MAP(ICH_MISR, ICH_MISR_EL2) | 153 | CPUIF_MAP(ICH_MISR, ICH_MISR_EL2) |
| 154 | CPUIF_MAP(ICH_EISR, ICH_EISR_EL2) | 154 | CPUIF_MAP(ICH_EISR, ICH_EISR_EL2) |
| 155 | CPUIF_MAP(ICH_ELSR, ICH_ELSR_EL2) | 155 | CPUIF_MAP(ICH_ELRSR, ICH_ELRSR_EL2) |
| 156 | CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2) | 156 | CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2) |
| 157 | CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2) | 157 | CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2) |
| 158 | CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2) | 158 | CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2) |
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 77121b713bef..8927cae7c966 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h | |||
| @@ -265,6 +265,14 @@ static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) | |||
| 265 | } | 265 | } |
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) | ||
| 269 | { | ||
| 270 | if (kvm_vcpu_trap_is_iabt(vcpu)) | ||
| 271 | return false; | ||
| 272 | |||
| 273 | return kvm_vcpu_dabt_iswrite(vcpu); | ||
| 274 | } | ||
| 275 | |||
| 268 | static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) | 276 | static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) |
| 269 | { | 277 | { |
| 270 | return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; | 278 | return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; |
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 50e89869178a..770d73257ad9 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <asm/kvm_asm.h> | 26 | #include <asm/kvm_asm.h> |
| 27 | #include <asm/kvm_mmio.h> | 27 | #include <asm/kvm_mmio.h> |
| 28 | #include <asm/fpstate.h> | 28 | #include <asm/fpstate.h> |
| 29 | #include <asm/smp_plat.h> | ||
| 29 | #include <kvm/arm_arch_timer.h> | 30 | #include <kvm/arm_arch_timer.h> |
| 30 | 31 | ||
| 31 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED | 32 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED |
| @@ -57,10 +58,13 @@ int __attribute_const__ kvm_target_cpu(void); | |||
| 57 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); | 58 | int kvm_reset_vcpu(struct kvm_vcpu *vcpu); |
| 58 | void kvm_reset_coprocs(struct kvm_vcpu *vcpu); | 59 | void kvm_reset_coprocs(struct kvm_vcpu *vcpu); |
| 59 | 60 | ||
| 60 | struct kvm_arch { | 61 | struct kvm_vmid { |
| 61 | /* VTTBR value associated with below pgd and vmid */ | 62 | /* The VMID generation used for the virt. memory system */ |
| 62 | u64 vttbr; | 63 | u64 vmid_gen; |
| 64 | u32 vmid; | ||
| 65 | }; | ||
| 63 | 66 | ||
| 67 | struct kvm_arch { | ||
| 64 | /* The last vcpu id that ran on each physical CPU */ | 68 | /* The last vcpu id that ran on each physical CPU */ |
| 65 | int __percpu *last_vcpu_ran; | 69 | int __percpu *last_vcpu_ran; |
| 66 | 70 | ||
| @@ -70,11 +74,11 @@ struct kvm_arch { | |||
| 70 | */ | 74 | */ |
| 71 | 75 | ||
| 72 | /* The VMID generation used for the virt. memory system */ | 76 | /* The VMID generation used for the virt. memory system */ |
| 73 | u64 vmid_gen; | 77 | struct kvm_vmid vmid; |
| 74 | u32 vmid; | ||
| 75 | 78 | ||
| 76 | /* Stage-2 page table */ | 79 | /* Stage-2 page table */ |
| 77 | pgd_t *pgd; | 80 | pgd_t *pgd; |
| 81 | phys_addr_t pgd_phys; | ||
| 78 | 82 | ||
| 79 | /* Interrupt controller */ | 83 | /* Interrupt controller */ |
| 80 | struct vgic_dist vgic; | 84 | struct vgic_dist vgic; |
| @@ -148,6 +152,13 @@ struct kvm_cpu_context { | |||
| 148 | 152 | ||
| 149 | typedef struct kvm_cpu_context kvm_cpu_context_t; | 153 | typedef struct kvm_cpu_context kvm_cpu_context_t; |
| 150 | 154 | ||
| 155 | static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, | ||
| 156 | int cpu) | ||
| 157 | { | ||
| 158 | /* The host's MPIDR is immutable, so let's set it up at boot time */ | ||
| 159 | cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu); | ||
| 160 | } | ||
| 161 | |||
| 151 | struct vcpu_reset_state { | 162 | struct vcpu_reset_state { |
| 152 | unsigned long pc; | 163 | unsigned long pc; |
| 153 | unsigned long r0; | 164 | unsigned long r0; |
| @@ -224,7 +235,35 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); | |||
| 224 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); | 235 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); |
| 225 | int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); | 236 | int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); |
| 226 | int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); | 237 | int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); |
| 227 | unsigned long kvm_call_hyp(void *hypfn, ...); | 238 | |
| 239 | unsigned long __kvm_call_hyp(void *hypfn, ...); | ||
| 240 | |||
| 241 | /* | ||
| 242 | * The has_vhe() part doesn't get emitted, but is used for type-checking. | ||
| 243 | */ | ||
| 244 | #define kvm_call_hyp(f, ...) \ | ||
| 245 | do { \ | ||
| 246 | if (has_vhe()) { \ | ||
| 247 | f(__VA_ARGS__); \ | ||
| 248 | } else { \ | ||
| 249 | __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ | ||
| 250 | } \ | ||
| 251 | } while(0) | ||
| 252 | |||
| 253 | #define kvm_call_hyp_ret(f, ...) \ | ||
| 254 | ({ \ | ||
| 255 | typeof(f(__VA_ARGS__)) ret; \ | ||
| 256 | \ | ||
| 257 | if (has_vhe()) { \ | ||
| 258 | ret = f(__VA_ARGS__); \ | ||
| 259 | } else { \ | ||
| 260 | ret = __kvm_call_hyp(kvm_ksym_ref(f), \ | ||
| 261 | ##__VA_ARGS__); \ | ||
| 262 | } \ | ||
| 263 | \ | ||
| 264 | ret; \ | ||
| 265 | }) | ||
| 266 | |||
| 228 | void force_vm_exit(const cpumask_t *mask); | 267 | void force_vm_exit(const cpumask_t *mask); |
| 229 | int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, | 268 | int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, |
| 230 | struct kvm_vcpu_events *events); | 269 | struct kvm_vcpu_events *events); |
| @@ -275,7 +314,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, | |||
| 275 | * compliant with the PCS!). | 314 | * compliant with the PCS!). |
| 276 | */ | 315 | */ |
| 277 | 316 | ||
| 278 | kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); | 317 | __kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); |
| 279 | } | 318 | } |
| 280 | 319 | ||
| 281 | static inline void __cpu_init_stage2(void) | 320 | static inline void __cpu_init_stage2(void) |
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index e93a0cac9add..87bcd18df8d5 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #define TTBR1 __ACCESS_CP15_64(1, c2) | 40 | #define TTBR1 __ACCESS_CP15_64(1, c2) |
| 41 | #define VTTBR __ACCESS_CP15_64(6, c2) | 41 | #define VTTBR __ACCESS_CP15_64(6, c2) |
| 42 | #define PAR __ACCESS_CP15_64(0, c7) | 42 | #define PAR __ACCESS_CP15_64(0, c7) |
| 43 | #define CNTP_CVAL __ACCESS_CP15_64(2, c14) | ||
| 43 | #define CNTV_CVAL __ACCESS_CP15_64(3, c14) | 44 | #define CNTV_CVAL __ACCESS_CP15_64(3, c14) |
| 44 | #define CNTVOFF __ACCESS_CP15_64(4, c14) | 45 | #define CNTVOFF __ACCESS_CP15_64(4, c14) |
| 45 | 46 | ||
| @@ -85,6 +86,7 @@ | |||
| 85 | #define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4) | 86 | #define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4) |
| 86 | #define HTPIDR __ACCESS_CP15(c13, 4, c0, 2) | 87 | #define HTPIDR __ACCESS_CP15(c13, 4, c0, 2) |
| 87 | #define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0) | 88 | #define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0) |
| 89 | #define CNTP_CTL __ACCESS_CP15(c14, 0, c2, 1) | ||
| 88 | #define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1) | 90 | #define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1) |
| 89 | #define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0) | 91 | #define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0) |
| 90 | 92 | ||
| @@ -94,6 +96,8 @@ | |||
| 94 | #define read_sysreg_el0(r) read_sysreg(r##_el0) | 96 | #define read_sysreg_el0(r) read_sysreg(r##_el0) |
| 95 | #define write_sysreg_el0(v, r) write_sysreg(v, r##_el0) | 97 | #define write_sysreg_el0(v, r) write_sysreg(v, r##_el0) |
| 96 | 98 | ||
| 99 | #define cntp_ctl_el0 CNTP_CTL | ||
| 100 | #define cntp_cval_el0 CNTP_CVAL | ||
| 97 | #define cntv_ctl_el0 CNTV_CTL | 101 | #define cntv_ctl_el0 CNTV_CTL |
| 98 | #define cntv_cval_el0 CNTV_CVAL | 102 | #define cntv_cval_el0 CNTV_CVAL |
| 99 | #define cntvoff_el2 CNTVOFF | 103 | #define cntvoff_el2 CNTVOFF |
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 3a875fc1b63c..2de96a180166 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h | |||
| @@ -421,9 +421,14 @@ static inline int hyp_map_aux_data(void) | |||
| 421 | 421 | ||
| 422 | static inline void kvm_set_ipa_limit(void) {} | 422 | static inline void kvm_set_ipa_limit(void) {} |
| 423 | 423 | ||
| 424 | static inline bool kvm_cpu_has_cnp(void) | 424 | static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) |
| 425 | { | 425 | { |
| 426 | return false; | 426 | struct kvm_vmid *vmid = &kvm->arch.vmid; |
| 427 | u64 vmid_field, baddr; | ||
| 428 | |||
| 429 | baddr = kvm->arch.pgd_phys; | ||
| 430 | vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; | ||
| 431 | return kvm_phys_to_vttbr(baddr) | vmid_field; | ||
| 427 | } | 432 | } |
| 428 | 433 | ||
| 429 | #endif /* !__ASSEMBLY__ */ | 434 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 48de846f2246..531e59f5be9c 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile | |||
| @@ -8,9 +8,8 @@ ifeq ($(plus_virt),+virt) | |||
| 8 | plus_virt_def := -DREQUIRES_VIRT=1 | 8 | plus_virt_def := -DREQUIRES_VIRT=1 |
| 9 | endif | 9 | endif |
| 10 | 10 | ||
| 11 | ccflags-y += -Iarch/arm/kvm -Ivirt/kvm/arm/vgic | 11 | ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic |
| 12 | CFLAGS_arm.o := -I. $(plus_virt_def) | 12 | CFLAGS_arm.o := $(plus_virt_def) |
| 13 | CFLAGS_mmu.o := -I. | ||
| 14 | 13 | ||
| 15 | AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) | 14 | AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) |
| 16 | AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) | 15 | AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) |
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index e8bd288fd5be..14915c78bd99 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c | |||
| @@ -293,15 +293,16 @@ static bool access_cntp_tval(struct kvm_vcpu *vcpu, | |||
| 293 | const struct coproc_params *p, | 293 | const struct coproc_params *p, |
| 294 | const struct coproc_reg *r) | 294 | const struct coproc_reg *r) |
| 295 | { | 295 | { |
| 296 | u64 now = kvm_phys_timer_read(); | 296 | u32 val; |
| 297 | u64 val; | ||
| 298 | 297 | ||
| 299 | if (p->is_write) { | 298 | if (p->is_write) { |
| 300 | val = *vcpu_reg(vcpu, p->Rt1); | 299 | val = *vcpu_reg(vcpu, p->Rt1); |
| 301 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now); | 300 | kvm_arm_timer_write_sysreg(vcpu, |
| 301 | TIMER_PTIMER, TIMER_REG_TVAL, val); | ||
| 302 | } else { | 302 | } else { |
| 303 | val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); | 303 | val = kvm_arm_timer_read_sysreg(vcpu, |
| 304 | *vcpu_reg(vcpu, p->Rt1) = val - now; | 304 | TIMER_PTIMER, TIMER_REG_TVAL); |
| 305 | *vcpu_reg(vcpu, p->Rt1) = val; | ||
| 305 | } | 306 | } |
| 306 | 307 | ||
| 307 | return true; | 308 | return true; |
| @@ -315,9 +316,11 @@ static bool access_cntp_ctl(struct kvm_vcpu *vcpu, | |||
| 315 | 316 | ||
| 316 | if (p->is_write) { | 317 | if (p->is_write) { |
| 317 | val = *vcpu_reg(vcpu, p->Rt1); | 318 | val = *vcpu_reg(vcpu, p->Rt1); |
| 318 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val); | 319 | kvm_arm_timer_write_sysreg(vcpu, |
| 320 | TIMER_PTIMER, TIMER_REG_CTL, val); | ||
| 319 | } else { | 321 | } else { |
| 320 | val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); | 322 | val = kvm_arm_timer_read_sysreg(vcpu, |
| 323 | TIMER_PTIMER, TIMER_REG_CTL); | ||
| 321 | *vcpu_reg(vcpu, p->Rt1) = val; | 324 | *vcpu_reg(vcpu, p->Rt1) = val; |
| 322 | } | 325 | } |
| 323 | 326 | ||
| @@ -333,9 +336,11 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu, | |||
| 333 | if (p->is_write) { | 336 | if (p->is_write) { |
| 334 | val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; | 337 | val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; |
| 335 | val |= *vcpu_reg(vcpu, p->Rt1); | 338 | val |= *vcpu_reg(vcpu, p->Rt1); |
| 336 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val); | 339 | kvm_arm_timer_write_sysreg(vcpu, |
| 340 | TIMER_PTIMER, TIMER_REG_CVAL, val); | ||
| 337 | } else { | 341 | } else { |
| 338 | val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); | 342 | val = kvm_arm_timer_read_sysreg(vcpu, |
| 343 | TIMER_PTIMER, TIMER_REG_CVAL); | ||
| 339 | *vcpu_reg(vcpu, p->Rt1) = val; | 344 | *vcpu_reg(vcpu, p->Rt1) = val; |
| 340 | *vcpu_reg(vcpu, p->Rt2) = val >> 32; | 345 | *vcpu_reg(vcpu, p->Rt2) = val >> 32; |
| 341 | } | 346 | } |
diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c index c4782812714c..8bf895ec6e04 100644 --- a/arch/arm/kvm/hyp/cp15-sr.c +++ b/arch/arm/kvm/hyp/cp15-sr.c | |||
| @@ -27,7 +27,6 @@ static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx) | |||
| 27 | 27 | ||
| 28 | void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) | 28 | void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) |
| 29 | { | 29 | { |
| 30 | ctxt->cp15[c0_MPIDR] = read_sysreg(VMPIDR); | ||
| 31 | ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR); | 30 | ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR); |
| 32 | ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR); | 31 | ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR); |
| 33 | ctxt->cp15[c1_CPACR] = read_sysreg(CPACR); | 32 | ctxt->cp15[c1_CPACR] = read_sysreg(CPACR); |
diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S index aa3f9a9837ac..6ed3cf23fe89 100644 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ b/arch/arm/kvm/hyp/hyp-entry.S | |||
| @@ -176,7 +176,7 @@ THUMB( orr lr, lr, #PSR_T_BIT ) | |||
| 176 | msr spsr_cxsf, lr | 176 | msr spsr_cxsf, lr |
| 177 | ldr lr, =panic | 177 | ldr lr, =panic |
| 178 | msr ELR_hyp, lr | 178 | msr ELR_hyp, lr |
| 179 | ldr lr, =kvm_call_hyp | 179 | ldr lr, =__kvm_call_hyp |
| 180 | clrex | 180 | clrex |
| 181 | eret | 181 | eret |
| 182 | ENDPROC(__hyp_do_panic) | 182 | ENDPROC(__hyp_do_panic) |
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index acf1c37fa49c..3b058a5d7c5f 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c | |||
| @@ -77,7 +77,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) | |||
| 77 | static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) | 77 | static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) |
| 78 | { | 78 | { |
| 79 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | 79 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); |
| 80 | write_sysreg(kvm->arch.vttbr, VTTBR); | 80 | write_sysreg(kvm_get_vttbr(kvm), VTTBR); |
| 81 | write_sysreg(vcpu->arch.midr, VPIDR); | 81 | write_sysreg(vcpu->arch.midr, VPIDR); |
| 82 | } | 82 | } |
| 83 | 83 | ||
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c index c0edd450e104..8e4afba73635 100644 --- a/arch/arm/kvm/hyp/tlb.c +++ b/arch/arm/kvm/hyp/tlb.c | |||
| @@ -41,7 +41,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) | |||
| 41 | 41 | ||
| 42 | /* Switch to requested VMID */ | 42 | /* Switch to requested VMID */ |
| 43 | kvm = kern_hyp_va(kvm); | 43 | kvm = kern_hyp_va(kvm); |
| 44 | write_sysreg(kvm->arch.vttbr, VTTBR); | 44 | write_sysreg(kvm_get_vttbr(kvm), VTTBR); |
| 45 | isb(); | 45 | isb(); |
| 46 | 46 | ||
| 47 | write_sysreg(0, TLBIALLIS); | 47 | write_sysreg(0, TLBIALLIS); |
| @@ -61,7 +61,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) | |||
| 61 | struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); | 61 | struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); |
| 62 | 62 | ||
| 63 | /* Switch to requested VMID */ | 63 | /* Switch to requested VMID */ |
| 64 | write_sysreg(kvm->arch.vttbr, VTTBR); | 64 | write_sysreg(kvm_get_vttbr(kvm), VTTBR); |
| 65 | isb(); | 65 | isb(); |
| 66 | 66 | ||
| 67 | write_sysreg(0, TLBIALL); | 67 | write_sysreg(0, TLBIALL); |
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 80a1d6cd261c..a08e6419ebe9 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S | |||
| @@ -42,7 +42,7 @@ | |||
| 42 | * r12: caller save | 42 | * r12: caller save |
| 43 | * rest: callee save | 43 | * rest: callee save |
| 44 | */ | 44 | */ |
| 45 | ENTRY(kvm_call_hyp) | 45 | ENTRY(__kvm_call_hyp) |
| 46 | hvc #0 | 46 | hvc #0 |
| 47 | bx lr | 47 | bx lr |
| 48 | ENDPROC(kvm_call_hyp) | 48 | ENDPROC(__kvm_call_hyp) |
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 506386a3edde..d3842791e1c4 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h | |||
| @@ -77,6 +77,10 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) | |||
| 77 | */ | 77 | */ |
| 78 | if (!vcpu_el1_is_32bit(vcpu)) | 78 | if (!vcpu_el1_is_32bit(vcpu)) |
| 79 | vcpu->arch.hcr_el2 |= HCR_TID3; | 79 | vcpu->arch.hcr_el2 |= HCR_TID3; |
| 80 | |||
| 81 | if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || | ||
| 82 | vcpu_el1_is_32bit(vcpu)) | ||
| 83 | vcpu->arch.hcr_el2 |= HCR_TID2; | ||
| 80 | } | 84 | } |
| 81 | 85 | ||
| 82 | static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) | 86 | static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) |
| @@ -331,6 +335,14 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) | |||
| 331 | return ESR_ELx_SYS64_ISS_RT(esr); | 335 | return ESR_ELx_SYS64_ISS_RT(esr); |
| 332 | } | 336 | } |
| 333 | 337 | ||
| 338 | static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) | ||
| 339 | { | ||
| 340 | if (kvm_vcpu_trap_is_iabt(vcpu)) | ||
| 341 | return false; | ||
| 342 | |||
| 343 | return kvm_vcpu_dabt_iswrite(vcpu); | ||
| 344 | } | ||
| 345 | |||
| 334 | static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) | 346 | static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) |
| 335 | { | 347 | { |
| 336 | return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; | 348 | return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; |
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 222af1d2c3e4..a01fe087e022 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <asm/kvm.h> | 31 | #include <asm/kvm.h> |
| 32 | #include <asm/kvm_asm.h> | 32 | #include <asm/kvm_asm.h> |
| 33 | #include <asm/kvm_mmio.h> | 33 | #include <asm/kvm_mmio.h> |
| 34 | #include <asm/smp_plat.h> | ||
| 34 | #include <asm/thread_info.h> | 35 | #include <asm/thread_info.h> |
| 35 | 36 | ||
| 36 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED | 37 | #define __KVM_HAVE_ARCH_INTC_INITIALIZED |
| @@ -58,16 +59,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu); | |||
| 58 | int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); | 59 | int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); |
| 59 | void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); | 60 | void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); |
| 60 | 61 | ||
| 61 | struct kvm_arch { | 62 | struct kvm_vmid { |
| 62 | /* The VMID generation used for the virt. memory system */ | 63 | /* The VMID generation used for the virt. memory system */ |
| 63 | u64 vmid_gen; | 64 | u64 vmid_gen; |
| 64 | u32 vmid; | 65 | u32 vmid; |
| 66 | }; | ||
| 67 | |||
| 68 | struct kvm_arch { | ||
| 69 | struct kvm_vmid vmid; | ||
| 65 | 70 | ||
| 66 | /* stage2 entry level table */ | 71 | /* stage2 entry level table */ |
| 67 | pgd_t *pgd; | 72 | pgd_t *pgd; |
| 73 | phys_addr_t pgd_phys; | ||
| 68 | 74 | ||
| 69 | /* VTTBR value associated with above pgd and vmid */ | ||
| 70 | u64 vttbr; | ||
| 71 | /* VTCR_EL2 value for this VM */ | 75 | /* VTCR_EL2 value for this VM */ |
| 72 | u64 vtcr; | 76 | u64 vtcr; |
| 73 | 77 | ||
| @@ -382,7 +386,36 @@ void kvm_arm_halt_guest(struct kvm *kvm); | |||
| 382 | void kvm_arm_resume_guest(struct kvm *kvm); | 386 | void kvm_arm_resume_guest(struct kvm *kvm); |
| 383 | 387 | ||
| 384 | u64 __kvm_call_hyp(void *hypfn, ...); | 388 | u64 __kvm_call_hyp(void *hypfn, ...); |
| 385 | #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) | 389 | |
| 390 | /* | ||
| 391 | * The couple of isb() below are there to guarantee the same behaviour | ||
| 392 | * on VHE as on !VHE, where the eret to EL1 acts as a context | ||
| 393 | * synchronization event. | ||
| 394 | */ | ||
| 395 | #define kvm_call_hyp(f, ...) \ | ||
| 396 | do { \ | ||
| 397 | if (has_vhe()) { \ | ||
| 398 | f(__VA_ARGS__); \ | ||
| 399 | isb(); \ | ||
| 400 | } else { \ | ||
| 401 | __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ | ||
| 402 | } \ | ||
| 403 | } while(0) | ||
| 404 | |||
| 405 | #define kvm_call_hyp_ret(f, ...) \ | ||
| 406 | ({ \ | ||
| 407 | typeof(f(__VA_ARGS__)) ret; \ | ||
| 408 | \ | ||
| 409 | if (has_vhe()) { \ | ||
| 410 | ret = f(__VA_ARGS__); \ | ||
| 411 | isb(); \ | ||
| 412 | } else { \ | ||
| 413 | ret = __kvm_call_hyp(kvm_ksym_ref(f), \ | ||
| 414 | ##__VA_ARGS__); \ | ||
| 415 | } \ | ||
| 416 | \ | ||
| 417 | ret; \ | ||
| 418 | }) | ||
| 386 | 419 | ||
| 387 | void force_vm_exit(const cpumask_t *mask); | 420 | void force_vm_exit(const cpumask_t *mask); |
| 388 | void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); | 421 | void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); |
| @@ -401,6 +434,13 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); | |||
| 401 | 434 | ||
| 402 | DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); | 435 | DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); |
| 403 | 436 | ||
| 437 | static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, | ||
| 438 | int cpu) | ||
| 439 | { | ||
| 440 | /* The host's MPIDR is immutable, so let's set it up at boot time */ | ||
| 441 | cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu); | ||
| 442 | } | ||
| 443 | |||
| 404 | void __kvm_enable_ssbs(void); | 444 | void __kvm_enable_ssbs(void); |
| 405 | 445 | ||
| 406 | static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, | 446 | static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, |
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index a80a7ef57325..4da765f2cca5 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/compiler.h> | 21 | #include <linux/compiler.h> |
| 22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
| 23 | #include <asm/alternative.h> | 23 | #include <asm/alternative.h> |
| 24 | #include <asm/kvm_mmu.h> | ||
| 24 | #include <asm/sysreg.h> | 25 | #include <asm/sysreg.h> |
| 25 | 26 | ||
| 26 | #define __hyp_text __section(.hyp.text) notrace | 27 | #define __hyp_text __section(.hyp.text) notrace |
| @@ -163,7 +164,7 @@ void __noreturn __hyp_do_panic(unsigned long, ...); | |||
| 163 | static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) | 164 | static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) |
| 164 | { | 165 | { |
| 165 | write_sysreg(kvm->arch.vtcr, vtcr_el2); | 166 | write_sysreg(kvm->arch.vtcr, vtcr_el2); |
| 166 | write_sysreg(kvm->arch.vttbr, vttbr_el2); | 167 | write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); |
| 167 | 168 | ||
| 168 | /* | 169 | /* |
| 169 | * ARM erratum 1165522 requires the actual execution of the above | 170 | * ARM erratum 1165522 requires the actual execution of the above |
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8af4b1befa42..b0742a16c6c9 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h | |||
| @@ -138,7 +138,8 @@ static inline unsigned long __kern_hyp_va(unsigned long v) | |||
| 138 | }) | 138 | }) |
| 139 | 139 | ||
| 140 | /* | 140 | /* |
| 141 | * We currently only support a 40bit IPA. | 141 | * We currently support using a VM-specified IPA size. For backward |
| 142 | * compatibility, the default IPA size is fixed to 40bits. | ||
| 142 | */ | 143 | */ |
| 143 | #define KVM_PHYS_SHIFT (40) | 144 | #define KVM_PHYS_SHIFT (40) |
| 144 | 145 | ||
| @@ -591,9 +592,15 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) | |||
| 591 | return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); | 592 | return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); |
| 592 | } | 593 | } |
| 593 | 594 | ||
| 594 | static inline bool kvm_cpu_has_cnp(void) | 595 | static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) |
| 595 | { | 596 | { |
| 596 | return system_supports_cnp(); | 597 | struct kvm_vmid *vmid = &kvm->arch.vmid; |
| 598 | u64 vmid_field, baddr; | ||
| 599 | u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; | ||
| 600 | |||
| 601 | baddr = kvm->arch.pgd_phys; | ||
| 602 | vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; | ||
| 603 | return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; | ||
| 597 | } | 604 | } |
| 598 | 605 | ||
| 599 | #endif /* __ASSEMBLY__ */ | 606 | #endif /* __ASSEMBLY__ */ |
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 72dc4c011014..5b267dec6194 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h | |||
| @@ -361,6 +361,7 @@ | |||
| 361 | 361 | ||
| 362 | #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) | 362 | #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) |
| 363 | 363 | ||
| 364 | #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) | ||
| 364 | #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) | 365 | #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) |
| 365 | #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) | 366 | #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) |
| 366 | 367 | ||
| @@ -392,6 +393,10 @@ | |||
| 392 | #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) | 393 | #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) |
| 393 | #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) | 394 | #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) |
| 394 | 395 | ||
| 396 | #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) | ||
| 397 | #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) | ||
| 398 | #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) | ||
| 399 | |||
| 395 | #define __PMEV_op2(n) ((n) & 0x7) | 400 | #define __PMEV_op2(n) ((n) & 0x7) |
| 396 | #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) | 401 | #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) |
| 397 | #define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) | 402 | #define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) |
| @@ -426,7 +431,7 @@ | |||
| 426 | #define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) | 431 | #define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) |
| 427 | #define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) | 432 | #define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) |
| 428 | #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) | 433 | #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) |
| 429 | #define SYS_ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) | 434 | #define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) |
| 430 | #define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) | 435 | #define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) |
| 431 | 436 | ||
| 432 | #define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) | 437 | #define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) |
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 0f2a135ba15b..690e033a91c0 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile | |||
| @@ -3,9 +3,7 @@ | |||
| 3 | # Makefile for Kernel-based Virtual Machine module | 3 | # Makefile for Kernel-based Virtual Machine module |
| 4 | # | 4 | # |
| 5 | 5 | ||
| 6 | ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic | 6 | ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic |
| 7 | CFLAGS_arm.o := -I. | ||
| 8 | CFLAGS_mmu.o := -I. | ||
| 9 | 7 | ||
| 10 | KVM=../../../virt/kvm | 8 | KVM=../../../virt/kvm |
| 11 | 9 | ||
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index f39801e4136c..fd917d6d12af 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c | |||
| @@ -76,7 +76,7 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) | |||
| 76 | 76 | ||
| 77 | void kvm_arm_init_debug(void) | 77 | void kvm_arm_init_debug(void) |
| 78 | { | 78 | { |
| 79 | __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2)); | 79 | __this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2)); |
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | /** | 82 | /** |
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 952f6cb9cf72..2845aa680841 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S | |||
| @@ -40,9 +40,6 @@ | |||
| 40 | * arch/arm64/kernel/hyp_stub.S. | 40 | * arch/arm64/kernel/hyp_stub.S. |
| 41 | */ | 41 | */ |
| 42 | ENTRY(__kvm_call_hyp) | 42 | ENTRY(__kvm_call_hyp) |
| 43 | alternative_if_not ARM64_HAS_VIRT_HOST_EXTN | ||
| 44 | hvc #0 | 43 | hvc #0 |
| 45 | ret | 44 | ret |
| 46 | alternative_else_nop_endif | ||
| 47 | b __vhe_hyp_call | ||
| 48 | ENDPROC(__kvm_call_hyp) | 45 | ENDPROC(__kvm_call_hyp) |
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 73c1b483ec39..2b1e686772bf 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S | |||
| @@ -43,18 +43,6 @@ | |||
| 43 | ldr lr, [sp], #16 | 43 | ldr lr, [sp], #16 |
| 44 | .endm | 44 | .endm |
| 45 | 45 | ||
| 46 | ENTRY(__vhe_hyp_call) | ||
| 47 | do_el2_call | ||
| 48 | /* | ||
| 49 | * We used to rely on having an exception return to get | ||
| 50 | * an implicit isb. In the E2H case, we don't have it anymore. | ||
| 51 | * rather than changing all the leaf functions, just do it here | ||
| 52 | * before returning to the rest of the kernel. | ||
| 53 | */ | ||
| 54 | isb | ||
| 55 | ret | ||
| 56 | ENDPROC(__vhe_hyp_call) | ||
| 57 | |||
| 58 | el1_sync: // Guest trapped into EL2 | 46 | el1_sync: // Guest trapped into EL2 |
| 59 | 47 | ||
| 60 | mrs x0, esr_el2 | 48 | mrs x0, esr_el2 |
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index b426e2cf973c..c52a8451637c 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c | |||
| @@ -53,7 +53,6 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) | |||
| 53 | 53 | ||
| 54 | static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) | 54 | static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) |
| 55 | { | 55 | { |
| 56 | ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); | ||
| 57 | ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); | 56 | ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); |
| 58 | ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); | 57 | ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); |
| 59 | ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); | 58 | ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); |
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c936aa40c3f4..539feecda5b8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c | |||
| @@ -982,6 +982,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 982 | return true; | 982 | return true; |
| 983 | } | 983 | } |
| 984 | 984 | ||
| 985 | #define reg_to_encoding(x) \ | ||
| 986 | sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ | ||
| 987 | (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2); | ||
| 988 | |||
| 985 | /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ | 989 | /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ |
| 986 | #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ | 990 | #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ |
| 987 | { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ | 991 | { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ |
| @@ -1003,44 +1007,38 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 1003 | { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ | 1007 | { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ |
| 1004 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } | 1008 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } |
| 1005 | 1009 | ||
| 1006 | static bool access_cntp_tval(struct kvm_vcpu *vcpu, | 1010 | static bool access_arch_timer(struct kvm_vcpu *vcpu, |
| 1007 | struct sys_reg_params *p, | 1011 | struct sys_reg_params *p, |
| 1008 | const struct sys_reg_desc *r) | 1012 | const struct sys_reg_desc *r) |
| 1009 | { | 1013 | { |
| 1010 | u64 now = kvm_phys_timer_read(); | 1014 | enum kvm_arch_timers tmr; |
| 1011 | u64 cval; | 1015 | enum kvm_arch_timer_regs treg; |
| 1016 | u64 reg = reg_to_encoding(r); | ||
| 1012 | 1017 | ||
| 1013 | if (p->is_write) { | 1018 | switch (reg) { |
| 1014 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, | 1019 | case SYS_CNTP_TVAL_EL0: |
| 1015 | p->regval + now); | 1020 | case SYS_AARCH32_CNTP_TVAL: |
| 1016 | } else { | 1021 | tmr = TIMER_PTIMER; |
| 1017 | cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); | 1022 | treg = TIMER_REG_TVAL; |
| 1018 | p->regval = cval - now; | 1023 | break; |
| 1024 | case SYS_CNTP_CTL_EL0: | ||
| 1025 | case SYS_AARCH32_CNTP_CTL: | ||
| 1026 | tmr = TIMER_PTIMER; | ||
| 1027 | treg = TIMER_REG_CTL; | ||
| 1028 | break; | ||
| 1029 | case SYS_CNTP_CVAL_EL0: | ||
| 1030 | case SYS_AARCH32_CNTP_CVAL: | ||
| 1031 | tmr = TIMER_PTIMER; | ||
| 1032 | treg = TIMER_REG_CVAL; | ||
| 1033 | break; | ||
| 1034 | default: | ||
| 1035 | BUG(); | ||
| 1019 | } | 1036 | } |
| 1020 | 1037 | ||
| 1021 | return true; | ||
| 1022 | } | ||
| 1023 | |||
| 1024 | static bool access_cntp_ctl(struct kvm_vcpu *vcpu, | ||
| 1025 | struct sys_reg_params *p, | ||
| 1026 | const struct sys_reg_desc *r) | ||
| 1027 | { | ||
| 1028 | if (p->is_write) | ||
| 1029 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval); | ||
| 1030 | else | ||
| 1031 | p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); | ||
| 1032 | |||
| 1033 | return true; | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | static bool access_cntp_cval(struct kvm_vcpu *vcpu, | ||
| 1037 | struct sys_reg_params *p, | ||
| 1038 | const struct sys_reg_desc *r) | ||
| 1039 | { | ||
| 1040 | if (p->is_write) | 1038 | if (p->is_write) |
| 1041 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval); | 1039 | kvm_arm_timer_write_sysreg(vcpu, tmr, treg, p->regval); |
| 1042 | else | 1040 | else |
| 1043 | p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); | 1041 | p->regval = kvm_arm_timer_read_sysreg(vcpu, tmr, treg); |
| 1044 | 1042 | ||
| 1045 | return true; | 1043 | return true; |
| 1046 | } | 1044 | } |
| @@ -1160,6 +1158,64 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, | |||
| 1160 | return __set_id_reg(rd, uaddr, true); | 1158 | return __set_id_reg(rd, uaddr, true); |
| 1161 | } | 1159 | } |
| 1162 | 1160 | ||
| 1161 | static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
| 1162 | const struct sys_reg_desc *r) | ||
| 1163 | { | ||
| 1164 | if (p->is_write) | ||
| 1165 | return write_to_read_only(vcpu, p, r); | ||
| 1166 | |||
| 1167 | p->regval = read_sanitised_ftr_reg(SYS_CTR_EL0); | ||
| 1168 | return true; | ||
| 1169 | } | ||
| 1170 | |||
| 1171 | static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
| 1172 | const struct sys_reg_desc *r) | ||
| 1173 | { | ||
| 1174 | if (p->is_write) | ||
| 1175 | return write_to_read_only(vcpu, p, r); | ||
| 1176 | |||
| 1177 | p->regval = read_sysreg(clidr_el1); | ||
| 1178 | return true; | ||
| 1179 | } | ||
| 1180 | |||
| 1181 | static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
| 1182 | const struct sys_reg_desc *r) | ||
| 1183 | { | ||
| 1184 | if (p->is_write) | ||
| 1185 | vcpu_write_sys_reg(vcpu, p->regval, r->reg); | ||
| 1186 | else | ||
| 1187 | p->regval = vcpu_read_sys_reg(vcpu, r->reg); | ||
| 1188 | return true; | ||
| 1189 | } | ||
| 1190 | |||
| 1191 | static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | ||
| 1192 | const struct sys_reg_desc *r) | ||
| 1193 | { | ||
| 1194 | u32 csselr; | ||
| 1195 | |||
| 1196 | if (p->is_write) | ||
| 1197 | return write_to_read_only(vcpu, p, r); | ||
| 1198 | |||
| 1199 | csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1); | ||
| 1200 | p->regval = get_ccsidr(csselr); | ||
| 1201 | |||
| 1202 | /* | ||
| 1203 | * Guests should not be doing cache operations by set/way at all, and | ||
| 1204 | * for this reason, we trap them and attempt to infer the intent, so | ||
| 1205 | * that we can flush the entire guest's address space at the appropriate | ||
| 1206 | * time. | ||
| 1207 | * To prevent this trapping from causing performance problems, let's | ||
| 1208 | * expose the geometry of all data and unified caches (which are | ||
| 1209 | * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way. | ||
| 1210 | * [If guests should attempt to infer aliasing properties from the | ||
| 1211 | * geometry (which is not permitted by the architecture), they would | ||
| 1212 | * only do so for virtually indexed caches.] | ||
| 1213 | */ | ||
| 1214 | if (!(csselr & 1)) // data or unified cache | ||
| 1215 | p->regval &= ~GENMASK(27, 3); | ||
| 1216 | return true; | ||
| 1217 | } | ||
| 1218 | |||
| 1163 | /* sys_reg_desc initialiser for known cpufeature ID registers */ | 1219 | /* sys_reg_desc initialiser for known cpufeature ID registers */ |
| 1164 | #define ID_SANITISED(name) { \ | 1220 | #define ID_SANITISED(name) { \ |
| 1165 | SYS_DESC(SYS_##name), \ | 1221 | SYS_DESC(SYS_##name), \ |
| @@ -1377,7 +1433,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
| 1377 | 1433 | ||
| 1378 | { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, | 1434 | { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, |
| 1379 | 1435 | ||
| 1380 | { SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 }, | 1436 | { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, |
| 1437 | { SYS_DESC(SYS_CLIDR_EL1), access_clidr }, | ||
| 1438 | { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, | ||
| 1439 | { SYS_DESC(SYS_CTR_EL0), access_ctr }, | ||
| 1381 | 1440 | ||
| 1382 | { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, | 1441 | { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, |
| 1383 | { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, | 1442 | { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, |
| @@ -1400,9 +1459,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
| 1400 | { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, | 1459 | { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, |
| 1401 | { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, | 1460 | { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, |
| 1402 | 1461 | ||
| 1403 | { SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval }, | 1462 | { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, |
| 1404 | { SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl }, | 1463 | { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, |
| 1405 | { SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval }, | 1464 | { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, |
| 1406 | 1465 | ||
| 1407 | /* PMEVCNTRn_EL0 */ | 1466 | /* PMEVCNTRn_EL0 */ |
| 1408 | PMU_PMEVCNTR_EL0(0), | 1467 | PMU_PMEVCNTR_EL0(0), |
| @@ -1476,7 +1535,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
| 1476 | 1535 | ||
| 1477 | { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, | 1536 | { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, |
| 1478 | { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, | 1537 | { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, |
| 1479 | { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x70 }, | 1538 | { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, |
| 1480 | }; | 1539 | }; |
| 1481 | 1540 | ||
| 1482 | static bool trap_dbgidr(struct kvm_vcpu *vcpu, | 1541 | static bool trap_dbgidr(struct kvm_vcpu *vcpu, |
| @@ -1677,6 +1736,7 @@ static const struct sys_reg_desc cp14_64_regs[] = { | |||
| 1677 | * register). | 1736 | * register). |
| 1678 | */ | 1737 | */ |
| 1679 | static const struct sys_reg_desc cp15_regs[] = { | 1738 | static const struct sys_reg_desc cp15_regs[] = { |
| 1739 | { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, | ||
| 1680 | { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, | 1740 | { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, |
| 1681 | { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, | 1741 | { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, |
| 1682 | { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, | 1742 | { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, |
| @@ -1723,10 +1783,9 @@ static const struct sys_reg_desc cp15_regs[] = { | |||
| 1723 | 1783 | ||
| 1724 | { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, | 1784 | { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, |
| 1725 | 1785 | ||
| 1726 | /* CNTP_TVAL */ | 1786 | /* Arch Timers */ |
| 1727 | { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval }, | 1787 | { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer }, |
| 1728 | /* CNTP_CTL */ | 1788 | { SYS_DESC(SYS_AARCH32_CNTP_CTL), access_arch_timer }, |
| 1729 | { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl }, | ||
| 1730 | 1789 | ||
| 1731 | /* PMEVCNTRn */ | 1790 | /* PMEVCNTRn */ |
| 1732 | PMU_PMEVCNTR(0), | 1791 | PMU_PMEVCNTR(0), |
| @@ -1794,6 +1853,10 @@ static const struct sys_reg_desc cp15_regs[] = { | |||
| 1794 | PMU_PMEVTYPER(30), | 1853 | PMU_PMEVTYPER(30), |
| 1795 | /* PMCCFILTR */ | 1854 | /* PMCCFILTR */ |
| 1796 | { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper }, | 1855 | { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper }, |
| 1856 | |||
| 1857 | { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, | ||
| 1858 | { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, | ||
| 1859 | { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR }, | ||
| 1797 | }; | 1860 | }; |
| 1798 | 1861 | ||
| 1799 | static const struct sys_reg_desc cp15_64_regs[] = { | 1862 | static const struct sys_reg_desc cp15_64_regs[] = { |
| @@ -1803,7 +1866,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { | |||
| 1803 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, | 1866 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, |
| 1804 | { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ | 1867 | { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ |
| 1805 | { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ | 1868 | { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ |
| 1806 | { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval }, | 1869 | { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, |
| 1807 | }; | 1870 | }; |
| 1808 | 1871 | ||
| 1809 | /* Target specific emulation tables */ | 1872 | /* Target specific emulation tables */ |
| @@ -1832,30 +1895,19 @@ static const struct sys_reg_desc *get_target_table(unsigned target, | |||
| 1832 | } | 1895 | } |
| 1833 | } | 1896 | } |
| 1834 | 1897 | ||
| 1835 | #define reg_to_match_value(x) \ | ||
| 1836 | ({ \ | ||
| 1837 | unsigned long val; \ | ||
| 1838 | val = (x)->Op0 << 14; \ | ||
| 1839 | val |= (x)->Op1 << 11; \ | ||
| 1840 | val |= (x)->CRn << 7; \ | ||
| 1841 | val |= (x)->CRm << 3; \ | ||
| 1842 | val |= (x)->Op2; \ | ||
| 1843 | val; \ | ||
| 1844 | }) | ||
| 1845 | |||
| 1846 | static int match_sys_reg(const void *key, const void *elt) | 1898 | static int match_sys_reg(const void *key, const void *elt) |
| 1847 | { | 1899 | { |
| 1848 | const unsigned long pval = (unsigned long)key; | 1900 | const unsigned long pval = (unsigned long)key; |
| 1849 | const struct sys_reg_desc *r = elt; | 1901 | const struct sys_reg_desc *r = elt; |
| 1850 | 1902 | ||
| 1851 | return pval - reg_to_match_value(r); | 1903 | return pval - reg_to_encoding(r); |
| 1852 | } | 1904 | } |
| 1853 | 1905 | ||
| 1854 | static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, | 1906 | static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, |
| 1855 | const struct sys_reg_desc table[], | 1907 | const struct sys_reg_desc table[], |
| 1856 | unsigned int num) | 1908 | unsigned int num) |
| 1857 | { | 1909 | { |
| 1858 | unsigned long pval = reg_to_match_value(params); | 1910 | unsigned long pval = reg_to_encoding(params); |
| 1859 | 1911 | ||
| 1860 | return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); | 1912 | return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); |
| 1861 | } | 1913 | } |
| @@ -2218,11 +2270,15 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, | |||
| 2218 | } | 2270 | } |
| 2219 | 2271 | ||
| 2220 | FUNCTION_INVARIANT(midr_el1) | 2272 | FUNCTION_INVARIANT(midr_el1) |
| 2221 | FUNCTION_INVARIANT(ctr_el0) | ||
| 2222 | FUNCTION_INVARIANT(revidr_el1) | 2273 | FUNCTION_INVARIANT(revidr_el1) |
| 2223 | FUNCTION_INVARIANT(clidr_el1) | 2274 | FUNCTION_INVARIANT(clidr_el1) |
| 2224 | FUNCTION_INVARIANT(aidr_el1) | 2275 | FUNCTION_INVARIANT(aidr_el1) |
| 2225 | 2276 | ||
| 2277 | static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r) | ||
| 2278 | { | ||
| 2279 | ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0); | ||
| 2280 | } | ||
| 2281 | |||
| 2226 | /* ->val is filled in by kvm_sys_reg_table_init() */ | 2282 | /* ->val is filled in by kvm_sys_reg_table_init() */ |
| 2227 | static struct sys_reg_desc invariant_sys_regs[] = { | 2283 | static struct sys_reg_desc invariant_sys_regs[] = { |
| 2228 | { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, | 2284 | { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, |
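
The sys_regs.c hunks above replace the open-coded reg_to_match_value() macro with reg_to_encoding() for the bsearch() lookups. A minimal sketch of the packing, assuming the new helper keeps the bit layout of the removed macro (the comparison in match_sys_reg() is unchanged, so the ordering must be the same):

    /* Illustrative only: packs a system register coordinate into one key. */
    static inline unsigned long encode_sys_reg(unsigned int op0, unsigned int op1,
                                               unsigned int crn, unsigned int crm,
                                               unsigned int op2)
    {
            return (op0 << 14) | (op1 << 11) | (crn << 7) | (crm << 3) | op2;
    }

    /* Example: CNTP_TVAL_EL0 is Op0=3, Op1=3, CRn=14, CRm=2, Op2=0. */

Because find_reg() feeds this key to bsearch(), the sys_reg and cp15 descriptor tables have to stay sorted by this encoding.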
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index d2abd98471e8..41204a49cf95 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h | |||
| @@ -1134,7 +1134,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} | |||
| 1134 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 1134 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
| 1135 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | 1135 | static inline void kvm_arch_free_memslot(struct kvm *kvm, |
| 1136 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | 1136 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} |
| 1137 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} | 1137 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} |
| 1138 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 1138 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
| 1139 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} | 1139 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} |
| 1140 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} | 1140 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0f98f00da2ea..e6b5bb012ccb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
| @@ -99,6 +99,8 @@ struct kvm_nested_guest; | |||
| 99 | 99 | ||
| 100 | struct kvm_vm_stat { | 100 | struct kvm_vm_stat { |
| 101 | ulong remote_tlb_flush; | 101 | ulong remote_tlb_flush; |
| 102 | ulong num_2M_pages; | ||
| 103 | ulong num_1G_pages; | ||
| 102 | }; | 104 | }; |
| 103 | 105 | ||
| 104 | struct kvm_vcpu_stat { | 106 | struct kvm_vcpu_stat { |
| @@ -377,6 +379,7 @@ struct kvmppc_mmu { | |||
| 377 | void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs); | 379 | void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs); |
| 378 | u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr); | 380 | u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr); |
| 379 | u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr); | 381 | u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr); |
| 382 | int (*slbfee)(struct kvm_vcpu *vcpu, gva_t eaddr, ulong *ret_slb); | ||
| 380 | void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr); | 383 | void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr); |
| 381 | void (*slbia)(struct kvm_vcpu *vcpu); | 384 | void (*slbia)(struct kvm_vcpu *vcpu); |
| 382 | /* book3s */ | 385 | /* book3s */ |
| @@ -837,7 +840,7 @@ struct kvm_vcpu_arch { | |||
| 837 | static inline void kvm_arch_hardware_disable(void) {} | 840 | static inline void kvm_arch_hardware_disable(void) {} |
| 838 | static inline void kvm_arch_hardware_unsetup(void) {} | 841 | static inline void kvm_arch_hardware_unsetup(void) {} |
| 839 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 842 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
| 840 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} | 843 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} |
| 841 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | 844 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} |
| 842 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 845 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
| 843 | static inline void kvm_arch_exit(void) {} | 846 | static inline void kvm_arch_exit(void) {} |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a6c8548ed9fa..ac22b28ae78d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
| @@ -36,6 +36,8 @@ | |||
| 36 | #endif | 36 | #endif |
| 37 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 37 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
| 38 | #include <asm/paca.h> | 38 | #include <asm/paca.h> |
| 39 | #include <asm/xive.h> | ||
| 40 | #include <asm/cpu_has_feature.h> | ||
| 39 | #endif | 41 | #endif |
| 40 | 42 | ||
| 41 | /* | 43 | /* |
| @@ -617,6 +619,18 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir | |||
| 617 | static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } | 619 | static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } |
| 618 | #endif /* CONFIG_KVM_XIVE */ | 620 | #endif /* CONFIG_KVM_XIVE */ |
| 619 | 621 | ||
| 622 | #if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER) | ||
| 623 | static inline bool xics_on_xive(void) | ||
| 624 | { | ||
| 625 | return xive_enabled() && cpu_has_feature(CPU_FTR_HVMODE); | ||
| 626 | } | ||
| 627 | #else | ||
| 628 | static inline bool xics_on_xive(void) | ||
| 629 | { | ||
| 630 | return false; | ||
| 631 | } | ||
| 632 | #endif | ||
| 633 | |||
| 620 | /* | 634 | /* |
| 621 | * Prototypes for functions called only from assembler code. | 635 | * Prototypes for functions called only from assembler code. |
| 622 | * Having prototypes reduces sparse errors. | 636 | * Having prototypes reduces sparse errors. |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 8c876c166ef2..26ca425f4c2c 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
| @@ -463,10 +463,12 @@ struct kvm_ppc_cpu_char { | |||
| 463 | #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) | 463 | #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) |
| 464 | #define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) | 464 | #define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) |
| 465 | #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) | 465 | #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) |
| 466 | #define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) | ||
| 466 | 467 | ||
| 467 | #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) | 468 | #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) |
| 468 | #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) | 469 | #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) |
| 469 | #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) | 470 | #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) |
| 471 | #define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) | ||
| 470 | 472 | ||
| 471 | /* Per-vcpu XICS interrupt controller state */ | 473 | /* Per-vcpu XICS interrupt controller state */ |
| 472 | #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) | 474 | #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9a7dadbe1f17..10c5579d20ce 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include "book3s.h" | 39 | #include "book3s.h" |
| 40 | #include "trace.h" | 40 | #include "trace.h" |
| 41 | 41 | ||
| 42 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | ||
| 42 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 43 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
| 43 | 44 | ||
| 44 | /* #define EXIT_DEBUG */ | 45 | /* #define EXIT_DEBUG */ |
| @@ -71,6 +72,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 71 | { "pthru_all", VCPU_STAT(pthru_all) }, | 72 | { "pthru_all", VCPU_STAT(pthru_all) }, |
| 72 | { "pthru_host", VCPU_STAT(pthru_host) }, | 73 | { "pthru_host", VCPU_STAT(pthru_host) }, |
| 73 | { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, | 74 | { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, |
| 75 | { "largepages_2M", VM_STAT(num_2M_pages) }, | ||
| 76 | { "largepages_1G", VM_STAT(num_1G_pages) }, | ||
| 74 | { NULL } | 77 | { NULL } |
| 75 | }; | 78 | }; |
| 76 | 79 | ||
| @@ -642,7 +645,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | |||
| 642 | r = -ENXIO; | 645 | r = -ENXIO; |
| 643 | break; | 646 | break; |
| 644 | } | 647 | } |
| 645 | if (xive_enabled()) | 648 | if (xics_on_xive()) |
| 646 | *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu)); | 649 | *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu)); |
| 647 | else | 650 | else |
| 648 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); | 651 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); |
| @@ -715,7 +718,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | |||
| 715 | r = -ENXIO; | 718 | r = -ENXIO; |
| 716 | break; | 719 | break; |
| 717 | } | 720 | } |
| 718 | if (xive_enabled()) | 721 | if (xics_on_xive()) |
| 719 | r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val)); | 722 | r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val)); |
| 720 | else | 723 | else |
| 721 | r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); | 724 | r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); |
| @@ -991,7 +994,7 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) | |||
| 991 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, | 994 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, |
| 992 | bool line_status) | 995 | bool line_status) |
| 993 | { | 996 | { |
| 994 | if (xive_enabled()) | 997 | if (xics_on_xive()) |
| 995 | return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level, | 998 | return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level, |
| 996 | line_status); | 999 | line_status); |
| 997 | else | 1000 | else |
| @@ -1044,7 +1047,7 @@ static int kvmppc_book3s_init(void) | |||
| 1044 | 1047 | ||
| 1045 | #ifdef CONFIG_KVM_XICS | 1048 | #ifdef CONFIG_KVM_XICS |
| 1046 | #ifdef CONFIG_KVM_XIVE | 1049 | #ifdef CONFIG_KVM_XIVE |
| 1047 | if (xive_enabled()) { | 1050 | if (xics_on_xive()) { |
| 1048 | kvmppc_xive_init_module(); | 1051 | kvmppc_xive_init_module(); |
| 1049 | kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); | 1052 | kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); |
| 1050 | } else | 1053 | } else |
| @@ -1057,7 +1060,7 @@ static int kvmppc_book3s_init(void) | |||
| 1057 | static void kvmppc_book3s_exit(void) | 1060 | static void kvmppc_book3s_exit(void) |
| 1058 | { | 1061 | { |
| 1059 | #ifdef CONFIG_KVM_XICS | 1062 | #ifdef CONFIG_KVM_XICS |
| 1060 | if (xive_enabled()) | 1063 | if (xics_on_xive()) |
| 1061 | kvmppc_xive_exit_module(); | 1064 | kvmppc_xive_exit_module(); |
| 1062 | #endif | 1065 | #endif |
| 1063 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | 1066 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER |
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 612169988a3d..6f789f674048 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c | |||
| @@ -425,6 +425,7 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) | |||
| 425 | mmu->slbmte = NULL; | 425 | mmu->slbmte = NULL; |
| 426 | mmu->slbmfee = NULL; | 426 | mmu->slbmfee = NULL; |
| 427 | mmu->slbmfev = NULL; | 427 | mmu->slbmfev = NULL; |
| 428 | mmu->slbfee = NULL; | ||
| 428 | mmu->slbie = NULL; | 429 | mmu->slbie = NULL; |
| 429 | mmu->slbia = NULL; | 430 | mmu->slbia = NULL; |
| 430 | } | 431 | } |
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index c92dd25bed23..d4b967f0e8d4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
| @@ -435,6 +435,19 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) | |||
| 435 | kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT); | 435 | kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT); |
| 436 | } | 436 | } |
| 437 | 437 | ||
| 438 | static int kvmppc_mmu_book3s_64_slbfee(struct kvm_vcpu *vcpu, gva_t eaddr, | ||
| 439 | ulong *ret_slb) | ||
| 440 | { | ||
| 441 | struct kvmppc_slb *slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr); | ||
| 442 | |||
| 443 | if (slbe) { | ||
| 444 | *ret_slb = slbe->origv; | ||
| 445 | return 0; | ||
| 446 | } | ||
| 447 | *ret_slb = 0; | ||
| 448 | return -ENOENT; | ||
| 449 | } | ||
| 450 | |||
| 438 | static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) | 451 | static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) |
| 439 | { | 452 | { |
| 440 | struct kvmppc_slb *slbe; | 453 | struct kvmppc_slb *slbe; |
| @@ -670,6 +683,7 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) | |||
| 670 | mmu->slbmte = kvmppc_mmu_book3s_64_slbmte; | 683 | mmu->slbmte = kvmppc_mmu_book3s_64_slbmte; |
| 671 | mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee; | 684 | mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee; |
| 672 | mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev; | 685 | mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev; |
| 686 | mmu->slbfee = kvmppc_mmu_book3s_64_slbfee; | ||
| 673 | mmu->slbie = kvmppc_mmu_book3s_64_slbie; | 687 | mmu->slbie = kvmppc_mmu_book3s_64_slbie; |
| 674 | mmu->slbia = kvmppc_mmu_book3s_64_slbia; | 688 | mmu->slbia = kvmppc_mmu_book3s_64_slbia; |
| 675 | mmu->xlate = kvmppc_mmu_book3s_64_xlate; | 689 | mmu->xlate = kvmppc_mmu_book3s_64_xlate; |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bd2dcfbf00cd..be7bc070eae5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
| @@ -442,6 +442,24 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 442 | u32 last_inst; | 442 | u32 last_inst; |
| 443 | 443 | ||
| 444 | /* | 444 | /* |
| 445 | * Fast path - check if the guest physical address corresponds to a | ||
| 446 | * device on the FAST_MMIO_BUS; if so, we can avoid loading the | ||
| 447 | * instruction altogether, then we can just handle it and return. | ||
| 448 | */ | ||
| 449 | if (is_store) { | ||
| 450 | int idx, ret; | ||
| 451 | |||
| 452 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
| 453 | ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0, | ||
| 454 | NULL); | ||
| 455 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
| 456 | if (!ret) { | ||
| 457 | kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); | ||
| 458 | return RESUME_GUEST; | ||
| 459 | } | ||
| 460 | } | ||
| 461 | |||
| 462 | /* | ||
| 445 | * If we fail, we just return to the guest and try executing it again. | 463 | * If we fail, we just return to the guest and try executing it again. |
| 446 | */ | 464 | */ |
| 447 | if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != | 465 | if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != |
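
The new fast path above completes a guest store by probing KVM_FAST_MMIO_BUS with kvm_io_bus_write() before fetching or decoding the faulting instruction, which only helps doorbell-style registers where the written value is ignored. As a hedged illustration (not part of this patch), such a region is typically wired up from userspace as a zero-length ioeventfd, which KVM is assumed to register on the fast MMIO bus; vm_fd and doorbell_gpa are placeholders and error handling is omitted:

    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int register_doorbell(int vm_fd, __u64 doorbell_gpa)
    {
            struct kvm_ioeventfd ioev = {
                    .addr = doorbell_gpa,
                    .len  = 0,                      /* zero length => fast MMIO bus */
                    .fd   = eventfd(0, EFD_CLOEXEC),
            };

            return ioctl(vm_fd, KVM_IOEVENTFD, &ioev);
    }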
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 1b821c6efdef..f55ef071883f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c | |||
| @@ -403,8 +403,13 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, | |||
| 403 | if (!memslot) | 403 | if (!memslot) |
| 404 | return; | 404 | return; |
| 405 | } | 405 | } |
| 406 | if (shift) | 406 | if (shift) { /* 1GB or 2MB page */ |
| 407 | page_size = 1ul << shift; | 407 | page_size = 1ul << shift; |
| 408 | if (shift == PMD_SHIFT) | ||
| 409 | kvm->stat.num_2M_pages--; | ||
| 410 | else if (shift == PUD_SHIFT) | ||
| 411 | kvm->stat.num_1G_pages--; | ||
| 412 | } | ||
| 408 | 413 | ||
| 409 | gpa &= ~(page_size - 1); | 414 | gpa &= ~(page_size - 1); |
| 410 | hpa = old & PTE_RPN_MASK; | 415 | hpa = old & PTE_RPN_MASK; |
| @@ -878,6 +883,14 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, | |||
| 878 | put_page(page); | 883 | put_page(page); |
| 879 | } | 884 | } |
| 880 | 885 | ||
| 886 | /* Increment number of large pages if we (successfully) inserted one */ | ||
| 887 | if (!ret) { | ||
| 888 | if (level == 1) | ||
| 889 | kvm->stat.num_2M_pages++; | ||
| 890 | else if (level == 2) | ||
| 891 | kvm->stat.num_1G_pages++; | ||
| 892 | } | ||
| 893 | |||
| 881 | return ret; | 894 | return ret; |
| 882 | } | 895 | } |
| 883 | 896 | ||
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 532ab79734c7..f02b04973710 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
| @@ -133,7 +133,6 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, | |||
| 133 | continue; | 133 | continue; |
| 134 | 134 | ||
| 135 | kref_put(&stit->kref, kvm_spapr_tce_liobn_put); | 135 | kref_put(&stit->kref, kvm_spapr_tce_liobn_put); |
| 136 | return; | ||
| 137 | } | 136 | } |
| 138 | } | 137 | } |
| 139 | } | 138 | } |
| @@ -338,14 +337,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
| 338 | } | 337 | } |
| 339 | } | 338 | } |
| 340 | 339 | ||
| 340 | kvm_get_kvm(kvm); | ||
| 341 | if (!ret) | 341 | if (!ret) |
| 342 | ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, | 342 | ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, |
| 343 | stt, O_RDWR | O_CLOEXEC); | 343 | stt, O_RDWR | O_CLOEXEC); |
| 344 | 344 | ||
| 345 | if (ret >= 0) { | 345 | if (ret >= 0) |
| 346 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); | 346 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); |
| 347 | kvm_get_kvm(kvm); | 347 | else |
| 348 | } | 348 | kvm_put_kvm(kvm); |
| 349 | 349 | ||
| 350 | mutex_unlock(&kvm->lock); | 350 | mutex_unlock(&kvm->lock); |
| 351 | 351 | ||
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 8c7e933e942e..6ef7c5f00a49 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #define OP_31_XOP_SLBMFEV 851 | 47 | #define OP_31_XOP_SLBMFEV 851 |
| 48 | #define OP_31_XOP_EIOIO 854 | 48 | #define OP_31_XOP_EIOIO 854 |
| 49 | #define OP_31_XOP_SLBMFEE 915 | 49 | #define OP_31_XOP_SLBMFEE 915 |
| 50 | #define OP_31_XOP_SLBFEE 979 | ||
| 50 | 51 | ||
| 51 | #define OP_31_XOP_TBEGIN 654 | 52 | #define OP_31_XOP_TBEGIN 654 |
| 52 | #define OP_31_XOP_TABORT 910 | 53 | #define OP_31_XOP_TABORT 910 |
| @@ -416,6 +417,23 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 416 | 417 | ||
| 417 | vcpu->arch.mmu.slbia(vcpu); | 418 | vcpu->arch.mmu.slbia(vcpu); |
| 418 | break; | 419 | break; |
| 420 | case OP_31_XOP_SLBFEE: | ||
| 421 | if (!(inst & 1) || !vcpu->arch.mmu.slbfee) { | ||
| 422 | return EMULATE_FAIL; | ||
| 423 | } else { | ||
| 424 | ulong b, t; | ||
| 425 | ulong cr = kvmppc_get_cr(vcpu) & ~CR0_MASK; | ||
| 426 | |||
| 427 | b = kvmppc_get_gpr(vcpu, rb); | ||
| 428 | if (!vcpu->arch.mmu.slbfee(vcpu, b, &t)) | ||
| 429 | cr |= 2 << CR0_SHIFT; | ||
| 430 | kvmppc_set_gpr(vcpu, rt, t); | ||
| 431 | /* copy XER[SO] bit to CR0[SO] */ | ||
| 432 | cr |= (vcpu->arch.regs.xer & 0x80000000) >> | ||
| 433 | (31 - CR0_SHIFT); | ||
| 434 | kvmppc_set_cr(vcpu, cr); | ||
| 435 | } | ||
| 436 | break; | ||
| 419 | case OP_31_XOP_SLBMFEE: | 437 | case OP_31_XOP_SLBMFEE: |
| 420 | if (!vcpu->arch.mmu.slbmfee) { | 438 | if (!vcpu->arch.mmu.slbmfee) { |
| 421 | emulated = EMULATE_FAIL; | 439 | emulated = EMULATE_FAIL; |
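
The new OP_31_XOP_SLBFEE case above sets CR0.EQ when an SLB entry translates the effective address and copies XER[SO] into CR0[SO], mirroring record-form semantics. A sketch of that CR0 update; the CR0_SHIFT/CR0_MASK values are assumptions (CR0 occupying the top nibble of the CR image, as the existing code in this file implies):

    #define CR0_SHIFT 28
    #define CR0_MASK  0xF0000000

    static unsigned long slbfee_cr0(unsigned long cr, int found, unsigned long xer)
    {
            cr &= ~CR0_MASK;
            if (found)
                    cr |= 2UL << CR0_SHIFT;                 /* CR0.EQ */
            cr |= (xer & 0x80000000) >> (31 - CR0_SHIFT);   /* XER[SO] -> CR0[SO] */
            return cr;
    }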
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a3d5318f5d1e..06964350b97a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
| @@ -922,7 +922,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
| 922 | case H_IPOLL: | 922 | case H_IPOLL: |
| 923 | case H_XIRR_X: | 923 | case H_XIRR_X: |
| 924 | if (kvmppc_xics_enabled(vcpu)) { | 924 | if (kvmppc_xics_enabled(vcpu)) { |
| 925 | if (xive_enabled()) { | 925 | if (xics_on_xive()) { |
| 926 | ret = H_NOT_AVAILABLE; | 926 | ret = H_NOT_AVAILABLE; |
| 927 | return RESUME_GUEST; | 927 | return RESUME_GUEST; |
| 928 | } | 928 | } |
| @@ -937,6 +937,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
| 937 | ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4), | 937 | ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4), |
| 938 | kvmppc_get_gpr(vcpu, 5)); | 938 | kvmppc_get_gpr(vcpu, 5)); |
| 939 | break; | 939 | break; |
| 940 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
| 940 | case H_GET_TCE: | 941 | case H_GET_TCE: |
| 941 | ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4), | 942 | ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4), |
| 942 | kvmppc_get_gpr(vcpu, 5)); | 943 | kvmppc_get_gpr(vcpu, 5)); |
| @@ -966,6 +967,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
| 966 | if (ret == H_TOO_HARD) | 967 | if (ret == H_TOO_HARD) |
| 967 | return RESUME_HOST; | 968 | return RESUME_HOST; |
| 968 | break; | 969 | break; |
| 970 | #endif | ||
| 969 | case H_RANDOM: | 971 | case H_RANDOM: |
| 970 | if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) | 972 | if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) |
| 971 | ret = H_HARDWARE; | 973 | ret = H_HARDWARE; |
| @@ -1445,7 +1447,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 1445 | case BOOK3S_INTERRUPT_HV_RM_HARD: | 1447 | case BOOK3S_INTERRUPT_HV_RM_HARD: |
| 1446 | vcpu->arch.trap = 0; | 1448 | vcpu->arch.trap = 0; |
| 1447 | r = RESUME_GUEST; | 1449 | r = RESUME_GUEST; |
| 1448 | if (!xive_enabled()) | 1450 | if (!xics_on_xive()) |
| 1449 | kvmppc_xics_rm_complete(vcpu, 0); | 1451 | kvmppc_xics_rm_complete(vcpu, 0); |
| 1450 | break; | 1452 | break; |
| 1451 | default: | 1453 | default: |
| @@ -3648,11 +3650,12 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, | |||
| 3648 | 3650 | ||
| 3649 | static void grow_halt_poll_ns(struct kvmppc_vcore *vc) | 3651 | static void grow_halt_poll_ns(struct kvmppc_vcore *vc) |
| 3650 | { | 3652 | { |
| 3651 | /* 10us base */ | 3653 | if (!halt_poll_ns_grow) |
| 3652 | if (vc->halt_poll_ns == 0 && halt_poll_ns_grow) | 3654 | return; |
| 3653 | vc->halt_poll_ns = 10000; | 3655 | |
| 3654 | else | 3656 | vc->halt_poll_ns *= halt_poll_ns_grow; |
| 3655 | vc->halt_poll_ns *= halt_poll_ns_grow; | 3657 | if (vc->halt_poll_ns < halt_poll_ns_grow_start) |
| 3658 | vc->halt_poll_ns = halt_poll_ns_grow_start; | ||
| 3656 | } | 3659 | } |
| 3657 | 3660 | ||
| 3658 | static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) | 3661 | static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) |
| @@ -3666,7 +3669,7 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) | |||
| 3666 | #ifdef CONFIG_KVM_XICS | 3669 | #ifdef CONFIG_KVM_XICS |
| 3667 | static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) | 3670 | static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) |
| 3668 | { | 3671 | { |
| 3669 | if (!xive_enabled()) | 3672 | if (!xics_on_xive()) |
| 3670 | return false; | 3673 | return false; |
| 3671 | return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < | 3674 | return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < |
| 3672 | vcpu->arch.xive_saved_state.cppr; | 3675 | vcpu->arch.xive_saved_state.cppr; |
| @@ -4226,7 +4229,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 4226 | vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); | 4229 | vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); |
| 4227 | srcu_read_unlock(&kvm->srcu, srcu_idx); | 4230 | srcu_read_unlock(&kvm->srcu, srcu_idx); |
| 4228 | } else if (r == RESUME_PASSTHROUGH) { | 4231 | } else if (r == RESUME_PASSTHROUGH) { |
| 4229 | if (WARN_ON(xive_enabled())) | 4232 | if (WARN_ON(xics_on_xive())) |
| 4230 | r = H_SUCCESS; | 4233 | r = H_SUCCESS; |
| 4231 | else | 4234 | else |
| 4232 | r = kvmppc_xics_rm_complete(vcpu, 0); | 4235 | r = kvmppc_xics_rm_complete(vcpu, 0); |
| @@ -4750,7 +4753,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | |||
| 4750 | * If xive is enabled, we route 0x500 interrupts directly | 4753 | * If xive is enabled, we route 0x500 interrupts directly |
| 4751 | * to the guest. | 4754 | * to the guest. |
| 4752 | */ | 4755 | */ |
| 4753 | if (xive_enabled()) | 4756 | if (xics_on_xive()) |
| 4754 | lpcr |= LPCR_LPES; | 4757 | lpcr |= LPCR_LPES; |
| 4755 | } | 4758 | } |
| 4756 | 4759 | ||
| @@ -4986,7 +4989,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) | |||
| 4986 | if (i == pimap->n_mapped) | 4989 | if (i == pimap->n_mapped) |
| 4987 | pimap->n_mapped++; | 4990 | pimap->n_mapped++; |
| 4988 | 4991 | ||
| 4989 | if (xive_enabled()) | 4992 | if (xics_on_xive()) |
| 4990 | rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); | 4993 | rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); |
| 4991 | else | 4994 | else |
| 4992 | kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); | 4995 | kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); |
| @@ -5027,7 +5030,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) | |||
| 5027 | return -ENODEV; | 5030 | return -ENODEV; |
| 5028 | } | 5031 | } |
| 5029 | 5032 | ||
| 5030 | if (xive_enabled()) | 5033 | if (xics_on_xive()) |
| 5031 | rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); | 5034 | rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); |
| 5032 | else | 5035 | else |
| 5033 | kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); | 5036 | kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); |
| @@ -5359,13 +5362,11 @@ static int kvm_init_subcore_bitmap(void) | |||
| 5359 | continue; | 5362 | continue; |
| 5360 | 5363 | ||
| 5361 | sibling_subcore_state = | 5364 | sibling_subcore_state = |
| 5362 | kmalloc_node(sizeof(struct sibling_subcore_state), | 5365 | kzalloc_node(sizeof(struct sibling_subcore_state), |
| 5363 | GFP_KERNEL, node); | 5366 | GFP_KERNEL, node); |
| 5364 | if (!sibling_subcore_state) | 5367 | if (!sibling_subcore_state) |
| 5365 | return -ENOMEM; | 5368 | return -ENOMEM; |
| 5366 | 5369 | ||
| 5367 | memset(sibling_subcore_state, 0, | ||
| 5368 | sizeof(struct sibling_subcore_state)); | ||
| 5369 | 5370 | ||
| 5370 | for (j = 0; j < threads_per_core; j++) { | 5371 | for (j = 0; j < threads_per_core; j++) { |
| 5371 | int cpu = first_cpu + j; | 5372 | int cpu = first_cpu + j; |
| @@ -5406,7 +5407,7 @@ static int kvmppc_book3s_init_hv(void) | |||
| 5406 | * indirectly, via OPAL. | 5407 | * indirectly, via OPAL. |
| 5407 | */ | 5408 | */ |
| 5408 | #ifdef CONFIG_SMP | 5409 | #ifdef CONFIG_SMP |
| 5409 | if (!xive_enabled() && !kvmhv_on_pseries() && | 5410 | if (!xics_on_xive() && !kvmhv_on_pseries() && |
| 5410 | !local_paca->kvm_hstate.xics_phys) { | 5411 | !local_paca->kvm_hstate.xics_phys) { |
| 5411 | struct device_node *np; | 5412 | struct device_node *np; |
| 5412 | 5413 | ||
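
The grow_halt_poll_ns() rework above makes halt_poll_ns_grow=0 disable growing entirely and uses halt_poll_ns_grow_start as the floor after the first grow. A small sketch of the new behaviour; the numbers in the comment are illustrative, not claimed defaults:

    /* With grow=2 and grow_start=10000, a non-polling vcore ramps
     * 0 -> 10000 -> 20000 -> 40000 ns on successive grows. */
    static unsigned int grow_poll_ns(unsigned int poll_ns, unsigned int grow,
                                     unsigned int grow_start)
    {
            if (!grow)
                    return poll_ns;         /* growing disabled */
            poll_ns *= grow;
            if (poll_ns < grow_start)
                    poll_ns = grow_start;
            return poll_ns;
    }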
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index a71e2fc00a4e..b0cf22477e87 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
| @@ -257,7 +257,7 @@ void kvmhv_rm_send_ipi(int cpu) | |||
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | /* We should never reach this */ | 259 | /* We should never reach this */ |
| 260 | if (WARN_ON_ONCE(xive_enabled())) | 260 | if (WARN_ON_ONCE(xics_on_xive())) |
| 261 | return; | 261 | return; |
| 262 | 262 | ||
| 263 | /* Else poke the target with an IPI */ | 263 | /* Else poke the target with an IPI */ |
| @@ -577,7 +577,7 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) | |||
| 577 | { | 577 | { |
| 578 | if (!kvmppc_xics_enabled(vcpu)) | 578 | if (!kvmppc_xics_enabled(vcpu)) |
| 579 | return H_TOO_HARD; | 579 | return H_TOO_HARD; |
| 580 | if (xive_enabled()) { | 580 | if (xics_on_xive()) { |
| 581 | if (is_rm()) | 581 | if (is_rm()) |
| 582 | return xive_rm_h_xirr(vcpu); | 582 | return xive_rm_h_xirr(vcpu); |
| 583 | if (unlikely(!__xive_vm_h_xirr)) | 583 | if (unlikely(!__xive_vm_h_xirr)) |
| @@ -592,7 +592,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) | |||
| 592 | if (!kvmppc_xics_enabled(vcpu)) | 592 | if (!kvmppc_xics_enabled(vcpu)) |
| 593 | return H_TOO_HARD; | 593 | return H_TOO_HARD; |
| 594 | vcpu->arch.regs.gpr[5] = get_tb(); | 594 | vcpu->arch.regs.gpr[5] = get_tb(); |
| 595 | if (xive_enabled()) { | 595 | if (xics_on_xive()) { |
| 596 | if (is_rm()) | 596 | if (is_rm()) |
| 597 | return xive_rm_h_xirr(vcpu); | 597 | return xive_rm_h_xirr(vcpu); |
| 598 | if (unlikely(!__xive_vm_h_xirr)) | 598 | if (unlikely(!__xive_vm_h_xirr)) |
| @@ -606,7 +606,7 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) | |||
| 606 | { | 606 | { |
| 607 | if (!kvmppc_xics_enabled(vcpu)) | 607 | if (!kvmppc_xics_enabled(vcpu)) |
| 608 | return H_TOO_HARD; | 608 | return H_TOO_HARD; |
| 609 | if (xive_enabled()) { | 609 | if (xics_on_xive()) { |
| 610 | if (is_rm()) | 610 | if (is_rm()) |
| 611 | return xive_rm_h_ipoll(vcpu, server); | 611 | return xive_rm_h_ipoll(vcpu, server); |
| 612 | if (unlikely(!__xive_vm_h_ipoll)) | 612 | if (unlikely(!__xive_vm_h_ipoll)) |
| @@ -621,7 +621,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, | |||
| 621 | { | 621 | { |
| 622 | if (!kvmppc_xics_enabled(vcpu)) | 622 | if (!kvmppc_xics_enabled(vcpu)) |
| 623 | return H_TOO_HARD; | 623 | return H_TOO_HARD; |
| 624 | if (xive_enabled()) { | 624 | if (xics_on_xive()) { |
| 625 | if (is_rm()) | 625 | if (is_rm()) |
| 626 | return xive_rm_h_ipi(vcpu, server, mfrr); | 626 | return xive_rm_h_ipi(vcpu, server, mfrr); |
| 627 | if (unlikely(!__xive_vm_h_ipi)) | 627 | if (unlikely(!__xive_vm_h_ipi)) |
| @@ -635,7 +635,7 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) | |||
| 635 | { | 635 | { |
| 636 | if (!kvmppc_xics_enabled(vcpu)) | 636 | if (!kvmppc_xics_enabled(vcpu)) |
| 637 | return H_TOO_HARD; | 637 | return H_TOO_HARD; |
| 638 | if (xive_enabled()) { | 638 | if (xics_on_xive()) { |
| 639 | if (is_rm()) | 639 | if (is_rm()) |
| 640 | return xive_rm_h_cppr(vcpu, cppr); | 640 | return xive_rm_h_cppr(vcpu, cppr); |
| 641 | if (unlikely(!__xive_vm_h_cppr)) | 641 | if (unlikely(!__xive_vm_h_cppr)) |
| @@ -649,7 +649,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | |||
| 649 | { | 649 | { |
| 650 | if (!kvmppc_xics_enabled(vcpu)) | 650 | if (!kvmppc_xics_enabled(vcpu)) |
| 651 | return H_TOO_HARD; | 651 | return H_TOO_HARD; |
| 652 | if (xive_enabled()) { | 652 | if (xics_on_xive()) { |
| 653 | if (is_rm()) | 653 | if (is_rm()) |
| 654 | return xive_rm_h_eoi(vcpu, xirr); | 654 | return xive_rm_h_eoi(vcpu, xirr); |
| 655 | if (unlikely(!__xive_vm_h_eoi)) | 655 | if (unlikely(!__xive_vm_h_eoi)) |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index b3f5786b20dc..3b9662a4207e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c | |||
| @@ -144,6 +144,13 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, | |||
| 144 | return; | 144 | return; |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | if (xive_enabled() && kvmhv_on_pseries()) { | ||
| 148 | /* No XICS access or hypercalls available, too hard */ | ||
| 149 | this_icp->rm_action |= XICS_RM_KICK_VCPU; | ||
| 150 | this_icp->rm_kick_target = vcpu; | ||
| 151 | return; | ||
| 152 | } | ||
| 153 | |||
| 147 | /* | 154 | /* |
| 148 | * Check if the core is loaded, | 155 | * Check if the core is loaded, |
| 149 | * if not, find an available host core to post to wake the VCPU, | 156 | * if not, find an available host core to post to wake the VCPU, |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 25043b50cb30..3a5e719ef032 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
| @@ -2272,8 +2272,13 @@ hcall_real_table: | |||
| 2272 | .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table | 2272 | .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table |
| 2273 | .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table | 2273 | .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table |
| 2274 | .long DOTSYM(kvmppc_h_protect) - hcall_real_table | 2274 | .long DOTSYM(kvmppc_h_protect) - hcall_real_table |
| 2275 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
| 2275 | .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table | 2276 | .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table |
| 2276 | .long DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table | 2277 | .long DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table |
| 2278 | #else | ||
| 2279 | .long 0 /* 0x1c */ | ||
| 2280 | .long 0 /* 0x20 */ | ||
| 2281 | #endif | ||
| 2277 | .long 0 /* 0x24 - H_SET_SPRG0 */ | 2282 | .long 0 /* 0x24 - H_SET_SPRG0 */ |
| 2278 | .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table | 2283 | .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table |
| 2279 | .long 0 /* 0x2c */ | 2284 | .long 0 /* 0x2c */ |
| @@ -2351,8 +2356,13 @@ hcall_real_table: | |||
| 2351 | .long 0 /* 0x12c */ | 2356 | .long 0 /* 0x12c */ |
| 2352 | .long 0 /* 0x130 */ | 2357 | .long 0 /* 0x130 */ |
| 2353 | .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table | 2358 | .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table |
| 2359 | #ifdef CONFIG_SPAPR_TCE_IOMMU | ||
| 2354 | .long DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table | 2360 | .long DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table |
| 2355 | .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table | 2361 | .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table |
| 2362 | #else | ||
| 2363 | .long 0 /* 0x138 */ | ||
| 2364 | .long 0 /* 0x13c */ | ||
| 2365 | #endif | ||
| 2356 | .long 0 /* 0x140 */ | 2366 | .long 0 /* 0x140 */ |
| 2357 | .long 0 /* 0x144 */ | 2367 | .long 0 /* 0x144 */ |
| 2358 | .long 0 /* 0x148 */ | 2368 | .long 0 /* 0x148 */ |
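
The #else branches above stub the TCE entries with .long 0 rather than dropping them because hcall_real_table is indexed by the hcall number itself: each slot is a 32-bit offset and hcall numbers are multiples of 4 (hence the /* 0x1c */, /* 0x20 */ comments), so removing entries would shift every later hcall. A hedged C restatement of that lookup:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: a zero slot means "no real-mode handler, punt to virtual mode". */
    static int32_t lookup_rm_handler(const int32_t *table, size_t table_bytes,
                                     unsigned long hcall)
    {
            if (hcall >= table_bytes || (hcall & 3))
                    return 0;
            return table[hcall / 4];
    }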
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 2d3b2b1cc272..4e178c4c1ea5 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c | |||
| @@ -33,7 +33,7 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) | |||
| 33 | server = be32_to_cpu(args->args[1]); | 33 | server = be32_to_cpu(args->args[1]); |
| 34 | priority = be32_to_cpu(args->args[2]); | 34 | priority = be32_to_cpu(args->args[2]); |
| 35 | 35 | ||
| 36 | if (xive_enabled()) | 36 | if (xics_on_xive()) |
| 37 | rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority); | 37 | rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority); |
| 38 | else | 38 | else |
| 39 | rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); | 39 | rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); |
| @@ -56,7 +56,7 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) | |||
| 56 | irq = be32_to_cpu(args->args[0]); | 56 | irq = be32_to_cpu(args->args[0]); |
| 57 | 57 | ||
| 58 | server = priority = 0; | 58 | server = priority = 0; |
| 59 | if (xive_enabled()) | 59 | if (xics_on_xive()) |
| 60 | rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority); | 60 | rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority); |
| 61 | else | 61 | else |
| 62 | rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); | 62 | rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); |
| @@ -83,7 +83,7 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) | |||
| 83 | 83 | ||
| 84 | irq = be32_to_cpu(args->args[0]); | 84 | irq = be32_to_cpu(args->args[0]); |
| 85 | 85 | ||
| 86 | if (xive_enabled()) | 86 | if (xics_on_xive()) |
| 87 | rc = kvmppc_xive_int_off(vcpu->kvm, irq); | 87 | rc = kvmppc_xive_int_off(vcpu->kvm, irq); |
| 88 | else | 88 | else |
| 89 | rc = kvmppc_xics_int_off(vcpu->kvm, irq); | 89 | rc = kvmppc_xics_int_off(vcpu->kvm, irq); |
| @@ -105,7 +105,7 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) | |||
| 105 | 105 | ||
| 106 | irq = be32_to_cpu(args->args[0]); | 106 | irq = be32_to_cpu(args->args[0]); |
| 107 | 107 | ||
| 108 | if (xive_enabled()) | 108 | if (xics_on_xive()) |
| 109 | rc = kvmppc_xive_int_on(vcpu->kvm, irq); | 109 | rc = kvmppc_xive_int_on(vcpu->kvm, irq); |
| 110 | else | 110 | else |
| 111 | rc = kvmppc_xics_int_on(vcpu->kvm, irq); | 111 | rc = kvmppc_xics_int_on(vcpu->kvm, irq); |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b90a7d154180..8885377ec3e0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
| @@ -748,7 +748,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
| 748 | kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); | 748 | kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); |
| 749 | break; | 749 | break; |
| 750 | case KVMPPC_IRQ_XICS: | 750 | case KVMPPC_IRQ_XICS: |
| 751 | if (xive_enabled()) | 751 | if (xics_on_xive()) |
| 752 | kvmppc_xive_cleanup_vcpu(vcpu); | 752 | kvmppc_xive_cleanup_vcpu(vcpu); |
| 753 | else | 753 | else |
| 754 | kvmppc_xics_free_icp(vcpu); | 754 | kvmppc_xics_free_icp(vcpu); |
| @@ -1931,7 +1931,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
| 1931 | r = -EPERM; | 1931 | r = -EPERM; |
| 1932 | dev = kvm_device_from_filp(f.file); | 1932 | dev = kvm_device_from_filp(f.file); |
| 1933 | if (dev) { | 1933 | if (dev) { |
| 1934 | if (xive_enabled()) | 1934 | if (xics_on_xive()) |
| 1935 | r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]); | 1935 | r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]); |
| 1936 | else | 1936 | else |
| 1937 | r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); | 1937 | r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); |
| @@ -2189,10 +2189,12 @@ static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) | |||
| 2189 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | | 2189 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | |
| 2190 | KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | | 2190 | KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | |
| 2191 | KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | | 2191 | KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | |
| 2192 | KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; | 2192 | KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS | |
| 2193 | KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; | ||
| 2193 | cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | | 2194 | cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | |
| 2194 | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | | 2195 | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | |
| 2195 | KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; | 2196 | KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR | |
| 2197 | KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; | ||
| 2196 | } | 2198 | } |
| 2197 | return 0; | 2199 | return 0; |
| 2198 | } | 2200 | } |
| @@ -2251,12 +2253,16 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) | |||
| 2251 | if (have_fw_feat(fw_features, "enabled", | 2253 | if (have_fw_feat(fw_features, "enabled", |
| 2252 | "fw-count-cache-disabled")) | 2254 | "fw-count-cache-disabled")) |
| 2253 | cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; | 2255 | cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; |
| 2256 | if (have_fw_feat(fw_features, "enabled", | ||
| 2257 | "fw-count-cache-flush-bcctr2,0,0")) | ||
| 2258 | cp->character |= KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; | ||
| 2254 | cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | | 2259 | cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | |
| 2255 | KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | | 2260 | KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | |
| 2256 | KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | | 2261 | KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | |
| 2257 | KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | | 2262 | KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | |
| 2258 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | | 2263 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | |
| 2259 | KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; | 2264 | KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS | |
| 2265 | KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; | ||
| 2260 | 2266 | ||
| 2261 | if (have_fw_feat(fw_features, "enabled", | 2267 | if (have_fw_feat(fw_features, "enabled", |
| 2262 | "speculation-policy-favor-security")) | 2268 | "speculation-policy-favor-security")) |
| @@ -2267,9 +2273,13 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) | |||
| 2267 | if (!have_fw_feat(fw_features, "disabled", | 2273 | if (!have_fw_feat(fw_features, "disabled", |
| 2268 | "needs-spec-barrier-for-bound-checks")) | 2274 | "needs-spec-barrier-for-bound-checks")) |
| 2269 | cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; | 2275 | cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; |
| 2276 | if (have_fw_feat(fw_features, "enabled", | ||
| 2277 | "needs-count-cache-flush-on-context-switch")) | ||
| 2278 | cp->behaviour |= KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; | ||
| 2270 | cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | | 2279 | cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | |
| 2271 | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | | 2280 | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | |
| 2272 | KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; | 2281 | KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR | |
| 2282 | KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; | ||
| 2273 | 2283 | ||
| 2274 | of_node_put(fw_features); | 2284 | of_node_put(fw_features); |
| 2275 | } | 2285 | } |
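
The powerpc.c hunks above report the two new count-cache-flush bits through the existing CPU characteristics interface. A hedged userspace sketch of how a VMM could test them; KVM_PPC_GET_CPU_CHAR and struct kvm_ppc_cpu_char are assumed to be available from <linux/kvm.h> on powerpc hosts:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static void report_count_cache_flush(int vm_fd)
    {
            struct kvm_ppc_cpu_char cc;

            if (ioctl(vm_fd, KVM_PPC_GET_CPU_CHAR, &cc) < 0)
                    return;

            if (cc.character & cc.character_mask & KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST)
                    printf("count-cache flush assist available\n");
            if (cc.behaviour & cc.behaviour_mask & KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE)
                    printf("guest should flush the count cache on context switch\n");
    }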
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 225667652069..1727180e8ca1 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h | |||
| @@ -331,5 +331,6 @@ extern void css_schedule_reprobe(void); | |||
| 331 | /* Function from drivers/s390/cio/chsc.c */ | 331 | /* Function from drivers/s390/cio/chsc.c */ |
| 332 | int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); | 332 | int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); |
| 333 | int chsc_sstpi(void *page, void *result, size_t size); | 333 | int chsc_sstpi(void *page, void *result, size_t size); |
| 334 | int chsc_sgib(u32 origin); | ||
| 334 | 335 | ||
| 335 | #endif | 336 | #endif |
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 2f7f27e5493f..afaf5e3c57fd 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h | |||
| @@ -62,6 +62,7 @@ enum interruption_class { | |||
| 62 | IRQIO_MSI, | 62 | IRQIO_MSI, |
| 63 | IRQIO_VIR, | 63 | IRQIO_VIR, |
| 64 | IRQIO_VAI, | 64 | IRQIO_VAI, |
| 65 | IRQIO_GAL, | ||
| 65 | NMI_NMI, | 66 | NMI_NMI, |
| 66 | CPU_RST, | 67 | CPU_RST, |
| 67 | NR_ARCH_IRQS | 68 | NR_ARCH_IRQS |
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h index 6cb9e2ed05b6..b2cc1ec78d06 100644 --- a/arch/s390/include/asm/isc.h +++ b/arch/s390/include/asm/isc.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | /* Adapter interrupts. */ | 21 | /* Adapter interrupts. */ |
| 22 | #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ | 22 | #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ |
| 23 | #define PCI_ISC 2 /* PCI I/O subchannels */ | 23 | #define PCI_ISC 2 /* PCI I/O subchannels */ |
| 24 | #define GAL_ISC 5 /* GIB alert */ | ||
| 24 | #define AP_ISC 6 /* adjunct processor (crypto) devices */ | 25 | #define AP_ISC 6 /* adjunct processor (crypto) devices */ |
| 25 | 26 | ||
| 26 | /* Functions for registration of I/O interruption subclasses */ | 27 | /* Functions for registration of I/O interruption subclasses */ |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d5d24889c3bc..c47e22bba87f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
| @@ -591,7 +591,6 @@ struct kvm_s390_float_interrupt { | |||
| 591 | struct kvm_s390_mchk_info mchk; | 591 | struct kvm_s390_mchk_info mchk; |
| 592 | struct kvm_s390_ext_info srv_signal; | 592 | struct kvm_s390_ext_info srv_signal; |
| 593 | int next_rr_cpu; | 593 | int next_rr_cpu; |
| 594 | unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; | ||
| 595 | struct mutex ais_lock; | 594 | struct mutex ais_lock; |
| 596 | u8 simm; | 595 | u8 simm; |
| 597 | u8 nimm; | 596 | u8 nimm; |
| @@ -712,6 +711,7 @@ struct s390_io_adapter { | |||
| 712 | struct kvm_s390_cpu_model { | 711 | struct kvm_s390_cpu_model { |
| 713 | /* facility mask supported by kvm & hosting machine */ | 712 | /* facility mask supported by kvm & hosting machine */ |
| 714 | __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; | 713 | __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; |
| 714 | struct kvm_s390_vm_cpu_subfunc subfuncs; | ||
| 715 | /* facility list requested by guest (in dma page) */ | 715 | /* facility list requested by guest (in dma page) */ |
| 716 | __u64 *fac_list; | 716 | __u64 *fac_list; |
| 717 | u64 cpuid; | 717 | u64 cpuid; |
| @@ -782,9 +782,21 @@ struct kvm_s390_gisa { | |||
| 782 | u8 reserved03[11]; | 782 | u8 reserved03[11]; |
| 783 | u32 airq_count; | 783 | u32 airq_count; |
| 784 | } g1; | 784 | } g1; |
| 785 | struct { | ||
| 786 | u64 word[4]; | ||
| 787 | } u64; | ||
| 785 | }; | 788 | }; |
| 786 | }; | 789 | }; |
| 787 | 790 | ||
| 791 | struct kvm_s390_gib { | ||
| 792 | u32 alert_list_origin; | ||
| 793 | u32 reserved01; | ||
| 794 | u8:5; | ||
| 795 | u8 nisc:3; | ||
| 796 | u8 reserved03[3]; | ||
| 797 | u32 reserved04[5]; | ||
| 798 | }; | ||
| 799 | |||
| 788 | /* | 800 | /* |
| 789 | * sie_page2 has to be allocated as DMA because fac_list, crycb and | 801 | * sie_page2 has to be allocated as DMA because fac_list, crycb and |
| 790 | * gisa need 31bit addresses in the sie control block. | 802 | * gisa need 31bit addresses in the sie control block. |
| @@ -793,7 +805,8 @@ struct sie_page2 { | |||
| 793 | __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ | 805 | __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ |
| 794 | struct kvm_s390_crypto_cb crycb; /* 0x0800 */ | 806 | struct kvm_s390_crypto_cb crycb; /* 0x0800 */ |
| 795 | struct kvm_s390_gisa gisa; /* 0x0900 */ | 807 | struct kvm_s390_gisa gisa; /* 0x0900 */ |
| 796 | u8 reserved920[0x1000 - 0x920]; /* 0x0920 */ | 808 | struct kvm *kvm; /* 0x0920 */ |
| 809 | u8 reserved928[0x1000 - 0x928]; /* 0x0928 */ | ||
| 797 | }; | 810 | }; |
| 798 | 811 | ||
| 799 | struct kvm_s390_vsie { | 812 | struct kvm_s390_vsie { |
| @@ -804,6 +817,20 @@ struct kvm_s390_vsie { | |||
| 804 | struct page *pages[KVM_MAX_VCPUS]; | 817 | struct page *pages[KVM_MAX_VCPUS]; |
| 805 | }; | 818 | }; |
| 806 | 819 | ||
| 820 | struct kvm_s390_gisa_iam { | ||
| 821 | u8 mask; | ||
| 822 | spinlock_t ref_lock; | ||
| 823 | u32 ref_count[MAX_ISC + 1]; | ||
| 824 | }; | ||
| 825 | |||
| 826 | struct kvm_s390_gisa_interrupt { | ||
| 827 | struct kvm_s390_gisa *origin; | ||
| 828 | struct kvm_s390_gisa_iam alert; | ||
| 829 | struct hrtimer timer; | ||
| 830 | u64 expires; | ||
| 831 | DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS); | ||
| 832 | }; | ||
| 833 | |||
| 807 | struct kvm_arch{ | 834 | struct kvm_arch{ |
| 808 | void *sca; | 835 | void *sca; |
| 809 | int use_esca; | 836 | int use_esca; |
| @@ -837,7 +864,8 @@ struct kvm_arch{ | |||
| 837 | atomic64_t cmma_dirty_pages; | 864 | atomic64_t cmma_dirty_pages; |
| 838 | /* subset of available cpu features enabled by user space */ | 865 | /* subset of available cpu features enabled by user space */ |
| 839 | DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); | 866 | DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); |
| 840 | struct kvm_s390_gisa *gisa; | 867 | DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); |
| 868 | struct kvm_s390_gisa_interrupt gisa_int; | ||
| 841 | }; | 869 | }; |
| 842 | 870 | ||
| 843 | #define KVM_HVA_ERR_BAD (-1UL) | 871 | #define KVM_HVA_ERR_BAD (-1UL) |
| @@ -871,6 +899,9 @@ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, | |||
| 871 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); | 899 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); |
| 872 | extern char sie_exit; | 900 | extern char sie_exit; |
| 873 | 901 | ||
| 902 | extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); | ||
| 903 | extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); | ||
| 904 | |||
| 874 | static inline void kvm_arch_hardware_disable(void) {} | 905 | static inline void kvm_arch_hardware_disable(void) {} |
| 875 | static inline void kvm_arch_check_processor_compat(void *rtn) {} | 906 | static inline void kvm_arch_check_processor_compat(void *rtn) {} |
| 876 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} | 907 | static inline void kvm_arch_sync_events(struct kvm *kvm) {} |
| @@ -878,7 +909,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} | |||
| 878 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} | 909 | static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} |
| 879 | static inline void kvm_arch_free_memslot(struct kvm *kvm, | 910 | static inline void kvm_arch_free_memslot(struct kvm *kvm, |
| 880 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} | 911 | struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} |
| 881 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} | 912 | static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} |
| 882 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} | 913 | static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} |
| 883 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 914 | static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
| 884 | struct kvm_memory_slot *slot) {} | 915 | struct kvm_memory_slot *slot) {} |
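
The u64.word[] overlay added to struct kvm_s390_gisa above lets the interrupt code update several GISA fields with a single cmpxchg. A hedged sketch of how word[0] is interpreted by the helpers introduced later in this series (gisa_set_iam(), gisa_clear_ipm(), gisa_get_ipm_or_restore_iam()); the layout is inferred from that code, not quoted from an architected definition:

    #include <stdint.h>

    /* bits 63..32: next alert-list entry (the GISA's own address while not queued)
     * bits 31..24: IPM, interruption pending mask
     * bits  7..0 : IAM, interruption alert mask */
    static inline uint32_t gisa_next_alert(uint64_t w0) { return w0 >> 32; }
    static inline uint8_t  gisa_ipm(uint64_t w0)        { return (w0 >> 24) & 0xff; }
    static inline uint8_t  gisa_iam(uint64_t w0)        { return w0 & 0xff; }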
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 0e8d68bac82c..0cd5a5f96729 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c | |||
| @@ -88,6 +88,7 @@ static const struct irq_class irqclass_sub_desc[] = { | |||
| 88 | {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" }, | 88 | {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" }, |
| 89 | {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, | 89 | {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, |
| 90 | {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, | 90 | {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, |
| 91 | {.irq = IRQIO_GAL, .name = "GAL", .desc = "[I/O] GIB Alert"}, | ||
| 91 | {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, | 92 | {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, |
| 92 | {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, | 93 | {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, |
| 93 | }; | 94 | }; |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fcb55b02990e..82162867f378 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
| @@ -7,6 +7,9 @@ | |||
| 7 | * Author(s): Carsten Otte <cotte@de.ibm.com> | 7 | * Author(s): Carsten Otte <cotte@de.ibm.com> |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define KMSG_COMPONENT "kvm-s390" | ||
| 11 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | ||
| 12 | |||
| 10 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
| 11 | #include <linux/kvm_host.h> | 14 | #include <linux/kvm_host.h> |
| 12 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
| @@ -23,6 +26,7 @@ | |||
| 23 | #include <asm/gmap.h> | 26 | #include <asm/gmap.h> |
| 24 | #include <asm/switch_to.h> | 27 | #include <asm/switch_to.h> |
| 25 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
| 29 | #include <asm/airq.h> | ||
| 26 | #include "kvm-s390.h" | 30 | #include "kvm-s390.h" |
| 27 | #include "gaccess.h" | 31 | #include "gaccess.h" |
| 28 | #include "trace-s390.h" | 32 | #include "trace-s390.h" |
| @@ -31,6 +35,8 @@ | |||
| 31 | #define PFAULT_DONE 0x0680 | 35 | #define PFAULT_DONE 0x0680 |
| 32 | #define VIRTIO_PARAM 0x0d00 | 36 | #define VIRTIO_PARAM 0x0d00 |
| 33 | 37 | ||
| 38 | static struct kvm_s390_gib *gib; | ||
| 39 | |||
| 34 | /* handle external calls via sigp interpretation facility */ | 40 | /* handle external calls via sigp interpretation facility */ |
| 35 | static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) | 41 | static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) |
| 36 | { | 42 | { |
| @@ -217,22 +223,100 @@ static inline u8 int_word_to_isc(u32 int_word) | |||
| 217 | */ | 223 | */ |
| 218 | #define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE) | 224 | #define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE) |
| 219 | 225 | ||
| 220 | static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) | 226 | /** |
| 227 | * gisa_set_iam - change the GISA interruption alert mask | ||
| 228 | * | ||
| 229 | * @gisa: gisa to operate on | ||
| 230 | * @iam: new IAM value to use | ||
| 231 | * | ||
| 232 | * Change the IAM atomically with the next alert address and the IPM | ||
| 233 | * of the GISA if the GISA is not part of the GIB alert list. All three | ||
| 234 | * fields are located in the first long word of the GISA. | ||
| 235 | * | ||
| 236 | * Returns: 0 on success | ||
| 237 | * -EBUSY in case the gisa is part of the alert list | ||
| 238 | */ | ||
| 239 | static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam) | ||
| 240 | { | ||
| 241 | u64 word, _word; | ||
| 242 | |||
| 243 | do { | ||
| 244 | word = READ_ONCE(gisa->u64.word[0]); | ||
| 245 | if ((u64)gisa != word >> 32) | ||
| 246 | return -EBUSY; | ||
| 247 | _word = (word & ~0xffUL) | iam; | ||
| 248 | } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); | ||
| 249 | |||
| 250 | return 0; | ||
| 251 | } | ||
| 252 | |||
| 253 | /** | ||
| 254 | * gisa_clear_ipm - clear the GISA interruption pending mask | ||
| 255 | * | ||
| 256 | * @gisa: gisa to operate on | ||
| 257 | * | ||
| 258 | * Clear the IPM atomically with the next alert address and the IAM | ||
| 259 | * of the GISA unconditionally. All three fields are located in the | ||
| 260 | * first long word of the GISA. | ||
| 261 | */ | ||
| 262 | static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa) | ||
| 263 | { | ||
| 264 | u64 word, _word; | ||
| 265 | |||
| 266 | do { | ||
| 267 | word = READ_ONCE(gisa->u64.word[0]); | ||
| 268 | _word = word & ~(0xffUL << 24); | ||
| 269 | } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); | ||
| 270 | } | ||
| 271 | |||
| 272 | /** | ||
| 273 | * gisa_get_ipm_or_restore_iam - return IPM or restore GISA IAM | ||
| 274 | * | ||
| 275 | * @gi: gisa interrupt struct to work on | ||
| 276 | * | ||
| 277 | * Atomically restores the interruption alert mask if none of the | ||
| 278 | * relevant ISCs are pending and return the IPM. | ||
| 279 | * | ||
| 280 | * Returns: the relevant pending ISCs | ||
| 281 | */ | ||
| 282 | static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) | ||
| 283 | { | ||
| 284 | u8 pending_mask, alert_mask; | ||
| 285 | u64 word, _word; | ||
| 286 | |||
| 287 | do { | ||
| 288 | word = READ_ONCE(gi->origin->u64.word[0]); | ||
| 289 | alert_mask = READ_ONCE(gi->alert.mask); | ||
| 290 | pending_mask = (u8)(word >> 24) & alert_mask; | ||
| 291 | if (pending_mask) | ||
| 292 | return pending_mask; | ||
| 293 | _word = (word & ~0xffUL) | alert_mask; | ||
| 294 | } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word); | ||
| 295 | |||
| 296 | return 0; | ||
| 297 | } | ||
| 298 | |||
| 299 | static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa) | ||
| 300 | { | ||
| 301 | return READ_ONCE(gisa->next_alert) != (u32)(u64)gisa; | ||
| 302 | } | ||
| 303 | |||
| 304 | static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) | ||
| 221 | { | 305 | { |
| 222 | set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); | 306 | set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); |
| 223 | } | 307 | } |
| 224 | 308 | ||
| 225 | static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa) | 309 | static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa) |
| 226 | { | 310 | { |
| 227 | return READ_ONCE(gisa->ipm); | 311 | return READ_ONCE(gisa->ipm); |
| 228 | } | 312 | } |
| 229 | 313 | ||
| 230 | static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) | 314 | static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) |
| 231 | { | 315 | { |
| 232 | clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); | 316 | clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); |
| 233 | } | 317 | } |
| 234 | 318 | ||
| 235 | static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) | 319 | static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) |
| 236 | { | 320 | { |
| 237 | return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); | 321 | return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); |
| 238 | } | 322 | } |
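The new gisa_* helpers above all rely on the same lock-free scheme: the alert-list link (next_alert), the interruption pending mask (IPM) and the interruption alert mask (IAM) share the first 64-bit word of the GISA, so every update is a cmpxchg retry loop over that single word. The sketch below only illustrates the layout these accessors assume (the union and its name are illustrative, not part of the patch); a caller that sees -EBUSY from gisa_set_iam() can simply give up, because the IAM is restored later by gisa_get_ipm_or_restore_iam().

#include <linux/types.h>

/* Layout of the first GISA word assumed by the accessors above
 * (s390 is big-endian, so next_alert sits in the high half of word[0]). */
union gisa_word0_sketch {
	u64 word;
	struct {
		u32 next_alert;	/* GIB alert list link; equals the GISA's own
				 * address when it is not on the alert list */
		u8  ipm;	/* interruption pending mask, one bit per ISC */
		u8  unused[2];
		u8  iam;	/* interruption alert mask */
	};
};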
| @@ -245,8 +329,13 @@ static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu) | |||
| 245 | 329 | ||
| 246 | static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) | 330 | static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) |
| 247 | { | 331 | { |
| 248 | return pending_irqs_no_gisa(vcpu) | | 332 | struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; |
| 249 | kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; | 333 | unsigned long pending_mask; |
| 334 | |||
| 335 | pending_mask = pending_irqs_no_gisa(vcpu); | ||
| 336 | if (gi->origin) | ||
| 337 | pending_mask |= gisa_get_ipm(gi->origin) << IRQ_PEND_IO_ISC_7; | ||
| 338 | return pending_mask; | ||
| 250 | } | 339 | } |
| 251 | 340 | ||
| 252 | static inline int isc_to_irq_type(unsigned long isc) | 341 | static inline int isc_to_irq_type(unsigned long isc) |
| @@ -318,13 +407,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) | |||
| 318 | static void __set_cpu_idle(struct kvm_vcpu *vcpu) | 407 | static void __set_cpu_idle(struct kvm_vcpu *vcpu) |
| 319 | { | 408 | { |
| 320 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); | 409 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); |
| 321 | set_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); | 410 | set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); |
| 322 | } | 411 | } |
| 323 | 412 | ||
| 324 | static void __unset_cpu_idle(struct kvm_vcpu *vcpu) | 413 | static void __unset_cpu_idle(struct kvm_vcpu *vcpu) |
| 325 | { | 414 | { |
| 326 | kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); | 415 | kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); |
| 327 | clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); | 416 | clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); |
| 328 | } | 417 | } |
| 329 | 418 | ||
| 330 | static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) | 419 | static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) |
| @@ -345,7 +434,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) | |||
| 345 | { | 434 | { |
| 346 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) | 435 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) |
| 347 | return; | 436 | return; |
| 348 | else if (psw_ioint_disabled(vcpu)) | 437 | if (psw_ioint_disabled(vcpu)) |
| 349 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); | 438 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); |
| 350 | else | 439 | else |
| 351 | vcpu->arch.sie_block->lctl |= LCTL_CR6; | 440 | vcpu->arch.sie_block->lctl |= LCTL_CR6; |
| @@ -353,7 +442,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) | |||
| 353 | 442 | ||
| 354 | static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) | 443 | static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) |
| 355 | { | 444 | { |
| 356 | if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK)) | 445 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_EXT_MASK)) |
| 357 | return; | 446 | return; |
| 358 | if (psw_extint_disabled(vcpu)) | 447 | if (psw_extint_disabled(vcpu)) |
| 359 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); | 448 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); |
| @@ -363,7 +452,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) | |||
| 363 | 452 | ||
| 364 | static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) | 453 | static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) |
| 365 | { | 454 | { |
| 366 | if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) | 455 | if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_MCHK_MASK)) |
| 367 | return; | 456 | return; |
| 368 | if (psw_mchk_disabled(vcpu)) | 457 | if (psw_mchk_disabled(vcpu)) |
| 369 | vcpu->arch.sie_block->ictl |= ICTL_LPSW; | 458 | vcpu->arch.sie_block->ictl |= ICTL_LPSW; |
| @@ -956,6 +1045,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, | |||
| 956 | { | 1045 | { |
| 957 | struct list_head *isc_list; | 1046 | struct list_head *isc_list; |
| 958 | struct kvm_s390_float_interrupt *fi; | 1047 | struct kvm_s390_float_interrupt *fi; |
| 1048 | struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; | ||
| 959 | struct kvm_s390_interrupt_info *inti = NULL; | 1049 | struct kvm_s390_interrupt_info *inti = NULL; |
| 960 | struct kvm_s390_io_info io; | 1050 | struct kvm_s390_io_info io; |
| 961 | u32 isc; | 1051 | u32 isc; |
| @@ -998,8 +1088,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, | |||
| 998 | goto out; | 1088 | goto out; |
| 999 | } | 1089 | } |
| 1000 | 1090 | ||
| 1001 | if (vcpu->kvm->arch.gisa && | 1091 | if (gi->origin && gisa_tac_ipm_gisc(gi->origin, isc)) { |
| 1002 | kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) { | ||
| 1003 | /* | 1092 | /* |
| 1004 | * in case an adapter interrupt was not delivered | 1093 | * in case an adapter interrupt was not delivered |
| 1005 | * in SIE context KVM will handle the delivery | 1094 | * in SIE context KVM will handle the delivery |
| @@ -1089,6 +1178,7 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu) | |||
| 1089 | 1178 | ||
| 1090 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | 1179 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) |
| 1091 | { | 1180 | { |
| 1181 | struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; | ||
| 1092 | u64 sltime; | 1182 | u64 sltime; |
| 1093 | 1183 | ||
| 1094 | vcpu->stat.exit_wait_state++; | 1184 | vcpu->stat.exit_wait_state++; |
| @@ -1102,6 +1192,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | |||
| 1102 | return -EOPNOTSUPP; /* disabled wait */ | 1192 | return -EOPNOTSUPP; /* disabled wait */ |
| 1103 | } | 1193 | } |
| 1104 | 1194 | ||
| 1195 | if (gi->origin && | ||
| 1196 | (gisa_get_ipm_or_restore_iam(gi) & | ||
| 1197 | vcpu->arch.sie_block->gcr[6] >> 24)) | ||
| 1198 | return 0; | ||
| 1199 | |||
| 1105 | if (!ckc_interrupts_enabled(vcpu) && | 1200 | if (!ckc_interrupts_enabled(vcpu) && |
| 1106 | !cpu_timer_interrupts_enabled(vcpu)) { | 1201 | !cpu_timer_interrupts_enabled(vcpu)) { |
| 1107 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); | 1202 | VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); |
| @@ -1533,18 +1628,19 @@ static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm, | |||
| 1533 | 1628 | ||
| 1534 | static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid) | 1629 | static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid) |
| 1535 | { | 1630 | { |
| 1631 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
| 1536 | unsigned long active_mask; | 1632 | unsigned long active_mask; |
| 1537 | int isc; | 1633 | int isc; |
| 1538 | 1634 | ||
| 1539 | if (schid) | 1635 | if (schid) |
| 1540 | goto out; | 1636 | goto out; |
| 1541 | if (!kvm->arch.gisa) | 1637 | if (!gi->origin) |
| 1542 | goto out; | 1638 | goto out; |
| 1543 | 1639 | ||
| 1544 | active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32; | 1640 | active_mask = (isc_mask & gisa_get_ipm(gi->origin) << 24) << 32; |
| 1545 | while (active_mask) { | 1641 | while (active_mask) { |
| 1546 | isc = __fls(active_mask) ^ (BITS_PER_LONG - 1); | 1642 | isc = __fls(active_mask) ^ (BITS_PER_LONG - 1); |
| 1547 | if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc)) | 1643 | if (gisa_tac_ipm_gisc(gi->origin, isc)) |
| 1548 | return isc; | 1644 | return isc; |
| 1549 | clear_bit_inv(isc, &active_mask); | 1645 | clear_bit_inv(isc, &active_mask); |
| 1550 | } | 1646 | } |
| @@ -1567,6 +1663,7 @@ out: | |||
| 1567 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | 1663 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, |
| 1568 | u64 isc_mask, u32 schid) | 1664 | u64 isc_mask, u32 schid) |
| 1569 | { | 1665 | { |
| 1666 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
| 1570 | struct kvm_s390_interrupt_info *inti, *tmp_inti; | 1667 | struct kvm_s390_interrupt_info *inti, *tmp_inti; |
| 1571 | int isc; | 1668 | int isc; |
| 1572 | 1669 | ||
| @@ -1584,7 +1681,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | |||
| 1584 | /* both types of interrupts present */ | 1681 | /* both types of interrupts present */ |
| 1585 | if (int_word_to_isc(inti->io.io_int_word) <= isc) { | 1682 | if (int_word_to_isc(inti->io.io_int_word) <= isc) { |
| 1586 | /* classical IO int with higher priority */ | 1683 | /* classical IO int with higher priority */ |
| 1587 | kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); | 1684 | gisa_set_ipm_gisc(gi->origin, isc); |
| 1588 | goto out; | 1685 | goto out; |
| 1589 | } | 1686 | } |
| 1590 | gisa_out: | 1687 | gisa_out: |
| @@ -1596,7 +1693,7 @@ gisa_out: | |||
| 1596 | kvm_s390_reinject_io_int(kvm, inti); | 1693 | kvm_s390_reinject_io_int(kvm, inti); |
| 1597 | inti = tmp_inti; | 1694 | inti = tmp_inti; |
| 1598 | } else | 1695 | } else |
| 1599 | kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); | 1696 | gisa_set_ipm_gisc(gi->origin, isc); |
| 1600 | out: | 1697 | out: |
| 1601 | return inti; | 1698 | return inti; |
| 1602 | } | 1699 | } |
| @@ -1685,6 +1782,7 @@ static int __inject_float_mchk(struct kvm *kvm, | |||
| 1685 | 1782 | ||
| 1686 | static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | 1783 | static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) |
| 1687 | { | 1784 | { |
| 1785 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
| 1688 | struct kvm_s390_float_interrupt *fi; | 1786 | struct kvm_s390_float_interrupt *fi; |
| 1689 | struct list_head *list; | 1787 | struct list_head *list; |
| 1690 | int isc; | 1788 | int isc; |
| @@ -1692,9 +1790,9 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
| 1692 | kvm->stat.inject_io++; | 1790 | kvm->stat.inject_io++; |
| 1693 | isc = int_word_to_isc(inti->io.io_int_word); | 1791 | isc = int_word_to_isc(inti->io.io_int_word); |
| 1694 | 1792 | ||
| 1695 | if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) { | 1793 | if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) { |
| 1696 | VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc); | 1794 | VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc); |
| 1697 | kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); | 1795 | gisa_set_ipm_gisc(gi->origin, isc); |
| 1698 | kfree(inti); | 1796 | kfree(inti); |
| 1699 | return 0; | 1797 | return 0; |
| 1700 | } | 1798 | } |
| @@ -1726,7 +1824,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
| 1726 | */ | 1824 | */ |
| 1727 | static void __floating_irq_kick(struct kvm *kvm, u64 type) | 1825 | static void __floating_irq_kick(struct kvm *kvm, u64 type) |
| 1728 | { | 1826 | { |
| 1729 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | ||
| 1730 | struct kvm_vcpu *dst_vcpu; | 1827 | struct kvm_vcpu *dst_vcpu; |
| 1731 | int sigcpu, online_vcpus, nr_tries = 0; | 1828 | int sigcpu, online_vcpus, nr_tries = 0; |
| 1732 | 1829 | ||
| @@ -1735,11 +1832,11 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) | |||
| 1735 | return; | 1832 | return; |
| 1736 | 1833 | ||
| 1737 | /* find idle VCPUs first, then round robin */ | 1834 | /* find idle VCPUs first, then round robin */ |
| 1738 | sigcpu = find_first_bit(fi->idle_mask, online_vcpus); | 1835 | sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus); |
| 1739 | if (sigcpu == online_vcpus) { | 1836 | if (sigcpu == online_vcpus) { |
| 1740 | do { | 1837 | do { |
| 1741 | sigcpu = fi->next_rr_cpu; | 1838 | sigcpu = kvm->arch.float_int.next_rr_cpu++; |
| 1742 | fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus; | 1839 | kvm->arch.float_int.next_rr_cpu %= online_vcpus; |
| 1743 | /* avoid endless loops if all vcpus are stopped */ | 1840 | /* avoid endless loops if all vcpus are stopped */ |
| 1744 | if (nr_tries++ >= online_vcpus) | 1841 | if (nr_tries++ >= online_vcpus) |
| 1745 | return; | 1842 | return; |
| @@ -1753,7 +1850,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) | |||
| 1753 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); | 1850 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); |
| 1754 | break; | 1851 | break; |
| 1755 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | 1852 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: |
| 1756 | if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa)) | 1853 | if (!(type & KVM_S390_INT_IO_AI_MASK && |
| 1854 | kvm->arch.gisa_int.origin)) | ||
| 1757 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); | 1855 | kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); |
| 1758 | break; | 1856 | break; |
| 1759 | default: | 1857 | default: |
| @@ -2003,6 +2101,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) | |||
| 2003 | 2101 | ||
| 2004 | static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) | 2102 | static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) |
| 2005 | { | 2103 | { |
| 2104 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
| 2006 | struct kvm_s390_interrupt_info *inti; | 2105 | struct kvm_s390_interrupt_info *inti; |
| 2007 | struct kvm_s390_float_interrupt *fi; | 2106 | struct kvm_s390_float_interrupt *fi; |
| 2008 | struct kvm_s390_irq *buf; | 2107 | struct kvm_s390_irq *buf; |
| @@ -2026,15 +2125,14 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) | |||
| 2026 | 2125 | ||
| 2027 | max_irqs = len / sizeof(struct kvm_s390_irq); | 2126 | max_irqs = len / sizeof(struct kvm_s390_irq); |
| 2028 | 2127 | ||
| 2029 | if (kvm->arch.gisa && | 2128 | if (gi->origin && gisa_get_ipm(gi->origin)) { |
| 2030 | kvm_s390_gisa_get_ipm(kvm->arch.gisa)) { | ||
| 2031 | for (i = 0; i <= MAX_ISC; i++) { | 2129 | for (i = 0; i <= MAX_ISC; i++) { |
| 2032 | if (n == max_irqs) { | 2130 | if (n == max_irqs) { |
| 2033 | /* signal userspace to try again */ | 2131 | /* signal userspace to try again */ |
| 2034 | ret = -ENOMEM; | 2132 | ret = -ENOMEM; |
| 2035 | goto out_nolock; | 2133 | goto out_nolock; |
| 2036 | } | 2134 | } |
| 2037 | if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) { | 2135 | if (gisa_tac_ipm_gisc(gi->origin, i)) { |
| 2038 | irq = (struct kvm_s390_irq *) &buf[n]; | 2136 | irq = (struct kvm_s390_irq *) &buf[n]; |
| 2039 | irq->type = KVM_S390_INT_IO(1, 0, 0, 0); | 2137 | irq->type = KVM_S390_INT_IO(1, 0, 0, 0); |
| 2040 | irq->u.io.io_int_word = isc_to_int_word(i); | 2138 | irq->u.io.io_int_word = isc_to_int_word(i); |
| @@ -2831,7 +2929,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, | |||
| 2831 | int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) | 2929 | int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) |
| 2832 | { | 2930 | { |
| 2833 | int scn; | 2931 | int scn; |
| 2834 | unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; | 2932 | DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); |
| 2835 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 2933 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 2836 | unsigned long pending_irqs; | 2934 | unsigned long pending_irqs; |
| 2837 | struct kvm_s390_irq irq; | 2935 | struct kvm_s390_irq irq; |
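The bitmap change in this hunk is purely cosmetic: the open-coded array for pending SIGP emergency signals is now declared with the standard macro. For reference, DECLARE_BITMAP (as defined in include/linux/types.h) expands to exactly the array that was spelled out before:

/* From include/linux/types.h: */
#define DECLARE_BITMAP(name, bits) \
	unsigned long name[BITS_TO_LONGS(bits)]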
| @@ -2884,27 +2982,278 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) | |||
| 2884 | return n; | 2982 | return n; |
| 2885 | } | 2983 | } |
| 2886 | 2984 | ||
| 2887 | void kvm_s390_gisa_clear(struct kvm *kvm) | 2985 | static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) |
| 2888 | { | 2986 | { |
| 2889 | if (kvm->arch.gisa) { | 2987 | int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); |
| 2890 | memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa)); | 2988 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; |
| 2891 | kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa; | 2989 | struct kvm_vcpu *vcpu; |
| 2892 | VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa); | 2990 | |
| 2991 | for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { | ||
| 2992 | vcpu = kvm_get_vcpu(kvm, vcpu_id); | ||
| 2993 | if (psw_ioint_disabled(vcpu)) | ||
| 2994 | continue; | ||
| 2995 | deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); | ||
| 2996 | if (deliverable_mask) { | ||
| 2997 | /* kicked recently but not yet running */ | ||

| 2998 | if (test_and_set_bit(vcpu_id, gi->kicked_mask)) | ||
| 2999 | return; | ||
| 3000 | kvm_s390_vcpu_wakeup(vcpu); | ||
| 3001 | return; | ||
| 3002 | } | ||
| 2893 | } | 3003 | } |
| 2894 | } | 3004 | } |
| 2895 | 3005 | ||
| 3006 | static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) | ||
| 3007 | { | ||
| 3008 | struct kvm_s390_gisa_interrupt *gi = | ||
| 3009 | container_of(timer, struct kvm_s390_gisa_interrupt, timer); | ||
| 3010 | struct kvm *kvm = | ||
| 3011 | container_of(gi->origin, struct sie_page2, gisa)->kvm; | ||
| 3012 | u8 pending_mask; | ||
| 3013 | |||
| 3014 | pending_mask = gisa_get_ipm_or_restore_iam(gi); | ||
| 3015 | if (pending_mask) { | ||
| 3016 | __airqs_kick_single_vcpu(kvm, pending_mask); | ||
| 3017 | hrtimer_forward_now(timer, ns_to_ktime(gi->expires)); | ||
| 3018 | return HRTIMER_RESTART; | ||
| 3019 | } | ||
| 3020 | |||
| 3021 | return HRTIMER_NORESTART; | ||
| 3022 | } | ||
| 3023 | |||
| 3024 | #define NULL_GISA_ADDR 0x00000000UL | ||
| 3025 | #define NONE_GISA_ADDR 0x00000001UL | ||
| 3026 | #define GISA_ADDR_MASK 0xfffff000UL | ||
| 3027 | |||
| 3028 | static void process_gib_alert_list(void) | ||
| 3029 | { | ||
| 3030 | struct kvm_s390_gisa_interrupt *gi; | ||
| 3031 | struct kvm_s390_gisa *gisa; | ||
| 3032 | struct kvm *kvm; | ||
| 3033 | u32 final, origin = 0UL; | ||
| 3034 | |||
| 3035 | do { | ||
| 3036 | /* | ||
| 3037 | * If the NONE_GISA_ADDR is still stored in the alert list | ||
| 3038 | * origin, we will leave the outer loop. No further GISA has | ||
| 3039 | * been added to the alert list by millicode while processing | ||
| 3040 | * the current alert list. | ||
| 3041 | */ | ||
| 3042 | final = (origin & NONE_GISA_ADDR); | ||
| 3043 | /* | ||
| 3044 | * Cut off the alert list and store the NONE_GISA_ADDR in the | ||
| 3045 | * alert list origin to avoid further GAL interruptions. | ||
| 3046 | * A new alert list can be built up by millicode in parallel | ||
| 3047 | * for guests not yet in the cut-off alert list. When in the | ||
| 3048 | * final loop, store the NULL_GISA_ADDR instead. This will re- | ||
| 3049 | * enable GAL interruptions on the host again. | ||
| 3050 | */ | ||
| 3051 | origin = xchg(&gib->alert_list_origin, | ||
| 3052 | (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR); | ||
| 3053 | /* | ||
| 3054 | * Loop through the just cut-off alert list and start the | ||
| 3055 | * gisa timers to kick idle vcpus to consume the pending | ||
| 3056 | * interruptions asap. | ||
| 3057 | */ | ||
| 3058 | while (origin & GISA_ADDR_MASK) { | ||
| 3059 | gisa = (struct kvm_s390_gisa *)(u64)origin; | ||
| 3060 | origin = gisa->next_alert; | ||
| 3061 | gisa->next_alert = (u32)(u64)gisa; | ||
| 3062 | kvm = container_of(gisa, struct sie_page2, gisa)->kvm; | ||
| 3063 | gi = &kvm->arch.gisa_int; | ||
| 3064 | if (hrtimer_active(&gi->timer)) | ||
| 3065 | hrtimer_cancel(&gi->timer); | ||
| 3066 | hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL); | ||
| 3067 | } | ||
| 3068 | } while (!final); | ||
| 3069 | |||
| 3070 | } | ||
| 3071 | |||
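process_gib_alert_list() above is easier to follow with the producer side in mind: the alert list is a singly linked list of GISA addresses anchored in the GIB, and when an alert-mask ISC becomes pending the machine links the GISA in (unless alerting is parked) and raises a GAL interruption. The fragment below is only a rough, non-atomic C model of that firmware behaviour, written against the constants used above; it is not kernel code and the millicode itself is of course not implemented in C.

/* Rough model of the producer side (performed by millicode in reality). */
static void sketch_millicode_alert(struct kvm_s390_gib *gib,
				   struct kvm_s390_gisa *gisa)
{
	u32 origin = READ_ONCE(gib->alert_list_origin);

	if (origin == NONE_GISA_ADDR)
		return;		/* host is draining the list, no new alerts */
	if (gisa->next_alert != (u32)(u64)gisa)
		return;		/* already queued on the alert list */
	gisa->next_alert = origin;	/* link in at the front */
	WRITE_ONCE(gib->alert_list_origin, (u32)(u64)gisa);
	/* ...and a GAL adapter interruption is made pending on the host */
}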
| 3072 | void kvm_s390_gisa_clear(struct kvm *kvm) | ||
| 3073 | { | ||
| 3074 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
| 3075 | |||
| 3076 | if (!gi->origin) | ||
| 3077 | return; | ||
| 3078 | gisa_clear_ipm(gi->origin); | ||
| 3079 | VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin); | ||
| 3080 | } | ||
| 3081 | |||
| 2896 | void kvm_s390_gisa_init(struct kvm *kvm) | 3082 | void kvm_s390_gisa_init(struct kvm *kvm) |
| 2897 | { | 3083 | { |
| 2898 | if (css_general_characteristics.aiv) { | 3084 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; |
| 2899 | kvm->arch.gisa = &kvm->arch.sie_page2->gisa; | 3085 | |
| 2900 | VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa); | 3086 | if (!css_general_characteristics.aiv) |
| 2901 | kvm_s390_gisa_clear(kvm); | 3087 | return; |
| 2902 | } | 3088 | gi->origin = &kvm->arch.sie_page2->gisa; |
| 3089 | gi->alert.mask = 0; | ||
| 3090 | spin_lock_init(&gi->alert.ref_lock); | ||
| 3091 | gi->expires = 50 * 1000; /* 50 usec */ | ||
| 3092 | hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
| 3093 | gi->timer.function = gisa_vcpu_kicker; | ||
| 3094 | memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); | ||
| 3095 | gi->origin->next_alert = (u32)(u64)gi->origin; | ||
| 3096 | VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); | ||
| 2903 | } | 3097 | } |
| 2904 | 3098 | ||
| 2905 | void kvm_s390_gisa_destroy(struct kvm *kvm) | 3099 | void kvm_s390_gisa_destroy(struct kvm *kvm) |
| 2906 | { | 3100 | { |
| 2907 | if (!kvm->arch.gisa) | 3101 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; |
| 3102 | |||
| 3103 | if (!gi->origin) | ||
| 3104 | return; | ||
| 3105 | if (gi->alert.mask) | ||
| 3106 | KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x", | ||
| 3107 | kvm, gi->alert.mask); | ||
| 3108 | while (gisa_in_alert_list(gi->origin)) | ||
| 3109 | cpu_relax(); | ||
| 3110 | hrtimer_cancel(&gi->timer); | ||
| 3111 | gi->origin = NULL; | ||
| 3112 | } | ||
| 3113 | |||
| 3114 | /** | ||
| 3115 | * kvm_s390_gisc_register - register a guest ISC | ||
| 3116 | * | ||
| 3117 | * @kvm: the kernel vm to work with | ||
| 3118 | * @gisc: the guest interruption sub class to register | ||
| 3119 | * | ||
| 3120 | * The function extends the vm specific alert mask in use. | ||
| 3121 | * The effective IAM mask in the GISA is updated as well | ||
| 3122 | * in case the GISA is not part of the GIB alert list. | ||
| 3123 | * It will be updated at the latest when the IAM gets restored | ||
| 3124 | * by gisa_get_ipm_or_restore_iam(). | ||
| 3125 | * | ||
| 3126 | * Returns: the nonspecific ISC (NISC) the gib alert mechanism | ||
| 3127 | * has registered with the channel subsystem. | ||
| 3128 | * -ENODEV in case the vm uses no GISA | ||
| 3129 | * -ERANGE in case the guest ISC is invalid | ||
| 3130 | */ | ||
| 3131 | int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc) | ||
| 3132 | { | ||
| 3133 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
| 3134 | |||
| 3135 | if (!gi->origin) | ||
| 3136 | return -ENODEV; | ||
| 3137 | if (gisc > MAX_ISC) | ||
| 3138 | return -ERANGE; | ||
| 3139 | |||
| 3140 | spin_lock(&gi->alert.ref_lock); | ||
| 3141 | gi->alert.ref_count[gisc]++; | ||
| 3142 | if (gi->alert.ref_count[gisc] == 1) { | ||
| 3143 | gi->alert.mask |= 0x80 >> gisc; | ||
| 3144 | gisa_set_iam(gi->origin, gi->alert.mask); | ||
| 3145 | } | ||
| 3146 | spin_unlock(&gi->alert.ref_lock); | ||
| 3147 | |||
| 3148 | return gib->nisc; | ||
| 3149 | } | ||
| 3150 | EXPORT_SYMBOL_GPL(kvm_s390_gisc_register); | ||
| 3151 | |||
| 3152 | /** | ||
| 3153 | * kvm_s390_gisc_unregister - unregister a guest ISC | ||
| 3154 | * | ||
| 3155 | * @kvm: the kernel vm to work with | ||
| 3156 | * @gisc: the guest interruption sub class to unregister | ||
| 3157 | * | ||
| 3158 | * The function reduces the vm specific alert mask in use. | ||
| 3159 | * The effective IAM mask in the GISA is updated as well | ||
| 3160 | * in case the GISA is not part of the GIB alert list. | ||
| 3161 | * It will be updated at the latest when the IAM gets restored | ||
| 3162 | * by gisa_get_ipm_or_restore_iam(). | ||
| 3163 | * | ||
| 3164 | * Returns: 0 in case the guest ISC has been successfully | ||
| 3165 | * unregistered | ||
| 3166 | * -ENODEV in case the vm uses no GISA | ||
| 3167 | * -ERANGE in case the guest ISC is invalid | ||
| 3168 | * -EINVAL in case the guest ISC is not registered | ||
| 3169 | */ | ||
| 3170 | int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc) | ||
| 3171 | { | ||
| 3172 | struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; | ||
| 3173 | int rc = 0; | ||
| 3174 | |||
| 3175 | if (!gi->origin) | ||
| 3176 | return -ENODEV; | ||
| 3177 | if (gisc > MAX_ISC) | ||
| 3178 | return -ERANGE; | ||
| 3179 | |||
| 3180 | spin_lock(&gi->alert.ref_lock); | ||
| 3181 | if (gi->alert.ref_count[gisc] == 0) { | ||
| 3182 | rc = -EINVAL; | ||
| 3183 | goto out; | ||
| 3184 | } | ||
| 3185 | gi->alert.ref_count[gisc]--; | ||
| 3186 | if (gi->alert.ref_count[gisc] == 0) { | ||
| 3187 | gi->alert.mask &= ~(0x80 >> gisc); | ||
| 3188 | gisa_set_iam(gi->origin, gi->alert.mask); | ||
| 3189 | } | ||
| 3190 | out: | ||
| 3191 | spin_unlock(&gi->alert.ref_lock); | ||
| 3192 | |||
| 3193 | return rc; | ||
| 3194 | } | ||
| 3195 | EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister); | ||
| 3196 | |||
| 3197 | static void gib_alert_irq_handler(struct airq_struct *airq) | ||
| 3198 | { | ||
| 3199 | inc_irq_stat(IRQIO_GAL); | ||
| 3200 | process_gib_alert_list(); | ||
| 3201 | } | ||
| 3202 | |||
| 3203 | static struct airq_struct gib_alert_irq = { | ||
| 3204 | .handler = gib_alert_irq_handler, | ||
| 3205 | .lsi_ptr = &gib_alert_irq.lsi_mask, | ||
| 3206 | }; | ||
| 3207 | |||
| 3208 | void kvm_s390_gib_destroy(void) | ||
| 3209 | { | ||
| 3210 | if (!gib) | ||
| 2908 | return; | 3211 | return; |
| 2909 | kvm->arch.gisa = NULL; | 3212 | chsc_sgib(0); |
| 3213 | unregister_adapter_interrupt(&gib_alert_irq); | ||
| 3214 | free_page((unsigned long)gib); | ||
| 3215 | gib = NULL; | ||
| 3216 | } | ||
| 3217 | |||
| 3218 | int kvm_s390_gib_init(u8 nisc) | ||
| 3219 | { | ||
| 3220 | int rc = 0; | ||
| 3221 | |||
| 3222 | if (!css_general_characteristics.aiv) { | ||
| 3223 | KVM_EVENT(3, "%s", "gib not initialized, no AIV facility"); | ||
| 3224 | goto out; | ||
| 3225 | } | ||
| 3226 | |||
| 3227 | gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA); | ||
| 3228 | if (!gib) { | ||
| 3229 | rc = -ENOMEM; | ||
| 3230 | goto out; | ||
| 3231 | } | ||
| 3232 | |||
| 3233 | gib_alert_irq.isc = nisc; | ||
| 3234 | if (register_adapter_interrupt(&gib_alert_irq)) { | ||
| 3235 | pr_err("Registering the GIB alert interruption handler failed\n"); | ||
| 3236 | rc = -EIO; | ||
| 3237 | goto out_free_gib; | ||
| 3238 | } | ||
| 3239 | |||
| 3240 | gib->nisc = nisc; | ||
| 3241 | if (chsc_sgib((u32)(u64)gib)) { | ||
| 3242 | pr_err("Associating the GIB with the AIV facility failed\n"); | ||
| 3243 | free_page((unsigned long)gib); | ||
| 3244 | gib = NULL; | ||
| 3245 | rc = -EIO; | ||
| 3246 | goto out_unreg_gal; | ||
| 3247 | } | ||
| 3248 | |||
| 3249 | KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc); | ||
| 3250 | goto out; | ||
| 3251 | |||
| 3252 | out_unreg_gal: | ||
| 3253 | unregister_adapter_interrupt(&gib_alert_irq); | ||
| 3254 | out_free_gib: | ||
| 3255 | free_page((unsigned long)gib); | ||
| 3256 | gib = NULL; | ||
| 3257 | out: | ||
| 3258 | return rc; | ||
| 2910 | } | 3259 | } |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7f4bc58a53b9..4638303ba6a8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -432,11 +432,18 @@ int kvm_arch_init(void *opaque) | |||
| 432 | /* Register floating interrupt controller interface. */ | 432 | /* Register floating interrupt controller interface. */ |
| 433 | rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); | 433 | rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); |
| 434 | if (rc) { | 434 | if (rc) { |
| 435 | pr_err("Failed to register FLIC rc=%d\n", rc); | 435 | pr_err("A FLIC registration call failed with rc=%d\n", rc); |
| 436 | goto out_debug_unreg; | 436 | goto out_debug_unreg; |
| 437 | } | 437 | } |
| 438 | |||
| 439 | rc = kvm_s390_gib_init(GAL_ISC); | ||
| 440 | if (rc) | ||
| 441 | goto out_gib_destroy; | ||
| 442 | |||
| 438 | return 0; | 443 | return 0; |
| 439 | 444 | ||
| 445 | out_gib_destroy: | ||
| 446 | kvm_s390_gib_destroy(); | ||
| 440 | out_debug_unreg: | 447 | out_debug_unreg: |
| 441 | debug_unregister(kvm_s390_dbf); | 448 | debug_unregister(kvm_s390_dbf); |
| 442 | return rc; | 449 | return rc; |
| @@ -444,6 +451,7 @@ out_debug_unreg: | |||
| 444 | 451 | ||
| 445 | void kvm_arch_exit(void) | 452 | void kvm_arch_exit(void) |
| 446 | { | 453 | { |
| 454 | kvm_s390_gib_destroy(); | ||
| 447 | debug_unregister(kvm_s390_dbf); | 455 | debug_unregister(kvm_s390_dbf); |
| 448 | } | 456 | } |
| 449 | 457 | ||
| @@ -1258,11 +1266,65 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm, | |||
| 1258 | static int kvm_s390_set_processor_subfunc(struct kvm *kvm, | 1266 | static int kvm_s390_set_processor_subfunc(struct kvm *kvm, |
| 1259 | struct kvm_device_attr *attr) | 1267 | struct kvm_device_attr *attr) |
| 1260 | { | 1268 | { |
| 1261 | /* | 1269 | mutex_lock(&kvm->lock); |
| 1262 | * Once supported by kernel + hw, we have to store the subfunctions | 1270 | if (kvm->created_vcpus) { |
| 1263 | * in kvm->arch and remember that user space configured them. | 1271 | mutex_unlock(&kvm->lock); |
| 1264 | */ | 1272 | return -EBUSY; |
| 1265 | return -ENXIO; | 1273 | } |
| 1274 | |||
| 1275 | if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr, | ||
| 1276 | sizeof(struct kvm_s390_vm_cpu_subfunc))) { | ||
| 1277 | mutex_unlock(&kvm->lock); | ||
| 1278 | return -EFAULT; | ||
| 1279 | } | ||
| 1280 | mutex_unlock(&kvm->lock); | ||
| 1281 | |||
| 1282 | VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
| 1283 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], | ||
| 1284 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], | ||
| 1285 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], | ||
| 1286 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); | ||
| 1287 | VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx", | ||
| 1288 | ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], | ||
| 1289 | ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); | ||
| 1290 | VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx", | ||
| 1291 | ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], | ||
| 1292 | ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); | ||
| 1293 | VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx", | ||
| 1294 | ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], | ||
| 1295 | ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); | ||
| 1296 | VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx", | ||
| 1297 | ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], | ||
| 1298 | ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); | ||
| 1299 | VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx", | ||
| 1300 | ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], | ||
| 1301 | ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); | ||
| 1302 | VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx", | ||
| 1303 | ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], | ||
| 1304 | ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); | ||
| 1305 | VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", | ||
| 1306 | ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], | ||
| 1307 | ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); | ||
| 1308 | VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", | ||
| 1309 | ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], | ||
| 1310 | ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); | ||
| 1311 | VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", | ||
| 1312 | ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], | ||
| 1313 | ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); | ||
| 1314 | VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", | ||
| 1315 | ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], | ||
| 1316 | ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); | ||
| 1317 | VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx", | ||
| 1318 | ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], | ||
| 1319 | ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); | ||
| 1320 | VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", | ||
| 1321 | ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], | ||
| 1322 | ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); | ||
| 1323 | VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", | ||
| 1324 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], | ||
| 1325 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); | ||
| 1326 | |||
| 1327 | return 0; | ||
| 1266 | } | 1328 | } |
| 1267 | 1329 | ||
| 1268 | static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) | 1330 | static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) |
| @@ -1381,12 +1443,56 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm, | |||
| 1381 | static int kvm_s390_get_processor_subfunc(struct kvm *kvm, | 1443 | static int kvm_s390_get_processor_subfunc(struct kvm *kvm, |
| 1382 | struct kvm_device_attr *attr) | 1444 | struct kvm_device_attr *attr) |
| 1383 | { | 1445 | { |
| 1384 | /* | 1446 | if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, |
| 1385 | * Once we can actually configure subfunctions (kernel + hw support), | 1447 | sizeof(struct kvm_s390_vm_cpu_subfunc))) |
| 1386 | * we have to check if they were already set by user space, if so copy | 1448 | return -EFAULT; |
| 1387 | * them from kvm->arch. | 1449 | |
| 1388 | */ | 1450 | VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", |
| 1389 | return -ENXIO; | 1451 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], |
| 1452 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], | ||
| 1453 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], | ||
| 1454 | ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); | ||
| 1455 | VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", | ||
| 1456 | ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], | ||
| 1457 | ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); | ||
| 1458 | VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", | ||
| 1459 | ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], | ||
| 1460 | ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); | ||
| 1461 | VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", | ||
| 1462 | ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], | ||
| 1463 | ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); | ||
| 1464 | VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", | ||
| 1465 | ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], | ||
| 1466 | ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); | ||
| 1467 | VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", | ||
| 1468 | ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], | ||
| 1469 | ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); | ||
| 1470 | VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", | ||
| 1471 | ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], | ||
| 1472 | ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); | ||
| 1473 | VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", | ||
| 1474 | ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], | ||
| 1475 | ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); | ||
| 1476 | VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", | ||
| 1477 | ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], | ||
| 1478 | ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); | ||
| 1479 | VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", | ||
| 1480 | ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], | ||
| 1481 | ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); | ||
| 1482 | VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", | ||
| 1483 | ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], | ||
| 1484 | ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); | ||
| 1485 | VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", | ||
| 1486 | ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], | ||
| 1487 | ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); | ||
| 1488 | VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", | ||
| 1489 | ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], | ||
| 1490 | ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); | ||
| 1491 | VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", | ||
| 1492 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], | ||
| 1493 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); | ||
| 1494 | |||
| 1495 | return 0; | ||
| 1390 | } | 1496 | } |
| 1391 | 1497 | ||
| 1392 | static int kvm_s390_get_machine_subfunc(struct kvm *kvm, | 1498 | static int kvm_s390_get_machine_subfunc(struct kvm *kvm, |
| @@ -1395,8 +1501,55 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm, | |||
| 1395 | if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, | 1501 | if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, |
| 1396 | sizeof(struct kvm_s390_vm_cpu_subfunc))) | 1502 | sizeof(struct kvm_s390_vm_cpu_subfunc))) |
| 1397 | return -EFAULT; | 1503 | return -EFAULT; |
| 1504 | |||
| 1505 | VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
| 1506 | ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], | ||
| 1507 | ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], | ||
| 1508 | ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], | ||
| 1509 | ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); | ||
| 1510 | VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", | ||
| 1511 | ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], | ||
| 1512 | ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); | ||
| 1513 | VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", | ||
| 1514 | ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], | ||
| 1515 | ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); | ||
| 1516 | VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", | ||
| 1517 | ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], | ||
| 1518 | ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); | ||
| 1519 | VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", | ||
| 1520 | ((unsigned long *) &kvm_s390_available_subfunc.km)[0], | ||
| 1521 | ((unsigned long *) &kvm_s390_available_subfunc.km)[1]); | ||
| 1522 | VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", | ||
| 1523 | ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], | ||
| 1524 | ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); | ||
| 1525 | VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", | ||
| 1526 | ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], | ||
| 1527 | ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); | ||
| 1528 | VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", | ||
| 1529 | ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], | ||
| 1530 | ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); | ||
| 1531 | VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", | ||
| 1532 | ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], | ||
| 1533 | ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); | ||
| 1534 | VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", | ||
| 1535 | ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], | ||
| 1536 | ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); | ||
| 1537 | VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", | ||
| 1538 | ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], | ||
| 1539 | ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]); | ||
| 1540 | VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx", | ||
| 1541 | ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], | ||
| 1542 | ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); | ||
| 1543 | VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", | ||
| 1544 | ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], | ||
| 1545 | ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); | ||
| 1546 | VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", | ||
| 1547 | ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], | ||
| 1548 | ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); | ||
| 1549 | |||
| 1398 | return 0; | 1550 | return 0; |
| 1399 | } | 1551 | } |
| 1552 | |||
| 1400 | static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) | 1553 | static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) |
| 1401 | { | 1554 | { |
| 1402 | int ret = -ENXIO; | 1555 | int ret = -ENXIO; |
| @@ -1514,10 +1667,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) | |||
| 1514 | case KVM_S390_VM_CPU_PROCESSOR_FEAT: | 1667 | case KVM_S390_VM_CPU_PROCESSOR_FEAT: |
| 1515 | case KVM_S390_VM_CPU_MACHINE_FEAT: | 1668 | case KVM_S390_VM_CPU_MACHINE_FEAT: |
| 1516 | case KVM_S390_VM_CPU_MACHINE_SUBFUNC: | 1669 | case KVM_S390_VM_CPU_MACHINE_SUBFUNC: |
| 1670 | case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: | ||
| 1517 | ret = 0; | 1671 | ret = 0; |
| 1518 | break; | 1672 | break; |
| 1519 | /* configuring subfunctions is not supported yet */ | ||
| 1520 | case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: | ||
| 1521 | default: | 1673 | default: |
| 1522 | ret = -ENXIO; | 1674 | ret = -ENXIO; |
| 1523 | break; | 1675 | break; |
| @@ -2209,6 +2361,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 2209 | if (!kvm->arch.sie_page2) | 2361 | if (!kvm->arch.sie_page2) |
| 2210 | goto out_err; | 2362 | goto out_err; |
| 2211 | 2363 | ||
| 2364 | kvm->arch.sie_page2->kvm = kvm; | ||
| 2212 | kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; | 2365 | kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; |
| 2213 | 2366 | ||
| 2214 | for (i = 0; i < kvm_s390_fac_size(); i++) { | 2367 | for (i = 0; i < kvm_s390_fac_size(); i++) { |
| @@ -2218,6 +2371,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 2218 | kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & | 2371 | kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & |
| 2219 | kvm_s390_fac_base[i]; | 2372 | kvm_s390_fac_base[i]; |
| 2220 | } | 2373 | } |
| 2374 | kvm->arch.model.subfuncs = kvm_s390_available_subfunc; | ||
| 2221 | 2375 | ||
| 2222 | /* we are always in czam mode - even on pre z14 machines */ | 2376 | /* we are always in czam mode - even on pre z14 machines */ |
| 2223 | set_kvm_facility(kvm->arch.model.fac_mask, 138); | 2377 | set_kvm_facility(kvm->arch.model.fac_mask, 138); |
| @@ -2812,7 +2966,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
| 2812 | 2966 | ||
| 2813 | vcpu->arch.sie_block->icpua = id; | 2967 | vcpu->arch.sie_block->icpua = id; |
| 2814 | spin_lock_init(&vcpu->arch.local_int.lock); | 2968 | spin_lock_init(&vcpu->arch.local_int.lock); |
| 2815 | vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa; | 2969 | vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin; |
| 2816 | if (vcpu->arch.sie_block->gd && sclp.has_gisaf) | 2970 | if (vcpu->arch.sie_block->gd && sclp.has_gisaf) |
| 2817 | vcpu->arch.sie_block->gd |= GISA_FORMAT1; | 2971 | vcpu->arch.sie_block->gd |= GISA_FORMAT1; |
| 2818 | seqcount_init(&vcpu->arch.cputm_seqcount); | 2972 | seqcount_init(&vcpu->arch.cputm_seqcount); |
| @@ -3458,6 +3612,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) | |||
| 3458 | kvm_s390_patch_guest_per_regs(vcpu); | 3612 | kvm_s390_patch_guest_per_regs(vcpu); |
| 3459 | } | 3613 | } |
| 3460 | 3614 | ||
| 3615 | clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); | ||
| 3616 | |||
| 3461 | vcpu->arch.sie_block->icptcode = 0; | 3617 | vcpu->arch.sie_block->icptcode = 0; |
| 3462 | cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); | 3618 | cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); |
| 3463 | VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); | 3619 | VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); |
| @@ -4293,12 +4449,12 @@ static int __init kvm_s390_init(void) | |||
| 4293 | int i; | 4449 | int i; |
| 4294 | 4450 | ||
| 4295 | if (!sclp.has_sief2) { | 4451 | if (!sclp.has_sief2) { |
| 4296 | pr_info("SIE not available\n"); | 4452 | pr_info("SIE is not available\n"); |
| 4297 | return -ENODEV; | 4453 | return -ENODEV; |
| 4298 | } | 4454 | } |
| 4299 | 4455 | ||
| 4300 | if (nested && hpage) { | 4456 | if (nested && hpage) { |
| 4301 | pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently"); | 4457 | pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); |
| 4302 | return -EINVAL; | 4458 | return -EINVAL; |
| 4303 | } | 4459 | } |
| 4304 | 4460 | ||
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1f6e36cdce0d..6d9448dbd052 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
| @@ -67,7 +67,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) | |||
| 67 | 67 | ||
| 68 | static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) | 68 | static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) |
| 69 | { | 69 | { |
| 70 | return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); | 70 | return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | static inline int kvm_is_ucontrol(struct kvm *kvm) | 73 | static inline int kvm_is_ucontrol(struct kvm *kvm) |
| @@ -381,6 +381,8 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, | |||
| 381 | void kvm_s390_gisa_init(struct kvm *kvm); | 381 | void kvm_s390_gisa_init(struct kvm *kvm); |
| 382 | void kvm_s390_gisa_clear(struct kvm *kvm); | 382 | void kvm_s390_gisa_clear(struct kvm *kvm); |
| 383 | void kvm_s390_gisa_destroy(struct kvm *kvm); | 383 | void kvm_s390_gisa_destroy(struct kvm *kvm); |
| 384 | int kvm_s390_gib_init(u8 nisc); | ||
| 385 | void kvm_s390_gib_destroy(void); | ||
| 384 | 386 | ||
| 385 | /* implemented in guestdbg.c */ | 387 | /* implemented in guestdbg.c */ |
| 386 | void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); | 388 | void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 180373360e34..a5db4475e72d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | #include <asm/msr-index.h> | 35 | #include <asm/msr-index.h> |
| 36 | #include <asm/asm.h> | 36 | #include <asm/asm.h> |
| 37 | #include <asm/kvm_page_track.h> | 37 | #include <asm/kvm_page_track.h> |
| 38 | #include <asm/kvm_vcpu_regs.h> | ||
| 38 | #include <asm/hyperv-tlfs.h> | 39 | #include <asm/hyperv-tlfs.h> |
| 39 | 40 | ||
| 40 | #define KVM_MAX_VCPUS 288 | 41 | #define KVM_MAX_VCPUS 288 |
| @@ -137,23 +138,23 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) | |||
| 137 | #define ASYNC_PF_PER_VCPU 64 | 138 | #define ASYNC_PF_PER_VCPU 64 |
| 138 | 139 | ||
| 139 | enum kvm_reg { | 140 | enum kvm_reg { |
| 140 | VCPU_REGS_RAX = 0, | 141 | VCPU_REGS_RAX = __VCPU_REGS_RAX, |
| 141 | VCPU_REGS_RCX = 1, | 142 | VCPU_REGS_RCX = __VCPU_REGS_RCX, |
| 142 | VCPU_REGS_RDX = 2, | 143 | VCPU_REGS_RDX = __VCPU_REGS_RDX, |
| 143 | VCPU_REGS_RBX = 3, | 144 | VCPU_REGS_RBX = __VCPU_REGS_RBX, |
| 144 | VCPU_REGS_RSP = 4, | 145 | VCPU_REGS_RSP = __VCPU_REGS_RSP, |
| 145 | VCPU_REGS_RBP = 5, | 146 | VCPU_REGS_RBP = __VCPU_REGS_RBP, |
| 146 | VCPU_REGS_RSI = 6, | 147 | VCPU_REGS_RSI = __VCPU_REGS_RSI, |
| 147 | VCPU_REGS_RDI = 7, | 148 | VCPU_REGS_RDI = __VCPU_REGS_RDI, |
| 148 | #ifdef CONFIG_X86_64 | 149 | #ifdef CONFIG_X86_64 |
| 149 | VCPU_REGS_R8 = 8, | 150 | VCPU_REGS_R8 = __VCPU_REGS_R8, |
| 150 | VCPU_REGS_R9 = 9, | 151 | VCPU_REGS_R9 = __VCPU_REGS_R9, |
| 151 | VCPU_REGS_R10 = 10, | 152 | VCPU_REGS_R10 = __VCPU_REGS_R10, |
| 152 | VCPU_REGS_R11 = 11, | 153 | VCPU_REGS_R11 = __VCPU_REGS_R11, |
| 153 | VCPU_REGS_R12 = 12, | 154 | VCPU_REGS_R12 = __VCPU_REGS_R12, |
| 154 | VCPU_REGS_R13 = 13, | 155 | VCPU_REGS_R13 = __VCPU_REGS_R13, |
| 155 | VCPU_REGS_R14 = 14, | 156 | VCPU_REGS_R14 = __VCPU_REGS_R14, |
| 156 | VCPU_REGS_R15 = 15, | 157 | VCPU_REGS_R15 = __VCPU_REGS_R15, |
| 157 | #endif | 158 | #endif |
| 158 | VCPU_REGS_RIP, | 159 | VCPU_REGS_RIP, |
| 159 | NR_VCPU_REGS | 160 | NR_VCPU_REGS |
| @@ -319,6 +320,7 @@ struct kvm_mmu_page { | |||
| 319 | struct list_head link; | 320 | struct list_head link; |
| 320 | struct hlist_node hash_link; | 321 | struct hlist_node hash_link; |
| 321 | bool unsync; | 322 | bool unsync; |
| 323 | bool mmio_cached; | ||
| 322 | 324 | ||
| 323 | /* | 325 | /* |
| 324 | * The following two entries are used to key the shadow page in the | 326 | * The following two entries are used to key the shadow page in the |
| @@ -333,10 +335,6 @@ struct kvm_mmu_page { | |||
| 333 | int root_count; /* Currently serving as active root */ | 335 | int root_count; /* Currently serving as active root */ |
| 334 | unsigned int unsync_children; | 336 | unsigned int unsync_children; |
| 335 | struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ | 337 | struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ |
| 336 | |||
| 337 | /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ | ||
| 338 | unsigned long mmu_valid_gen; | ||
| 339 | |||
| 340 | DECLARE_BITMAP(unsync_child_bitmap, 512); | 338 | DECLARE_BITMAP(unsync_child_bitmap, 512); |
| 341 | 339 | ||
| 342 | #ifdef CONFIG_X86_32 | 340 | #ifdef CONFIG_X86_32 |
| @@ -848,13 +846,11 @@ struct kvm_arch { | |||
| 848 | unsigned int n_requested_mmu_pages; | 846 | unsigned int n_requested_mmu_pages; |
| 849 | unsigned int n_max_mmu_pages; | 847 | unsigned int n_max_mmu_pages; |
| 850 | unsigned int indirect_shadow_pages; | 848 | unsigned int indirect_shadow_pages; |
| 851 | unsigned long mmu_valid_gen; | ||
| 852 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 849 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
| 853 | /* | 850 | /* |
| 854 | * Hash table of struct kvm_mmu_page. | 851 | * Hash table of struct kvm_mmu_page. |
| 855 | */ | 852 | */ |
| 856 | struct list_head active_mmu_pages; | 853 | struct list_head active_mmu_pages; |
| 857 | struct list_head zapped_obsolete_pages; | ||
| 858 | struct kvm_page_track_notifier_node mmu_sp_tracker; | 854 | struct kvm_page_track_notifier_node mmu_sp_tracker; |
| 859 | struct kvm_page_track_notifier_head track_notifier_head; | 855 | struct kvm_page_track_notifier_head track_notifier_head; |
| 860 | 856 | ||
| @@ -1255,7 +1251,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, | |||
| 1255 | struct kvm_memory_slot *slot, | 1251 | struct kvm_memory_slot *slot, |
| 1256 | gfn_t gfn_offset, unsigned long mask); | 1252 | gfn_t gfn_offset, unsigned long mask); |
| 1257 | void kvm_mmu_zap_all(struct kvm *kvm); | 1253 | void kvm_mmu_zap_all(struct kvm *kvm); |
| 1258 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots); | 1254 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); |
| 1259 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 1255 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
| 1260 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 1256 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
| 1261 | 1257 | ||
diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h new file mode 100644 index 000000000000..1af2cb59233b --- /dev/null +++ b/arch/x86/include/asm/kvm_vcpu_regs.h | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef _ASM_X86_KVM_VCPU_REGS_H | ||
| 3 | #define _ASM_X86_KVM_VCPU_REGS_H | ||
| 4 | |||
| 5 | #define __VCPU_REGS_RAX 0 | ||
| 6 | #define __VCPU_REGS_RCX 1 | ||
| 7 | #define __VCPU_REGS_RDX 2 | ||
| 8 | #define __VCPU_REGS_RBX 3 | ||
| 9 | #define __VCPU_REGS_RSP 4 | ||
| 10 | #define __VCPU_REGS_RBP 5 | ||
| 11 | #define __VCPU_REGS_RSI 6 | ||
| 12 | #define __VCPU_REGS_RDI 7 | ||
| 13 | |||
| 14 | #ifdef CONFIG_X86_64 | ||
| 15 | #define __VCPU_REGS_R8 8 | ||
| 16 | #define __VCPU_REGS_R9 9 | ||
| 17 | #define __VCPU_REGS_R10 10 | ||
| 18 | #define __VCPU_REGS_R11 11 | ||
| 19 | #define __VCPU_REGS_R12 12 | ||
| 20 | #define __VCPU_REGS_R13 13 | ||
| 21 | #define __VCPU_REGS_R14 14 | ||
| 22 | #define __VCPU_REGS_R15 15 | ||
| 23 | #endif | ||
| 24 | |||
| 25 | #endif /* _ASM_X86_KVM_VCPU_REGS_H */ | ||
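The new header deliberately uses bare numeric #defines instead of an enum so the same tokens can be consumed by both C and assembler sources. A minimal, self-contained userspace sketch of that idea follows; the WORD_SIZE macro and the regs[] layout are illustrative assumptions, not taken from the patch:

```c
#include <stdio.h>

/* Mirrors the style of kvm_vcpu_regs.h: plain numbers, no enum. */
#define __VCPU_REGS_RAX 0
#define __VCPU_REGS_RCX 1
#define __VCPU_REGS_RDX 2

#define WORD_SIZE 8                                 /* assumed 8-byte slots */
#define REG_OFFSET(r) (__VCPU_REGS_##r * WORD_SIZE) /* also usable in a .S file */

int main(void)
{
	unsigned long regs[16] = { 0 };

	regs[__VCPU_REGS_RCX] = 0x1234;       /* C code: use as an array index */
	printf("RCX lives at byte offset %d\n", REG_OFFSET(RCX));
	printf("RCX value: 0x%lx\n", regs[__VCPU_REGS_RCX]);
	return 0;
}
```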
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e811d4d1c824..904494b924c1 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
| @@ -104,12 +104,8 @@ static u64 kvm_sched_clock_read(void) | |||
| 104 | 104 | ||
| 105 | static inline void kvm_sched_clock_init(bool stable) | 105 | static inline void kvm_sched_clock_init(bool stable) |
| 106 | { | 106 | { |
| 107 | if (!stable) { | 107 | if (!stable) |
| 108 | pv_ops.time.sched_clock = kvm_clock_read; | ||
| 109 | clear_sched_clock_stable(); | 108 | clear_sched_clock_stable(); |
| 110 | return; | ||
| 111 | } | ||
| 112 | |||
| 113 | kvm_sched_clock_offset = kvm_clock_read(); | 109 | kvm_sched_clock_offset = kvm_clock_read(); |
| 114 | pv_ops.time.sched_clock = kvm_sched_clock_read; | 110 | pv_ops.time.sched_clock = kvm_sched_clock_read; |
| 115 | 111 | ||
| @@ -355,6 +351,20 @@ void __init kvmclock_init(void) | |||
| 355 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 351 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
| 356 | #endif | 352 | #endif |
| 357 | kvm_get_preset_lpj(); | 353 | kvm_get_preset_lpj(); |
| 354 | |||
| 355 | /* | ||
| 356 | * X86_FEATURE_NONSTOP_TSC means the TSC runs at a constant rate | ||
| 357 | * across P/T states and does not stop in deep C-states. | ||
| 358 | * | ||
| 359 | * A host that exposes an invariant TSC makes kvmclock unnecessary: | ||
| 360 | * the TSC can be used directly as the clocksource. | ||
| 361 | * | ||
| 362 | */ | ||
| 363 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && | ||
| 364 | boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && | ||
| 365 | !check_tsc_unstable()) | ||
| 366 | kvm_clock.rating = 299; | ||
| 367 | |||
| 358 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); | 368 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); |
| 359 | pv_info.name = "KVM"; | 369 | pv_info.name = "KVM"; |
| 360 | } | 370 | } |
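The rating change works because the clocksource core simply prefers the registered clocksource with the highest rating; kvm_clock normally registers above the raw TSC clocksource (rating 300 in current kernels), so dropping it to 299 lets the invariant TSC win selection. A rough userspace sketch of that selection rule, with the ratings treated as assumptions for illustration:

```c
#include <stdio.h>

/* Toy model of clocksource selection: highest rating wins. */
struct clocksource {
	const char *name;
	int rating;
};

static const struct clocksource *pick_best(const struct clocksource *cs, int n)
{
	const struct clocksource *best = &cs[0];

	for (int i = 1; i < n; i++)
		if (cs[i].rating > best->rating)
			best = &cs[i];
	return best;
}

int main(void)
{
	struct clocksource cs[] = {
		{ "tsc",       300 },
		{ "kvm-clock", 299 },   /* lowered when the host TSC is invariant */
	};

	printf("selected: %s\n", pick_best(cs, 2)->name);  /* -> tsc */
	return 0;
}
```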
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c07958b59f50..fd3951638ae4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
| @@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 405 | F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | | 405 | F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | |
| 406 | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | | 406 | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | |
| 407 | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | | 407 | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | |
| 408 | F(CLDEMOTE); | 408 | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); |
| 409 | 409 | ||
| 410 | /* cpuid 7.0.edx*/ | 410 | /* cpuid 7.0.edx*/ |
| 411 | const u32 kvm_cpuid_7_0_edx_x86_features = | 411 | const u32 kvm_cpuid_7_0_edx_x86_features = |
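For reference, MOVDIRI and MOVDIR64B are reported in CPUID leaf 7, sub-leaf 0, ECX (bits 27 and 28 respectively); adding the F() flags above lets a guest see those bits when the host supports them. A small host-side probe, assuming GCC/Clang's <cpuid.h> is available:

```c
#include <cpuid.h>
#include <stdio.h>

/* Probe CPUID.7.0:ECX for MOVDIRI (bit 27) and MOVDIR64B (bit 28). */
int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("MOVDIRI:   %s\n", (ecx & (1u << 27)) ? "yes" : "no");
	printf("MOVDIR64B: %s\n", (ecx & (1u << 28)) ? "yes" : "no");
	return 0;
}
```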
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 89d20ed1d2e8..27c43525a05f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
| @@ -1729,7 +1729,7 @@ static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) | |||
| 1729 | 1729 | ||
| 1730 | mutex_lock(&hv->hv_lock); | 1730 | mutex_lock(&hv->hv_lock); |
| 1731 | ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, | 1731 | ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, |
| 1732 | GFP_KERNEL); | 1732 | GFP_KERNEL_ACCOUNT); |
| 1733 | mutex_unlock(&hv->hv_lock); | 1733 | mutex_unlock(&hv->hv_lock); |
| 1734 | 1734 | ||
| 1735 | if (ret >= 0) | 1735 | if (ret >= 0) |
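The GFP_KERNEL to GFP_KERNEL_ACCOUNT conversions that recur throughout this series all follow one idea: GFP_KERNEL_ACCOUNT is GFP_KERNEL with __GFP_ACCOUNT set, so the allocation is charged to the calling task's memory cgroup and thus shows up against the VMM process that owns the VM. A hedged, kernel-style sketch of the pattern (illustrative only, not part of the patch):

```c
/* Illustrative only -- mirrors the conversion pattern used in this series.
 * GFP_KERNEL_ACCOUNT is GFP_KERNEL | __GFP_ACCOUNT (include/linux/gfp.h),
 * so the allocation is memcg-accounted to the caller, i.e. the VMM. */
static struct kvm_pic *example_pic_alloc(void)
{
	struct kvm_pic *s;

	s = kzalloc(sizeof(*s), GFP_KERNEL_ACCOUNT);  /* charged to current's memcg */
	if (!s)
		return NULL;

	spin_lock_init(&s->lock);
	return s;
}
```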
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index af192895b1fc..4a6dc54cc12b 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -653,7 +653,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
| 653 | pid_t pid_nr; | 653 | pid_t pid_nr; |
| 654 | int ret; | 654 | int ret; |
| 655 | 655 | ||
| 656 | pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); | 656 | pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL_ACCOUNT); |
| 657 | if (!pit) | 657 | if (!pit) |
| 658 | return NULL; | 658 | return NULL; |
| 659 | 659 | ||
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index bdcd4139eca9..8b38bb4868a6 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -583,7 +583,7 @@ int kvm_pic_init(struct kvm *kvm) | |||
| 583 | struct kvm_pic *s; | 583 | struct kvm_pic *s; |
| 584 | int ret; | 584 | int ret; |
| 585 | 585 | ||
| 586 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | 586 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL_ACCOUNT); |
| 587 | if (!s) | 587 | if (!s) |
| 588 | return -ENOMEM; | 588 | return -ENOMEM; |
| 589 | spin_lock_init(&s->lock); | 589 | spin_lock_init(&s->lock); |
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 4e822ad363f3..1add1bc881e2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c | |||
| @@ -622,7 +622,7 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
| 622 | struct kvm_ioapic *ioapic; | 622 | struct kvm_ioapic *ioapic; |
| 623 | int ret; | 623 | int ret; |
| 624 | 624 | ||
| 625 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); | 625 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT); |
| 626 | if (!ioapic) | 626 | if (!ioapic) |
| 627 | return -ENOMEM; | 627 | return -ENOMEM; |
| 628 | spin_lock_init(&ioapic->lock); | 628 | spin_lock_init(&ioapic->lock); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4b6c2da7265c..991fdf7fc17f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -181,7 +181,8 @@ static void recalculate_apic_map(struct kvm *kvm) | |||
| 181 | max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); | 181 | max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); |
| 182 | 182 | ||
| 183 | new = kvzalloc(sizeof(struct kvm_apic_map) + | 183 | new = kvzalloc(sizeof(struct kvm_apic_map) + |
| 184 | sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL); | 184 | sizeof(struct kvm_lapic *) * ((u64)max_id + 1), |
| 185 | GFP_KERNEL_ACCOUNT); | ||
| 185 | 186 | ||
| 186 | if (!new) | 187 | if (!new) |
| 187 | goto out; | 188 | goto out; |
| @@ -2259,13 +2260,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
| 2259 | ASSERT(vcpu != NULL); | 2260 | ASSERT(vcpu != NULL); |
| 2260 | apic_debug("apic_init %d\n", vcpu->vcpu_id); | 2261 | apic_debug("apic_init %d\n", vcpu->vcpu_id); |
| 2261 | 2262 | ||
| 2262 | apic = kzalloc(sizeof(*apic), GFP_KERNEL); | 2263 | apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); |
| 2263 | if (!apic) | 2264 | if (!apic) |
| 2264 | goto nomem; | 2265 | goto nomem; |
| 2265 | 2266 | ||
| 2266 | vcpu->arch.apic = apic; | 2267 | vcpu->arch.apic = apic; |
| 2267 | 2268 | ||
| 2268 | apic->regs = (void *)get_zeroed_page(GFP_KERNEL); | 2269 | apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); |
| 2269 | if (!apic->regs) { | 2270 | if (!apic->regs) { |
| 2270 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", | 2271 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", |
| 2271 | vcpu->vcpu_id); | 2272 | vcpu->vcpu_id); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f2d1d230d5b8..7837ab001d80 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -109,9 +109,11 @@ module_param(dbg, bool, 0644); | |||
| 109 | (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) | 109 | (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) |
| 110 | 110 | ||
| 111 | 111 | ||
| 112 | #define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))) | 112 | #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK |
| 113 | #define PT64_DIR_BASE_ADDR_MASK \ | 113 | #define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1)) |
| 114 | (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) | 114 | #else |
| 115 | #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) | ||
| 116 | #endif | ||
| 115 | #define PT64_LVL_ADDR_MASK(level) \ | 117 | #define PT64_LVL_ADDR_MASK(level) \ |
| 116 | (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ | 118 | (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ |
| 117 | * PT64_LEVEL_BITS))) - 1)) | 119 | * PT64_LEVEL_BITS))) - 1)) |
| @@ -330,53 +332,56 @@ static inline bool is_access_track_spte(u64 spte) | |||
| 330 | } | 332 | } |
| 331 | 333 | ||
| 332 | /* | 334 | /* |
| 333 | * the low bit of the generation number is always presumed to be zero. | 335 | * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of |
| 334 | * This disables mmio caching during memslot updates. The concept is | 336 | * the memslots generation and is derived as follows: |
| 335 | * similar to a seqcount but instead of retrying the access we just punt | ||
| 336 | * and ignore the cache. | ||
| 337 | * | 337 | * |
| 338 | * spte bits 3-11 are used as bits 1-9 of the generation number, | 338 | * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 |
| 339 | * the bits 52-61 are used as bits 10-19 of the generation number. | 339 | * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 |
| 340 | * | ||
| 341 | * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in | ||
| 342 | * the MMIO generation number, as doing so would require stealing a bit from | ||
| 343 | * the "real" generation number and thus effectively halve the maximum number | ||
| 344 | * of MMIO generations that can be handled before encountering a wrap (which | ||
| 345 | * requires a full MMU zap). The flag is instead explicitly queried when | ||
| 346 | * checking for MMIO spte cache hits. | ||
| 340 | */ | 347 | */ |
| 341 | #define MMIO_SPTE_GEN_LOW_SHIFT 2 | 348 | #define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) |
| 342 | #define MMIO_SPTE_GEN_HIGH_SHIFT 52 | ||
| 343 | 349 | ||
| 344 | #define MMIO_GEN_SHIFT 20 | 350 | #define MMIO_SPTE_GEN_LOW_START 3 |
| 345 | #define MMIO_GEN_LOW_SHIFT 10 | 351 | #define MMIO_SPTE_GEN_LOW_END 11 |
| 346 | #define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) | 352 | #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ |
| 347 | #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) | 353 | MMIO_SPTE_GEN_LOW_START) |
| 348 | 354 | ||
| 349 | static u64 generation_mmio_spte_mask(unsigned int gen) | 355 | #define MMIO_SPTE_GEN_HIGH_START 52 |
| 356 | #define MMIO_SPTE_GEN_HIGH_END 61 | ||
| 357 | #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ | ||
| 358 | MMIO_SPTE_GEN_HIGH_START) | ||
| 359 | static u64 generation_mmio_spte_mask(u64 gen) | ||
| 350 | { | 360 | { |
| 351 | u64 mask; | 361 | u64 mask; |
| 352 | 362 | ||
| 353 | WARN_ON(gen & ~MMIO_GEN_MASK); | 363 | WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); |
| 354 | 364 | ||
| 355 | mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; | 365 | mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; |
| 356 | mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; | 366 | mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; |
| 357 | return mask; | 367 | return mask; |
| 358 | } | 368 | } |
| 359 | 369 | ||
| 360 | static unsigned int get_mmio_spte_generation(u64 spte) | 370 | static u64 get_mmio_spte_generation(u64 spte) |
| 361 | { | 371 | { |
| 362 | unsigned int gen; | 372 | u64 gen; |
| 363 | 373 | ||
| 364 | spte &= ~shadow_mmio_mask; | 374 | spte &= ~shadow_mmio_mask; |
| 365 | 375 | ||
| 366 | gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK; | 376 | gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; |
| 367 | gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT; | 377 | gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; |
| 368 | return gen; | 378 | return gen; |
| 369 | } | 379 | } |
| 370 | 380 | ||
| 371 | static unsigned int kvm_current_mmio_generation(struct kvm_vcpu *vcpu) | ||
| 372 | { | ||
| 373 | return kvm_vcpu_memslots(vcpu)->generation & MMIO_GEN_MASK; | ||
| 374 | } | ||
| 375 | |||
| 376 | static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, | 381 | static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, |
| 377 | unsigned access) | 382 | unsigned access) |
| 378 | { | 383 | { |
| 379 | unsigned int gen = kvm_current_mmio_generation(vcpu); | 384 | u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK; |
| 380 | u64 mask = generation_mmio_spte_mask(gen); | 385 | u64 mask = generation_mmio_spte_mask(gen); |
| 381 | u64 gpa = gfn << PAGE_SHIFT; | 386 | u64 gpa = gfn << PAGE_SHIFT; |
| 382 | 387 | ||
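A self-contained userspace sketch of the bit layout described in the comment above (generation bits 0-8 in spte bits 3-11, bits 9-18 in spte bits 52-61). The shifts are spelled out explicitly for illustration and are not copied verbatim from the kernel helpers:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GEN_LOW_BITS   9ULL                 /* gen[0:8]  -> spte[3:11]  */
#define GEN_HIGH_BITS  10ULL                /* gen[9:18] -> spte[52:61] */

static uint64_t pack_mmio_gen(uint64_t gen)
{
	uint64_t low  = gen & ((1ULL << GEN_LOW_BITS) - 1);
	uint64_t high = (gen >> GEN_LOW_BITS) & ((1ULL << GEN_HIGH_BITS) - 1);

	return (low << 3) | (high << 52);
}

static uint64_t unpack_mmio_gen(uint64_t spte)
{
	uint64_t low  = (spte >> 3)  & ((1ULL << GEN_LOW_BITS) - 1);
	uint64_t high = (spte >> 52) & ((1ULL << GEN_HIGH_BITS) - 1);

	return low | (high << GEN_LOW_BITS);
}

int main(void)
{
	/* Every 19-bit generation value round-trips through the spte bits. */
	for (uint64_t gen = 0; gen < (1ULL << 19); gen += 12345)
		assert(unpack_mmio_gen(pack_mmio_gen(gen)) == gen);

	printf("19-bit generation round-trips through spte bits 3-11 and 52-61\n");
	return 0;
}
```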
| @@ -386,6 +391,8 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, | |||
| 386 | mask |= (gpa & shadow_nonpresent_or_rsvd_mask) | 391 | mask |= (gpa & shadow_nonpresent_or_rsvd_mask) |
| 387 | << shadow_nonpresent_or_rsvd_mask_len; | 392 | << shadow_nonpresent_or_rsvd_mask_len; |
| 388 | 393 | ||
| 394 | page_header(__pa(sptep))->mmio_cached = true; | ||
| 395 | |||
| 389 | trace_mark_mmio_spte(sptep, gfn, access, gen); | 396 | trace_mark_mmio_spte(sptep, gfn, access, gen); |
| 390 | mmu_spte_set(sptep, mask); | 397 | mmu_spte_set(sptep, mask); |
| 391 | } | 398 | } |
| @@ -407,7 +414,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte) | |||
| 407 | 414 | ||
| 408 | static unsigned get_mmio_spte_access(u64 spte) | 415 | static unsigned get_mmio_spte_access(u64 spte) |
| 409 | { | 416 | { |
| 410 | u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; | 417 | u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask; |
| 411 | return (spte & ~mask) & ~PAGE_MASK; | 418 | return (spte & ~mask) & ~PAGE_MASK; |
| 412 | } | 419 | } |
| 413 | 420 | ||
| @@ -424,9 +431,13 @@ static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, | |||
| 424 | 431 | ||
| 425 | static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) | 432 | static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) |
| 426 | { | 433 | { |
| 427 | unsigned int kvm_gen, spte_gen; | 434 | u64 kvm_gen, spte_gen, gen; |
| 428 | 435 | ||
| 429 | kvm_gen = kvm_current_mmio_generation(vcpu); | 436 | gen = kvm_vcpu_memslots(vcpu)->generation; |
| 437 | if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) | ||
| 438 | return false; | ||
| 439 | |||
| 440 | kvm_gen = gen & MMIO_SPTE_GEN_MASK; | ||
| 430 | spte_gen = get_mmio_spte_generation(spte); | 441 | spte_gen = get_mmio_spte_generation(spte); |
| 431 | 442 | ||
| 432 | trace_check_mmio_spte(spte, kvm_gen, spte_gen); | 443 | trace_check_mmio_spte(spte, kvm_gen, spte_gen); |
| @@ -959,7 +970,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | |||
| 959 | if (cache->nobjs >= min) | 970 | if (cache->nobjs >= min) |
| 960 | return 0; | 971 | return 0; |
| 961 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | 972 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
| 962 | obj = kmem_cache_zalloc(base_cache, GFP_KERNEL); | 973 | obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT); |
| 963 | if (!obj) | 974 | if (!obj) |
| 964 | return cache->nobjs >= min ? 0 : -ENOMEM; | 975 | return cache->nobjs >= min ? 0 : -ENOMEM; |
| 965 | cache->objects[cache->nobjs++] = obj; | 976 | cache->objects[cache->nobjs++] = obj; |
| @@ -2049,12 +2060,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct | |||
| 2049 | if (!direct) | 2060 | if (!direct) |
| 2050 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); | 2061 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
| 2051 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 2062 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
| 2052 | |||
| 2053 | /* | ||
| 2054 | * The active_mmu_pages list is the FIFO list, do not move the | ||
| 2055 | * page until it is zapped. kvm_zap_obsolete_pages depends on | ||
| 2056 | * this feature. See the comments in kvm_zap_obsolete_pages(). | ||
| 2057 | */ | ||
| 2058 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 2063 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
| 2059 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | 2064 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); |
| 2060 | return sp; | 2065 | return sp; |
| @@ -2195,23 +2200,15 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
| 2195 | --kvm->stat.mmu_unsync; | 2200 | --kvm->stat.mmu_unsync; |
| 2196 | } | 2201 | } |
| 2197 | 2202 | ||
| 2198 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | 2203 | static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
| 2199 | struct list_head *invalid_list); | 2204 | struct list_head *invalid_list); |
| 2200 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 2205 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
| 2201 | struct list_head *invalid_list); | 2206 | struct list_head *invalid_list); |
| 2202 | 2207 | ||
| 2203 | /* | ||
| 2204 | * NOTE: we should pay more attention on the zapped-obsolete page | ||
| 2205 | * (is_obsolete_sp(sp) && sp->role.invalid) when you do hash list walk | ||
| 2206 | * since it has been deleted from active_mmu_pages but still can be found | ||
| 2207 | * at hast list. | ||
| 2208 | * | ||
| 2209 | * for_each_valid_sp() has skipped that kind of pages. | ||
| 2210 | */ | ||
| 2211 | #define for_each_valid_sp(_kvm, _sp, _gfn) \ | 2208 | #define for_each_valid_sp(_kvm, _sp, _gfn) \ |
| 2212 | hlist_for_each_entry(_sp, \ | 2209 | hlist_for_each_entry(_sp, \ |
| 2213 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ | 2210 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ |
| 2214 | if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \ | 2211 | if ((_sp)->role.invalid) { \ |
| 2215 | } else | 2212 | } else |
| 2216 | 2213 | ||
| 2217 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ | 2214 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ |
| @@ -2231,18 +2228,28 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
| 2231 | return true; | 2228 | return true; |
| 2232 | } | 2229 | } |
| 2233 | 2230 | ||
| 2231 | static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, | ||
| 2232 | struct list_head *invalid_list, | ||
| 2233 | bool remote_flush) | ||
| 2234 | { | ||
| 2235 | if (!remote_flush && !list_empty(invalid_list)) | ||
| 2236 | return false; | ||
| 2237 | |||
| 2238 | if (!list_empty(invalid_list)) | ||
| 2239 | kvm_mmu_commit_zap_page(kvm, invalid_list); | ||
| 2240 | else | ||
| 2241 | kvm_flush_remote_tlbs(kvm); | ||
| 2242 | return true; | ||
| 2243 | } | ||
| 2244 | |||
| 2234 | static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, | 2245 | static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, |
| 2235 | struct list_head *invalid_list, | 2246 | struct list_head *invalid_list, |
| 2236 | bool remote_flush, bool local_flush) | 2247 | bool remote_flush, bool local_flush) |
| 2237 | { | 2248 | { |
| 2238 | if (!list_empty(invalid_list)) { | 2249 | if (kvm_mmu_remote_flush_or_zap(vcpu->kvm, invalid_list, remote_flush)) |
| 2239 | kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list); | ||
| 2240 | return; | 2250 | return; |
| 2241 | } | ||
| 2242 | 2251 | ||
| 2243 | if (remote_flush) | 2252 | if (local_flush) |
| 2244 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
| 2245 | else if (local_flush) | ||
| 2246 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 2253 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
| 2247 | } | 2254 | } |
| 2248 | 2255 | ||
| @@ -2253,11 +2260,6 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } | |||
| 2253 | static void mmu_audit_disable(void) { } | 2260 | static void mmu_audit_disable(void) { } |
| 2254 | #endif | 2261 | #endif |
| 2255 | 2262 | ||
| 2256 | static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
| 2257 | { | ||
| 2258 | return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); | ||
| 2259 | } | ||
| 2260 | |||
| 2261 | static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 2263 | static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
| 2262 | struct list_head *invalid_list) | 2264 | struct list_head *invalid_list) |
| 2263 | { | 2265 | { |
| @@ -2482,7 +2484,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 2482 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) | 2484 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) |
| 2483 | flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); | 2485 | flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); |
| 2484 | } | 2486 | } |
| 2485 | sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; | ||
| 2486 | clear_page(sp->spt); | 2487 | clear_page(sp->spt); |
| 2487 | trace_kvm_mmu_get_page(sp, true); | 2488 | trace_kvm_mmu_get_page(sp, true); |
| 2488 | 2489 | ||
| @@ -2668,17 +2669,22 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
| 2668 | return zapped; | 2669 | return zapped; |
| 2669 | } | 2670 | } |
| 2670 | 2671 | ||
| 2671 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | 2672 | static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, |
| 2672 | struct list_head *invalid_list) | 2673 | struct kvm_mmu_page *sp, |
| 2674 | struct list_head *invalid_list, | ||
| 2675 | int *nr_zapped) | ||
| 2673 | { | 2676 | { |
| 2674 | int ret; | 2677 | bool list_unstable; |
| 2675 | 2678 | ||
| 2676 | trace_kvm_mmu_prepare_zap_page(sp); | 2679 | trace_kvm_mmu_prepare_zap_page(sp); |
| 2677 | ++kvm->stat.mmu_shadow_zapped; | 2680 | ++kvm->stat.mmu_shadow_zapped; |
| 2678 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); | 2681 | *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); |
| 2679 | kvm_mmu_page_unlink_children(kvm, sp); | 2682 | kvm_mmu_page_unlink_children(kvm, sp); |
| 2680 | kvm_mmu_unlink_parents(kvm, sp); | 2683 | kvm_mmu_unlink_parents(kvm, sp); |
| 2681 | 2684 | ||
| 2685 | /* Zapping children means active_mmu_pages has become unstable. */ | ||
| 2686 | list_unstable = *nr_zapped; | ||
| 2687 | |||
| 2682 | if (!sp->role.invalid && !sp->role.direct) | 2688 | if (!sp->role.invalid && !sp->role.direct) |
| 2683 | unaccount_shadowed(kvm, sp); | 2689 | unaccount_shadowed(kvm, sp); |
| 2684 | 2690 | ||
| @@ -2686,22 +2692,27 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
| 2686 | kvm_unlink_unsync_page(kvm, sp); | 2692 | kvm_unlink_unsync_page(kvm, sp); |
| 2687 | if (!sp->root_count) { | 2693 | if (!sp->root_count) { |
| 2688 | /* Count self */ | 2694 | /* Count self */ |
| 2689 | ret++; | 2695 | (*nr_zapped)++; |
| 2690 | list_move(&sp->link, invalid_list); | 2696 | list_move(&sp->link, invalid_list); |
| 2691 | kvm_mod_used_mmu_pages(kvm, -1); | 2697 | kvm_mod_used_mmu_pages(kvm, -1); |
| 2692 | } else { | 2698 | } else { |
| 2693 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 2699 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
| 2694 | 2700 | ||
| 2695 | /* | 2701 | if (!sp->role.invalid) |
| 2696 | * The obsolete pages can not be used on any vcpus. | ||
| 2697 | * See the comments in kvm_mmu_invalidate_zap_all_pages(). | ||
| 2698 | */ | ||
| 2699 | if (!sp->role.invalid && !is_obsolete_sp(kvm, sp)) | ||
| 2700 | kvm_reload_remote_mmus(kvm); | 2702 | kvm_reload_remote_mmus(kvm); |
| 2701 | } | 2703 | } |
| 2702 | 2704 | ||
| 2703 | sp->role.invalid = 1; | 2705 | sp->role.invalid = 1; |
| 2704 | return ret; | 2706 | return list_unstable; |
| 2707 | } | ||
| 2708 | |||
| 2709 | static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | ||
| 2710 | struct list_head *invalid_list) | ||
| 2711 | { | ||
| 2712 | int nr_zapped; | ||
| 2713 | |||
| 2714 | __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped); | ||
| 2715 | return nr_zapped; | ||
| 2705 | } | 2716 | } |
| 2706 | 2717 | ||
| 2707 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 2718 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
| @@ -3703,7 +3714,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 3703 | 3714 | ||
| 3704 | u64 *lm_root; | 3715 | u64 *lm_root; |
| 3705 | 3716 | ||
| 3706 | lm_root = (void*)get_zeroed_page(GFP_KERNEL); | 3717 | lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT); |
| 3707 | if (lm_root == NULL) | 3718 | if (lm_root == NULL) |
| 3708 | return 1; | 3719 | return 1; |
| 3709 | 3720 | ||
| @@ -4204,14 +4215,6 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, | |||
| 4204 | return false; | 4215 | return false; |
| 4205 | 4216 | ||
| 4206 | if (cached_root_available(vcpu, new_cr3, new_role)) { | 4217 | if (cached_root_available(vcpu, new_cr3, new_role)) { |
| 4207 | /* | ||
| 4208 | * It is possible that the cached previous root page is | ||
| 4209 | * obsolete because of a change in the MMU | ||
| 4210 | * generation number. However, that is accompanied by | ||
| 4211 | * KVM_REQ_MMU_RELOAD, which will free the root that we | ||
| 4212 | * have set here and allocate a new one. | ||
| 4213 | */ | ||
| 4214 | |||
| 4215 | kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); | 4218 | kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); |
| 4216 | if (!skip_tlb_flush) { | 4219 | if (!skip_tlb_flush) { |
| 4217 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); | 4220 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); |
| @@ -5486,6 +5489,76 @@ void kvm_disable_tdp(void) | |||
| 5486 | } | 5489 | } |
| 5487 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); | 5490 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); |
| 5488 | 5491 | ||
| 5492 | |||
| 5493 | /* The return value indicates if tlb flush on all vcpus is needed. */ | ||
| 5494 | typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); | ||
| 5495 | |||
| 5496 | /* The caller should hold mmu-lock before calling this function. */ | ||
| 5497 | static __always_inline bool | ||
| 5498 | slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5499 | slot_level_handler fn, int start_level, int end_level, | ||
| 5500 | gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) | ||
| 5501 | { | ||
| 5502 | struct slot_rmap_walk_iterator iterator; | ||
| 5503 | bool flush = false; | ||
| 5504 | |||
| 5505 | for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, | ||
| 5506 | end_gfn, &iterator) { | ||
| 5507 | if (iterator.rmap) | ||
| 5508 | flush |= fn(kvm, iterator.rmap); | ||
| 5509 | |||
| 5510 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { | ||
| 5511 | if (flush && lock_flush_tlb) { | ||
| 5512 | kvm_flush_remote_tlbs(kvm); | ||
| 5513 | flush = false; | ||
| 5514 | } | ||
| 5515 | cond_resched_lock(&kvm->mmu_lock); | ||
| 5516 | } | ||
| 5517 | } | ||
| 5518 | |||
| 5519 | if (flush && lock_flush_tlb) { | ||
| 5520 | kvm_flush_remote_tlbs(kvm); | ||
| 5521 | flush = false; | ||
| 5522 | } | ||
| 5523 | |||
| 5524 | return flush; | ||
| 5525 | } | ||
| 5526 | |||
| 5527 | static __always_inline bool | ||
| 5528 | slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5529 | slot_level_handler fn, int start_level, int end_level, | ||
| 5530 | bool lock_flush_tlb) | ||
| 5531 | { | ||
| 5532 | return slot_handle_level_range(kvm, memslot, fn, start_level, | ||
| 5533 | end_level, memslot->base_gfn, | ||
| 5534 | memslot->base_gfn + memslot->npages - 1, | ||
| 5535 | lock_flush_tlb); | ||
| 5536 | } | ||
| 5537 | |||
| 5538 | static __always_inline bool | ||
| 5539 | slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5540 | slot_level_handler fn, bool lock_flush_tlb) | ||
| 5541 | { | ||
| 5542 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
| 5543 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
| 5544 | } | ||
| 5545 | |||
| 5546 | static __always_inline bool | ||
| 5547 | slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5548 | slot_level_handler fn, bool lock_flush_tlb) | ||
| 5549 | { | ||
| 5550 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, | ||
| 5551 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
| 5552 | } | ||
| 5553 | |||
| 5554 | static __always_inline bool | ||
| 5555 | slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5556 | slot_level_handler fn, bool lock_flush_tlb) | ||
| 5557 | { | ||
| 5558 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
| 5559 | PT_PAGE_TABLE_LEVEL, lock_flush_tlb); | ||
| 5560 | } | ||
| 5561 | |||
| 5489 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 5562 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
| 5490 | { | 5563 | { |
| 5491 | free_page((unsigned long)vcpu->arch.mmu->pae_root); | 5564 | free_page((unsigned long)vcpu->arch.mmu->pae_root); |
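The slot_handle_*() helpers moved above all share one shape: walk the rmaps level by level, accumulate a pending-flush flag, and when the mmu_lock becomes contended (or a reschedule is needed) flush first, then drop and retake the lock. A generic userspace sketch of that "flush before yielding" pattern, with the work items, contention check, and flush as stand-ins:

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the real primitives. */
static bool handle_item(int i)      { return (i % 3) == 0; }   /* needs flush? */
static bool lock_contended(int i)   { return (i % 5) == 4; }
static void flush_remote_tlbs(void) { puts("flush"); }

static bool walk_range(int nr_items, bool lock_flush_tlb)
{
	bool flush = false;

	for (int i = 0; i < nr_items; i++) {
		flush |= handle_item(i);

		if (lock_contended(i)) {
			/* Flush before giving up the lock so no CPU keeps
			 * using a stale translation while we sleep. */
			if (flush && lock_flush_tlb) {
				flush_remote_tlbs();
				flush = false;
			}
			/* cond_resched_lock(&kvm->mmu_lock) equivalent */
		}
	}
	return flush;   /* caller decides whether one final flush is needed */
}

int main(void)
{
	if (walk_range(20, true))
		flush_remote_tlbs();
	return 0;
}
```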
| @@ -5505,7 +5578,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
| 5505 | * Therefore we need to allocate shadow page tables in the first | 5578 | * Therefore we need to allocate shadow page tables in the first |
| 5506 | * 4GB of memory, which happens to fit the DMA32 zone. | 5579 | * 4GB of memory, which happens to fit the DMA32 zone. |
| 5507 | */ | 5580 | */ |
| 5508 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); | 5581 | page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); |
| 5509 | if (!page) | 5582 | if (!page) |
| 5510 | return -ENOMEM; | 5583 | return -ENOMEM; |
| 5511 | 5584 | ||
| @@ -5543,105 +5616,62 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, | |||
| 5543 | struct kvm_memory_slot *slot, | 5616 | struct kvm_memory_slot *slot, |
| 5544 | struct kvm_page_track_notifier_node *node) | 5617 | struct kvm_page_track_notifier_node *node) |
| 5545 | { | 5618 | { |
| 5546 | kvm_mmu_invalidate_zap_all_pages(kvm); | 5619 | struct kvm_mmu_page *sp; |
| 5547 | } | 5620 | LIST_HEAD(invalid_list); |
| 5548 | 5621 | unsigned long i; | |
| 5549 | void kvm_mmu_init_vm(struct kvm *kvm) | 5622 | bool flush; |
| 5550 | { | 5623 | gfn_t gfn; |
| 5551 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; | ||
| 5552 | |||
| 5553 | node->track_write = kvm_mmu_pte_write; | ||
| 5554 | node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; | ||
| 5555 | kvm_page_track_register_notifier(kvm, node); | ||
| 5556 | } | ||
| 5557 | 5624 | ||
| 5558 | void kvm_mmu_uninit_vm(struct kvm *kvm) | 5625 | spin_lock(&kvm->mmu_lock); |
| 5559 | { | ||
| 5560 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; | ||
| 5561 | 5626 | ||
| 5562 | kvm_page_track_unregister_notifier(kvm, node); | 5627 | if (list_empty(&kvm->arch.active_mmu_pages)) |
| 5563 | } | 5628 | goto out_unlock; |
| 5564 | 5629 | ||
| 5565 | /* The return value indicates if tlb flush on all vcpus is needed. */ | 5630 | flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false); |
| 5566 | typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); | ||
| 5567 | 5631 | ||
| 5568 | /* The caller should hold mmu-lock before calling this function. */ | 5632 | for (i = 0; i < slot->npages; i++) { |
| 5569 | static __always_inline bool | 5633 | gfn = slot->base_gfn + i; |
| 5570 | slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5571 | slot_level_handler fn, int start_level, int end_level, | ||
| 5572 | gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) | ||
| 5573 | { | ||
| 5574 | struct slot_rmap_walk_iterator iterator; | ||
| 5575 | bool flush = false; | ||
| 5576 | 5634 | ||
| 5577 | for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, | 5635 | for_each_valid_sp(kvm, sp, gfn) { |
| 5578 | end_gfn, &iterator) { | 5636 | if (sp->gfn != gfn) |
| 5579 | if (iterator.rmap) | 5637 | continue; |
| 5580 | flush |= fn(kvm, iterator.rmap); | ||
| 5581 | 5638 | ||
| 5639 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | ||
| 5640 | } | ||
| 5582 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { | 5641 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { |
| 5583 | if (flush && lock_flush_tlb) { | 5642 | kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); |
| 5584 | kvm_flush_remote_tlbs(kvm); | 5643 | flush = false; |
| 5585 | flush = false; | ||
| 5586 | } | ||
| 5587 | cond_resched_lock(&kvm->mmu_lock); | 5644 | cond_resched_lock(&kvm->mmu_lock); |
| 5588 | } | 5645 | } |
| 5589 | } | 5646 | } |
| 5647 | kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); | ||
| 5590 | 5648 | ||
| 5591 | if (flush && lock_flush_tlb) { | 5649 | out_unlock: |
| 5592 | kvm_flush_remote_tlbs(kvm); | 5650 | spin_unlock(&kvm->mmu_lock); |
| 5593 | flush = false; | ||
| 5594 | } | ||
| 5595 | |||
| 5596 | return flush; | ||
| 5597 | } | 5651 | } |
| 5598 | 5652 | ||
| 5599 | static __always_inline bool | 5653 | void kvm_mmu_init_vm(struct kvm *kvm) |
| 5600 | slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5601 | slot_level_handler fn, int start_level, int end_level, | ||
| 5602 | bool lock_flush_tlb) | ||
| 5603 | { | 5654 | { |
| 5604 | return slot_handle_level_range(kvm, memslot, fn, start_level, | 5655 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; |
| 5605 | end_level, memslot->base_gfn, | ||
| 5606 | memslot->base_gfn + memslot->npages - 1, | ||
| 5607 | lock_flush_tlb); | ||
| 5608 | } | ||
| 5609 | 5656 | ||
| 5610 | static __always_inline bool | 5657 | node->track_write = kvm_mmu_pte_write; |
| 5611 | slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | 5658 | node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; |
| 5612 | slot_level_handler fn, bool lock_flush_tlb) | 5659 | kvm_page_track_register_notifier(kvm, node); |
| 5613 | { | ||
| 5614 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
| 5615 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
| 5616 | } | 5660 | } |
| 5617 | 5661 | ||
| 5618 | static __always_inline bool | 5662 | void kvm_mmu_uninit_vm(struct kvm *kvm) |
| 5619 | slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5620 | slot_level_handler fn, bool lock_flush_tlb) | ||
| 5621 | { | 5663 | { |
| 5622 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, | 5664 | struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; |
| 5623 | PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); | ||
| 5624 | } | ||
| 5625 | 5665 | ||
| 5626 | static __always_inline bool | 5666 | kvm_page_track_unregister_notifier(kvm, node); |
| 5627 | slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||
| 5628 | slot_level_handler fn, bool lock_flush_tlb) | ||
| 5629 | { | ||
| 5630 | return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, | ||
| 5631 | PT_PAGE_TABLE_LEVEL, lock_flush_tlb); | ||
| 5632 | } | 5667 | } |
| 5633 | 5668 | ||
| 5634 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) | 5669 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) |
| 5635 | { | 5670 | { |
| 5636 | struct kvm_memslots *slots; | 5671 | struct kvm_memslots *slots; |
| 5637 | struct kvm_memory_slot *memslot; | 5672 | struct kvm_memory_slot *memslot; |
| 5638 | bool flush_tlb = true; | ||
| 5639 | bool flush = false; | ||
| 5640 | int i; | 5673 | int i; |
| 5641 | 5674 | ||
| 5642 | if (kvm_available_flush_tlb_with_range()) | ||
| 5643 | flush_tlb = false; | ||
| 5644 | |||
| 5645 | spin_lock(&kvm->mmu_lock); | 5675 | spin_lock(&kvm->mmu_lock); |
| 5646 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { | 5676 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { |
| 5647 | slots = __kvm_memslots(kvm, i); | 5677 | slots = __kvm_memslots(kvm, i); |
| @@ -5653,17 +5683,12 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) | |||
| 5653 | if (start >= end) | 5683 | if (start >= end) |
| 5654 | continue; | 5684 | continue; |
| 5655 | 5685 | ||
| 5656 | flush |= slot_handle_level_range(kvm, memslot, | 5686 | slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, |
| 5657 | kvm_zap_rmapp, PT_PAGE_TABLE_LEVEL, | 5687 | PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, |
| 5658 | PT_MAX_HUGEPAGE_LEVEL, start, | 5688 | start, end - 1, true); |
| 5659 | end - 1, flush_tlb); | ||
| 5660 | } | 5689 | } |
| 5661 | } | 5690 | } |
| 5662 | 5691 | ||
| 5663 | if (flush) | ||
| 5664 | kvm_flush_remote_tlbs_with_address(kvm, gfn_start, | ||
| 5665 | gfn_end - gfn_start + 1); | ||
| 5666 | |||
| 5667 | spin_unlock(&kvm->mmu_lock); | 5692 | spin_unlock(&kvm->mmu_lock); |
| 5668 | } | 5693 | } |
| 5669 | 5694 | ||
| @@ -5815,101 +5840,58 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm, | |||
| 5815 | } | 5840 | } |
| 5816 | EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); | 5841 | EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); |
| 5817 | 5842 | ||
| 5818 | #define BATCH_ZAP_PAGES 10 | 5843 | static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) |
| 5819 | static void kvm_zap_obsolete_pages(struct kvm *kvm) | ||
| 5820 | { | 5844 | { |
| 5821 | struct kvm_mmu_page *sp, *node; | 5845 | struct kvm_mmu_page *sp, *node; |
| 5822 | int batch = 0; | 5846 | LIST_HEAD(invalid_list); |
| 5847 | int ign; | ||
| 5823 | 5848 | ||
| 5849 | spin_lock(&kvm->mmu_lock); | ||
| 5824 | restart: | 5850 | restart: |
| 5825 | list_for_each_entry_safe_reverse(sp, node, | 5851 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { |
| 5826 | &kvm->arch.active_mmu_pages, link) { | 5852 | if (mmio_only && !sp->mmio_cached) |
| 5827 | int ret; | ||
| 5828 | |||
| 5829 | /* | ||
| 5830 | * No obsolete page exists before new created page since | ||
| 5831 | * active_mmu_pages is the FIFO list. | ||
| 5832 | */ | ||
| 5833 | if (!is_obsolete_sp(kvm, sp)) | ||
| 5834 | break; | ||
| 5835 | |||
| 5836 | /* | ||
| 5837 | * Since we are reversely walking the list and the invalid | ||
| 5838 | * list will be moved to the head, skip the invalid page | ||
| 5839 | * can help us to avoid the infinity list walking. | ||
| 5840 | */ | ||
| 5841 | if (sp->role.invalid) | ||
| 5842 | continue; | 5853 | continue; |
| 5843 | 5854 | if (sp->role.invalid && sp->root_count) | |
| 5844 | /* | 5855 | continue; |
| 5845 | * Need not flush tlb since we only zap the sp with invalid | 5856 | if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) { |
| 5846 | * generation number. | 5857 | WARN_ON_ONCE(mmio_only); |
| 5847 | */ | ||
| 5848 | if (batch >= BATCH_ZAP_PAGES && | ||
| 5849 | cond_resched_lock(&kvm->mmu_lock)) { | ||
| 5850 | batch = 0; | ||
| 5851 | goto restart; | 5858 | goto restart; |
| 5852 | } | 5859 | } |
| 5853 | 5860 | if (cond_resched_lock(&kvm->mmu_lock)) | |
| 5854 | ret = kvm_mmu_prepare_zap_page(kvm, sp, | ||
| 5855 | &kvm->arch.zapped_obsolete_pages); | ||
| 5856 | batch += ret; | ||
| 5857 | |||
| 5858 | if (ret) | ||
| 5859 | goto restart; | 5861 | goto restart; |
| 5860 | } | 5862 | } |
| 5861 | 5863 | ||
| 5862 | /* | 5864 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
| 5863 | * Should flush tlb before free page tables since lockless-walking | ||
| 5864 | * may use the pages. | ||
| 5865 | */ | ||
| 5866 | kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); | ||
| 5867 | } | ||
| 5868 | |||
| 5869 | /* | ||
| 5870 | * Fast invalidate all shadow pages and use lock-break technique | ||
| 5871 | * to zap obsolete pages. | ||
| 5872 | * | ||
| 5873 | * It's required when memslot is being deleted or VM is being | ||
| 5874 | * destroyed, in these cases, we should ensure that KVM MMU does | ||
| 5875 | * not use any resource of the being-deleted slot or all slots | ||
| 5876 | * after calling the function. | ||
| 5877 | */ | ||
| 5878 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm) | ||
| 5879 | { | ||
| 5880 | spin_lock(&kvm->mmu_lock); | ||
| 5881 | trace_kvm_mmu_invalidate_zap_all_pages(kvm); | ||
| 5882 | kvm->arch.mmu_valid_gen++; | ||
| 5883 | |||
| 5884 | /* | ||
| 5885 | * Notify all vcpus to reload its shadow page table | ||
| 5886 | * and flush TLB. Then all vcpus will switch to new | ||
| 5887 | * shadow page table with the new mmu_valid_gen. | ||
| 5888 | * | ||
| 5889 | * Note: we should do this under the protection of | ||
| 5890 | * mmu-lock, otherwise, vcpu would purge shadow page | ||
| 5891 | * but miss tlb flush. | ||
| 5892 | */ | ||
| 5893 | kvm_reload_remote_mmus(kvm); | ||
| 5894 | |||
| 5895 | kvm_zap_obsolete_pages(kvm); | ||
| 5896 | spin_unlock(&kvm->mmu_lock); | 5865 | spin_unlock(&kvm->mmu_lock); |
| 5897 | } | 5866 | } |
| 5898 | 5867 | ||
| 5899 | static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) | 5868 | void kvm_mmu_zap_all(struct kvm *kvm) |
| 5900 | { | 5869 | { |
| 5901 | return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); | 5870 | return __kvm_mmu_zap_all(kvm, false); |
| 5902 | } | 5871 | } |
| 5903 | 5872 | ||
| 5904 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots) | 5873 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) |
| 5905 | { | 5874 | { |
| 5875 | WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); | ||
| 5876 | |||
| 5877 | gen &= MMIO_SPTE_GEN_MASK; | ||
| 5878 | |||
| 5906 | /* | 5879 | /* |
| 5907 | * The very rare case: if the generation-number is round, | 5880 | * Generation numbers are incremented in multiples of the number of |
| 5881 | * address spaces in order to provide unique generations across all | ||
| 5882 | * address spaces. Strip what is effectively the address space | ||
| 5883 | * modifier prior to checking for a wrap of the MMIO generation so | ||
| 5884 | * that a wrap in any address space is detected. | ||
| 5885 | */ | ||
| 5886 | gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1); | ||
| 5887 | |||
| 5888 | /* | ||
| 5889 | * The very rare case: if the MMIO generation number has wrapped, | ||
| 5908 | * zap all shadow pages. | 5890 | * zap all shadow pages. |
| 5909 | */ | 5891 | */ |
| 5910 | if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) { | 5892 | if (unlikely(gen == 0)) { |
| 5911 | kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); | 5893 | kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); |
| 5912 | kvm_mmu_invalidate_zap_all_pages(kvm); | 5894 | __kvm_mmu_zap_all(kvm, true); |
| 5913 | } | 5895 | } |
| 5914 | } | 5896 | } |
| 5915 | 5897 | ||
| @@ -5940,24 +5922,16 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
| 5940 | * want to shrink a VM that only started to populate its MMU | 5922 | * want to shrink a VM that only started to populate its MMU |
| 5941 | * anyway. | 5923 | * anyway. |
| 5942 | */ | 5924 | */ |
| 5943 | if (!kvm->arch.n_used_mmu_pages && | 5925 | if (!kvm->arch.n_used_mmu_pages) |
| 5944 | !kvm_has_zapped_obsolete_pages(kvm)) | ||
| 5945 | continue; | 5926 | continue; |
| 5946 | 5927 | ||
| 5947 | idx = srcu_read_lock(&kvm->srcu); | 5928 | idx = srcu_read_lock(&kvm->srcu); |
| 5948 | spin_lock(&kvm->mmu_lock); | 5929 | spin_lock(&kvm->mmu_lock); |
| 5949 | 5930 | ||
| 5950 | if (kvm_has_zapped_obsolete_pages(kvm)) { | ||
| 5951 | kvm_mmu_commit_zap_page(kvm, | ||
| 5952 | &kvm->arch.zapped_obsolete_pages); | ||
| 5953 | goto unlock; | ||
| 5954 | } | ||
| 5955 | |||
| 5956 | if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) | 5931 | if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) |
| 5957 | freed++; | 5932 | freed++; |
| 5958 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 5933 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
| 5959 | 5934 | ||
| 5960 | unlock: | ||
| 5961 | spin_unlock(&kvm->mmu_lock); | 5935 | spin_unlock(&kvm->mmu_lock); |
| 5962 | srcu_read_unlock(&kvm->srcu, idx); | 5936 | srcu_read_unlock(&kvm->srcu, idx); |
| 5963 | 5937 | ||
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index c7b333147c4a..bbdc60f2fae8 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
| @@ -203,7 +203,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
| 203 | return -(u32)fault & errcode; | 203 | return -(u32)fault & errcode; |
| 204 | } | 204 | } |
| 205 | 205 | ||
| 206 | void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); | ||
| 207 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); | 206 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); |
| 208 | 207 | ||
| 209 | void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); | 208 | void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index c73bf4e4988c..9f6c855a0043 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
| @@ -8,18 +8,16 @@ | |||
| 8 | #undef TRACE_SYSTEM | 8 | #undef TRACE_SYSTEM |
| 9 | #define TRACE_SYSTEM kvmmmu | 9 | #define TRACE_SYSTEM kvmmmu |
| 10 | 10 | ||
| 11 | #define KVM_MMU_PAGE_FIELDS \ | 11 | #define KVM_MMU_PAGE_FIELDS \ |
| 12 | __field(unsigned long, mmu_valid_gen) \ | 12 | __field(__u64, gfn) \ |
| 13 | __field(__u64, gfn) \ | 13 | __field(__u32, role) \ |
| 14 | __field(__u32, role) \ | 14 | __field(__u32, root_count) \ |
| 15 | __field(__u32, root_count) \ | ||
| 16 | __field(bool, unsync) | 15 | __field(bool, unsync) |
| 17 | 16 | ||
| 18 | #define KVM_MMU_PAGE_ASSIGN(sp) \ | 17 | #define KVM_MMU_PAGE_ASSIGN(sp) \ |
| 19 | __entry->mmu_valid_gen = sp->mmu_valid_gen; \ | 18 | __entry->gfn = sp->gfn; \ |
| 20 | __entry->gfn = sp->gfn; \ | 19 | __entry->role = sp->role.word; \ |
| 21 | __entry->role = sp->role.word; \ | 20 | __entry->root_count = sp->root_count; \ |
| 22 | __entry->root_count = sp->root_count; \ | ||
| 23 | __entry->unsync = sp->unsync; | 21 | __entry->unsync = sp->unsync; |
| 24 | 22 | ||
| 25 | #define KVM_MMU_PAGE_PRINTK() ({ \ | 23 | #define KVM_MMU_PAGE_PRINTK() ({ \ |
| @@ -31,9 +29,8 @@ | |||
| 31 | \ | 29 | \ |
| 32 | role.word = __entry->role; \ | 30 | role.word = __entry->role; \ |
| 33 | \ | 31 | \ |
| 34 | trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s" \ | 32 | trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s" \ |
| 35 | " %snxe %sad root %u %s%c", \ | 33 | " %snxe %sad root %u %s%c", \ |
| 36 | __entry->mmu_valid_gen, \ | ||
| 37 | __entry->gfn, role.level, \ | 34 | __entry->gfn, role.level, \ |
| 38 | role.cr4_pae ? " pae" : "", \ | 35 | role.cr4_pae ? " pae" : "", \ |
| 39 | role.quadrant, \ | 36 | role.quadrant, \ |
| @@ -283,27 +280,6 @@ TRACE_EVENT( | |||
| 283 | ); | 280 | ); |
| 284 | 281 | ||
| 285 | TRACE_EVENT( | 282 | TRACE_EVENT( |
| 286 | kvm_mmu_invalidate_zap_all_pages, | ||
| 287 | TP_PROTO(struct kvm *kvm), | ||
| 288 | TP_ARGS(kvm), | ||
| 289 | |||
| 290 | TP_STRUCT__entry( | ||
| 291 | __field(unsigned long, mmu_valid_gen) | ||
| 292 | __field(unsigned int, mmu_used_pages) | ||
| 293 | ), | ||
| 294 | |||
| 295 | TP_fast_assign( | ||
| 296 | __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; | ||
| 297 | __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; | ||
| 298 | ), | ||
| 299 | |||
| 300 | TP_printk("kvm-mmu-valid-gen %lx used_pages %x", | ||
| 301 | __entry->mmu_valid_gen, __entry->mmu_used_pages | ||
| 302 | ) | ||
| 303 | ); | ||
| 304 | |||
| 305 | |||
| 306 | TRACE_EVENT( | ||
| 307 | check_mmio_spte, | 283 | check_mmio_spte, |
| 308 | TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), | 284 | TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), |
| 309 | TP_ARGS(spte, kvm_gen, spte_gen), | 285 | TP_ARGS(spte, kvm_gen, spte_gen), |
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 3052a59a3065..fd04d462fdae 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c | |||
| @@ -42,7 +42,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, | |||
| 42 | for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { | 42 | for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { |
| 43 | slot->arch.gfn_track[i] = | 43 | slot->arch.gfn_track[i] = |
| 44 | kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), | 44 | kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), |
| 45 | GFP_KERNEL); | 45 | GFP_KERNEL_ACCOUNT); |
| 46 | if (!slot->arch.gfn_track[i]) | 46 | if (!slot->arch.gfn_track[i]) |
| 47 | goto track_free; | 47 | goto track_free; |
| 48 | } | 48 | } |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f13a3a24d360..b5b128a0a051 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -145,7 +145,6 @@ struct kvm_svm { | |||
| 145 | 145 | ||
| 146 | /* Struct members for AVIC */ | 146 | /* Struct members for AVIC */ |
| 147 | u32 avic_vm_id; | 147 | u32 avic_vm_id; |
| 148 | u32 ldr_mode; | ||
| 149 | struct page *avic_logical_id_table_page; | 148 | struct page *avic_logical_id_table_page; |
| 150 | struct page *avic_physical_id_table_page; | 149 | struct page *avic_physical_id_table_page; |
| 151 | struct hlist_node hnode; | 150 | struct hlist_node hnode; |
| @@ -236,6 +235,7 @@ struct vcpu_svm { | |||
| 236 | bool nrips_enabled : 1; | 235 | bool nrips_enabled : 1; |
| 237 | 236 | ||
| 238 | u32 ldr_reg; | 237 | u32 ldr_reg; |
| 238 | u32 dfr_reg; | ||
| 239 | struct page *avic_backing_page; | 239 | struct page *avic_backing_page; |
| 240 | u64 *avic_physical_id_cache; | 240 | u64 *avic_physical_id_cache; |
| 241 | bool avic_is_running; | 241 | bool avic_is_running; |
| @@ -1795,9 +1795,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, | |||
| 1795 | /* Avoid using vmalloc for smaller buffers. */ | 1795 | /* Avoid using vmalloc for smaller buffers. */ |
| 1796 | size = npages * sizeof(struct page *); | 1796 | size = npages * sizeof(struct page *); |
| 1797 | if (size > PAGE_SIZE) | 1797 | if (size > PAGE_SIZE) |
| 1798 | pages = vmalloc(size); | 1798 | pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO, |
| 1799 | PAGE_KERNEL); | ||
| 1799 | else | 1800 | else |
| 1800 | pages = kmalloc(size, GFP_KERNEL); | 1801 | pages = kmalloc(size, GFP_KERNEL_ACCOUNT); |
| 1801 | 1802 | ||
| 1802 | if (!pages) | 1803 | if (!pages) |
| 1803 | return NULL; | 1804 | return NULL; |
| @@ -1865,7 +1866,9 @@ static void __unregister_enc_region_locked(struct kvm *kvm, | |||
| 1865 | 1866 | ||
| 1866 | static struct kvm *svm_vm_alloc(void) | 1867 | static struct kvm *svm_vm_alloc(void) |
| 1867 | { | 1868 | { |
| 1868 | struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm)); | 1869 | struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm), |
| 1870 | GFP_KERNEL_ACCOUNT | __GFP_ZERO, | ||
| 1871 | PAGE_KERNEL); | ||
| 1869 | return &kvm_svm->kvm; | 1872 | return &kvm_svm->kvm; |
| 1870 | } | 1873 | } |
| 1871 | 1874 | ||
| @@ -1940,7 +1943,7 @@ static int avic_vm_init(struct kvm *kvm) | |||
| 1940 | return 0; | 1943 | return 0; |
| 1941 | 1944 | ||
| 1942 | /* Allocating physical APIC ID table (4KB) */ | 1945 | /* Allocating physical APIC ID table (4KB) */ |
| 1943 | p_page = alloc_page(GFP_KERNEL); | 1946 | p_page = alloc_page(GFP_KERNEL_ACCOUNT); |
| 1944 | if (!p_page) | 1947 | if (!p_page) |
| 1945 | goto free_avic; | 1948 | goto free_avic; |
| 1946 | 1949 | ||
| @@ -1948,7 +1951,7 @@ static int avic_vm_init(struct kvm *kvm) | |||
| 1948 | clear_page(page_address(p_page)); | 1951 | clear_page(page_address(p_page)); |
| 1949 | 1952 | ||
| 1950 | /* Allocating logical APIC ID table (4KB) */ | 1953 | /* Allocating logical APIC ID table (4KB) */ |
| 1951 | l_page = alloc_page(GFP_KERNEL); | 1954 | l_page = alloc_page(GFP_KERNEL_ACCOUNT); |
| 1952 | if (!l_page) | 1955 | if (!l_page) |
| 1953 | goto free_avic; | 1956 | goto free_avic; |
| 1954 | 1957 | ||
| @@ -2106,6 +2109,7 @@ static int avic_init_vcpu(struct vcpu_svm *svm) | |||
| 2106 | 2109 | ||
| 2107 | INIT_LIST_HEAD(&svm->ir_list); | 2110 | INIT_LIST_HEAD(&svm->ir_list); |
| 2108 | spin_lock_init(&svm->ir_list_lock); | 2111 | spin_lock_init(&svm->ir_list_lock); |
| 2112 | svm->dfr_reg = APIC_DFR_FLAT; | ||
| 2109 | 2113 | ||
| 2110 | return ret; | 2114 | return ret; |
| 2111 | } | 2115 | } |
| @@ -2119,13 +2123,14 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 2119 | struct page *nested_msrpm_pages; | 2123 | struct page *nested_msrpm_pages; |
| 2120 | int err; | 2124 | int err; |
| 2121 | 2125 | ||
| 2122 | svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | 2126 | svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); |
| 2123 | if (!svm) { | 2127 | if (!svm) { |
| 2124 | err = -ENOMEM; | 2128 | err = -ENOMEM; |
| 2125 | goto out; | 2129 | goto out; |
| 2126 | } | 2130 | } |
| 2127 | 2131 | ||
| 2128 | svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL); | 2132 | svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, |
| 2133 | GFP_KERNEL_ACCOUNT); | ||
| 2129 | if (!svm->vcpu.arch.guest_fpu) { | 2134 | if (!svm->vcpu.arch.guest_fpu) { |
| 2130 | printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); | 2135 | printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); |
| 2131 | err = -ENOMEM; | 2136 | err = -ENOMEM; |
| @@ -2137,19 +2142,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 2137 | goto free_svm; | 2142 | goto free_svm; |
| 2138 | 2143 | ||
| 2139 | err = -ENOMEM; | 2144 | err = -ENOMEM; |
| 2140 | page = alloc_page(GFP_KERNEL); | 2145 | page = alloc_page(GFP_KERNEL_ACCOUNT); |
| 2141 | if (!page) | 2146 | if (!page) |
| 2142 | goto uninit; | 2147 | goto uninit; |
| 2143 | 2148 | ||
| 2144 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 2149 | msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); |
| 2145 | if (!msrpm_pages) | 2150 | if (!msrpm_pages) |
| 2146 | goto free_page1; | 2151 | goto free_page1; |
| 2147 | 2152 | ||
| 2148 | nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 2153 | nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); |
| 2149 | if (!nested_msrpm_pages) | 2154 | if (!nested_msrpm_pages) |
| 2150 | goto free_page2; | 2155 | goto free_page2; |
| 2151 | 2156 | ||
| 2152 | hsave_page = alloc_page(GFP_KERNEL); | 2157 | hsave_page = alloc_page(GFP_KERNEL_ACCOUNT); |
| 2153 | if (!hsave_page) | 2158 | if (!hsave_page) |
| 2154 | goto free_page3; | 2159 | goto free_page3; |
| 2155 | 2160 | ||
| @@ -4565,8 +4570,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) | |||
| 4565 | return &logical_apic_id_table[index]; | 4570 | return &logical_apic_id_table[index]; |
| 4566 | } | 4571 | } |
| 4567 | 4572 | ||
| 4568 | static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, | 4573 | static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr) |
| 4569 | bool valid) | ||
| 4570 | { | 4574 | { |
| 4571 | bool flat; | 4575 | bool flat; |
| 4572 | u32 *entry, new_entry; | 4576 | u32 *entry, new_entry; |
| @@ -4579,31 +4583,39 @@ static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, | |||
| 4579 | new_entry = READ_ONCE(*entry); | 4583 | new_entry = READ_ONCE(*entry); |
| 4580 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; | 4584 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; |
| 4581 | new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); | 4585 | new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); |
| 4582 | if (valid) | 4586 | new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; |
| 4583 | new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; | ||
| 4584 | else | ||
| 4585 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK; | ||
| 4586 | WRITE_ONCE(*entry, new_entry); | 4587 | WRITE_ONCE(*entry, new_entry); |
| 4587 | 4588 | ||
| 4588 | return 0; | 4589 | return 0; |
| 4589 | } | 4590 | } |
| 4590 | 4591 | ||
| 4592 | static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu) | ||
| 4593 | { | ||
| 4594 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 4595 | bool flat = svm->dfr_reg == APIC_DFR_FLAT; | ||
| 4596 | u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat); | ||
| 4597 | |||
| 4598 | if (entry) | ||
| 4599 | WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK); | ||
| 4600 | } | ||
| 4601 | |||
| 4591 | static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) | 4602 | static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) |
| 4592 | { | 4603 | { |
| 4593 | int ret; | 4604 | int ret = 0; |
| 4594 | struct vcpu_svm *svm = to_svm(vcpu); | 4605 | struct vcpu_svm *svm = to_svm(vcpu); |
| 4595 | u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); | 4606 | u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); |
| 4596 | 4607 | ||
| 4597 | if (!ldr) | 4608 | if (ldr == svm->ldr_reg) |
| 4598 | return 1; | 4609 | return 0; |
| 4599 | 4610 | ||
| 4600 | ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true); | 4611 | avic_invalidate_logical_id_entry(vcpu); |
| 4601 | if (ret && svm->ldr_reg) { | 4612 | |
| 4602 | avic_ldr_write(vcpu, 0, svm->ldr_reg, false); | 4613 | if (ldr) |
| 4603 | svm->ldr_reg = 0; | 4614 | ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr); |
| 4604 | } else { | 4615 | |
| 4616 | if (!ret) | ||
| 4605 | svm->ldr_reg = ldr; | 4617 | svm->ldr_reg = ldr; |
| 4606 | } | 4618 | |
| 4607 | return ret; | 4619 | return ret; |
| 4608 | } | 4620 | } |
| 4609 | 4621 | ||
| @@ -4637,27 +4649,16 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) | |||
| 4637 | return 0; | 4649 | return 0; |
| 4638 | } | 4650 | } |
| 4639 | 4651 | ||
| 4640 | static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) | 4652 | static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) |
| 4641 | { | 4653 | { |
| 4642 | struct vcpu_svm *svm = to_svm(vcpu); | 4654 | struct vcpu_svm *svm = to_svm(vcpu); |
| 4643 | struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); | ||
| 4644 | u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); | 4655 | u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); |
| 4645 | u32 mod = (dfr >> 28) & 0xf; | ||
| 4646 | 4656 | ||
| 4647 | /* | 4657 | if (svm->dfr_reg == dfr) |
| 4648 | * We assume that all local APICs are using the same type. | 4658 | return; |
| 4649 | * If this changes, we need to flush the AVIC logical | ||
| 4650 | * APID id table. | ||
| 4651 | */ | ||
| 4652 | if (kvm_svm->ldr_mode == mod) | ||
| 4653 | return 0; | ||
| 4654 | |||
| 4655 | clear_page(page_address(kvm_svm->avic_logical_id_table_page)); | ||
| 4656 | kvm_svm->ldr_mode = mod; | ||
| 4657 | 4659 | ||
| 4658 | if (svm->ldr_reg) | 4660 | avic_invalidate_logical_id_entry(vcpu); |
| 4659 | avic_handle_ldr_update(vcpu); | 4661 | svm->dfr_reg = dfr; |
| 4660 | return 0; | ||
| 4661 | } | 4662 | } |
| 4662 | 4663 | ||
| 4663 | static int avic_unaccel_trap_write(struct vcpu_svm *svm) | 4664 | static int avic_unaccel_trap_write(struct vcpu_svm *svm) |
| @@ -5125,11 +5126,11 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) | |||
| 5125 | struct vcpu_svm *svm = to_svm(vcpu); | 5126 | struct vcpu_svm *svm = to_svm(vcpu); |
| 5126 | struct vmcb *vmcb = svm->vmcb; | 5127 | struct vmcb *vmcb = svm->vmcb; |
| 5127 | 5128 | ||
| 5128 | if (!kvm_vcpu_apicv_active(&svm->vcpu)) | 5129 | if (kvm_vcpu_apicv_active(vcpu)) |
| 5129 | return; | 5130 | vmcb->control.int_ctl |= AVIC_ENABLE_MASK; |
| 5130 | 5131 | else | |
| 5131 | vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; | 5132 | vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; |
| 5132 | mark_dirty(vmcb, VMCB_INTR); | 5133 | mark_dirty(vmcb, VMCB_AVIC); |
| 5133 | } | 5134 | } |
| 5134 | 5135 | ||
| 5135 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 5136 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
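mark_dirty() clears a bit in the VMCB clean-bits field so the CPU re-reads the corresponding VMCB state on the next VMRUN instead of relying on its cached copy; the hunk above moves the AVIC enable toggle from the generic interrupt-control clean bit (VMCB_INTR) to the AVIC one (VMCB_AVIC). A rough sketch of the helper as it exists elsewhere in svm.c, with the body approximated purely to illustrate the idiom:

	static inline void mark_dirty(struct vmcb *vmcb, int bit)
	{
		/* Force the CPU to reload this piece of VMCB state. */
		vmcb->control.clean &= ~(1U << bit);
	}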
| @@ -5195,7 +5196,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) | |||
| 5195 | * Allocating new amd_iommu_pi_data, which will get | 5196 | * Allocating new amd_iommu_pi_data, which will get |
| 5196 | * added to the per-vcpu ir_list. | 5197 | * added to the per-vcpu ir_list. |
| 5197 | */ | 5198 | */ |
| 5198 | ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL); | 5199 | ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); |
| 5199 | if (!ir) { | 5200 | if (!ir) { |
| 5200 | ret = -ENOMEM; | 5201 | ret = -ENOMEM; |
| 5201 | goto out; | 5202 | goto out; |
| @@ -6163,8 +6164,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) | |||
| 6163 | { | 6164 | { |
| 6164 | if (avic_handle_apic_id_update(vcpu) != 0) | 6165 | if (avic_handle_apic_id_update(vcpu) != 0) |
| 6165 | return; | 6166 | return; |
| 6166 | if (avic_handle_dfr_update(vcpu) != 0) | 6167 | avic_handle_dfr_update(vcpu); |
| 6167 | return; | ||
| 6168 | avic_handle_ldr_update(vcpu); | 6168 | avic_handle_ldr_update(vcpu); |
| 6169 | } | 6169 | } |
| 6170 | 6170 | ||
| @@ -6311,7 +6311,7 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) | |||
| 6311 | if (ret) | 6311 | if (ret) |
| 6312 | return ret; | 6312 | return ret; |
| 6313 | 6313 | ||
| 6314 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6314 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
| 6315 | if (!data) | 6315 | if (!data) |
| 6316 | return -ENOMEM; | 6316 | return -ENOMEM; |
| 6317 | 6317 | ||
| @@ -6361,7 +6361,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6361 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) | 6361 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) |
| 6362 | return -EFAULT; | 6362 | return -EFAULT; |
| 6363 | 6363 | ||
| 6364 | start = kzalloc(sizeof(*start), GFP_KERNEL); | 6364 | start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT); |
| 6365 | if (!start) | 6365 | if (!start) |
| 6366 | return -ENOMEM; | 6366 | return -ENOMEM; |
| 6367 | 6367 | ||
| @@ -6458,7 +6458,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6458 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) | 6458 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) |
| 6459 | return -EFAULT; | 6459 | return -EFAULT; |
| 6460 | 6460 | ||
| 6461 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6461 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
| 6462 | if (!data) | 6462 | if (!data) |
| 6463 | return -ENOMEM; | 6463 | return -ENOMEM; |
| 6464 | 6464 | ||
| @@ -6535,7 +6535,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6535 | if (copy_from_user(¶ms, measure, sizeof(params))) | 6535 | if (copy_from_user(¶ms, measure, sizeof(params))) |
| 6536 | return -EFAULT; | 6536 | return -EFAULT; |
| 6537 | 6537 | ||
| 6538 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6538 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
| 6539 | if (!data) | 6539 | if (!data) |
| 6540 | return -ENOMEM; | 6540 | return -ENOMEM; |
| 6541 | 6541 | ||
| @@ -6597,7 +6597,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6597 | if (!sev_guest(kvm)) | 6597 | if (!sev_guest(kvm)) |
| 6598 | return -ENOTTY; | 6598 | return -ENOTTY; |
| 6599 | 6599 | ||
| 6600 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6600 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
| 6601 | if (!data) | 6601 | if (!data) |
| 6602 | return -ENOMEM; | 6602 | return -ENOMEM; |
| 6603 | 6603 | ||
| @@ -6618,7 +6618,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6618 | if (!sev_guest(kvm)) | 6618 | if (!sev_guest(kvm)) |
| 6619 | return -ENOTTY; | 6619 | return -ENOTTY; |
| 6620 | 6620 | ||
| 6621 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6621 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
| 6622 | if (!data) | 6622 | if (!data) |
| 6623 | return -ENOMEM; | 6623 | return -ENOMEM; |
| 6624 | 6624 | ||
| @@ -6646,7 +6646,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, | |||
| 6646 | struct sev_data_dbg *data; | 6646 | struct sev_data_dbg *data; |
| 6647 | int ret; | 6647 | int ret; |
| 6648 | 6648 | ||
| 6649 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6649 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
| 6650 | if (!data) | 6650 | if (!data) |
| 6651 | return -ENOMEM; | 6651 | return -ENOMEM; |
| 6652 | 6652 | ||
| @@ -6901,7 +6901,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6901 | } | 6901 | } |
| 6902 | 6902 | ||
| 6903 | ret = -ENOMEM; | 6903 | ret = -ENOMEM; |
| 6904 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 6904 | data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); |
| 6905 | if (!data) | 6905 | if (!data) |
| 6906 | goto e_unpin_memory; | 6906 | goto e_unpin_memory; |
| 6907 | 6907 | ||
| @@ -7007,7 +7007,7 @@ static int svm_register_enc_region(struct kvm *kvm, | |||
| 7007 | if (range->addr > ULONG_MAX || range->size > ULONG_MAX) | 7007 | if (range->addr > ULONG_MAX || range->size > ULONG_MAX) |
| 7008 | return -EINVAL; | 7008 | return -EINVAL; |
| 7009 | 7009 | ||
| 7010 | region = kzalloc(sizeof(*region), GFP_KERNEL); | 7010 | region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT); |
| 7011 | if (!region) | 7011 | if (!region) |
| 7012 | return -ENOMEM; | 7012 | return -ENOMEM; |
| 7013 | 7013 | ||
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d737a51a53ca..f24a2c225070 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c | |||
| @@ -211,7 +211,6 @@ static void free_nested(struct kvm_vcpu *vcpu) | |||
| 211 | if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) | 211 | if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) |
| 212 | return; | 212 | return; |
| 213 | 213 | ||
| 214 | hrtimer_cancel(&vmx->nested.preemption_timer); | ||
| 215 | vmx->nested.vmxon = false; | 214 | vmx->nested.vmxon = false; |
| 216 | vmx->nested.smm.vmxon = false; | 215 | vmx->nested.smm.vmxon = false; |
| 217 | free_vpid(vmx->nested.vpid02); | 216 | free_vpid(vmx->nested.vpid02); |
| @@ -274,6 +273,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) | |||
| 274 | void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) | 273 | void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) |
| 275 | { | 274 | { |
| 276 | vcpu_load(vcpu); | 275 | vcpu_load(vcpu); |
| 276 | vmx_leave_nested(vcpu); | ||
| 277 | vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01); | 277 | vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01); |
| 278 | free_nested(vcpu); | 278 | free_nested(vcpu); |
| 279 | vcpu_put(vcpu); | 279 | vcpu_put(vcpu); |
| @@ -1980,17 +1980,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
| 1980 | prepare_vmcs02_early_full(vmx, vmcs12); | 1980 | prepare_vmcs02_early_full(vmx, vmcs12); |
| 1981 | 1981 | ||
| 1982 | /* | 1982 | /* |
| 1983 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before | ||
| 1984 | * entry, but only if the current (host) sp changed from the value | ||
| 1985 | * we wrote last (vmx->host_rsp). This cache is no longer relevant | ||
| 1986 | * if we switch vmcs, and rather than hold a separate cache per vmcs, | ||
| 1987 | * here we just force the write to happen on entry. host_rsp will | ||
| 1988 | * also be written unconditionally by nested_vmx_check_vmentry_hw() | ||
| 1989 | * if we are doing early consistency checks via hardware. | ||
| 1990 | */ | ||
| 1991 | vmx->host_rsp = 0; | ||
| 1992 | |||
| 1993 | /* | ||
| 1994 | * PIN CONTROLS | 1983 | * PIN CONTROLS |
| 1995 | */ | 1984 | */ |
| 1996 | exec_control = vmcs12->pin_based_vm_exec_control; | 1985 | exec_control = vmcs12->pin_based_vm_exec_control; |
| @@ -2289,10 +2278,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 2289 | } | 2278 | } |
| 2290 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); | 2279 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
| 2291 | 2280 | ||
| 2292 | vmx->nested.preemption_timer_expired = false; | ||
| 2293 | if (nested_cpu_has_preemption_timer(vmcs12)) | ||
| 2294 | vmx_start_preemption_timer(vcpu); | ||
| 2295 | |||
| 2296 | /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the | 2281 | /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the |
| 2297 | * bitwise-or of what L1 wants to trap for L2, and what we want to | 2282 | * bitwise-or of what L1 wants to trap for L2, and what we want to |
| 2298 | * trap. Note that CR0.TS also needs updating - we do this later. | 2283 | * trap. Note that CR0.TS also needs updating - we do this later. |
| @@ -2722,6 +2707,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
| 2722 | { | 2707 | { |
| 2723 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2708 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2724 | unsigned long cr3, cr4; | 2709 | unsigned long cr3, cr4; |
| 2710 | bool vm_fail; | ||
| 2725 | 2711 | ||
| 2726 | if (!nested_early_check) | 2712 | if (!nested_early_check) |
| 2727 | return 0; | 2713 | return 0; |
| @@ -2755,29 +2741,34 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
| 2755 | vmx->loaded_vmcs->host_state.cr4 = cr4; | 2741 | vmx->loaded_vmcs->host_state.cr4 = cr4; |
| 2756 | } | 2742 | } |
| 2757 | 2743 | ||
| 2758 | vmx->__launched = vmx->loaded_vmcs->launched; | ||
| 2759 | |||
| 2760 | asm( | 2744 | asm( |
| 2761 | /* Set HOST_RSP */ | ||
| 2762 | "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ | 2745 | "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ |
| 2763 | __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t" | 2746 | "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" |
| 2764 | "mov %%" _ASM_SP ", %c[host_rsp](%1)\n\t" | 2747 | "je 1f \n\t" |
| 2748 | __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" | ||
| 2749 | "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" | ||
| 2750 | "1: \n\t" | ||
| 2765 | "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ | 2751 | "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ |
| 2766 | 2752 | ||
| 2767 | /* Check if vmlaunch or vmresume is needed */ | 2753 | /* Check if vmlaunch or vmresume is needed */ |
| 2768 | "cmpl $0, %c[launched](%% " _ASM_CX")\n\t" | 2754 | "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" |
| 2769 | 2755 | ||
| 2756 | /* | ||
| 2757 | * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set | ||
| 2758 | * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail | ||
| 2759 | * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the | ||
| 2760 | * result of VM-Enter is captured via CC_{SET,OUT} to vm_fail. | ||
| 2761 | */ | ||
| 2770 | "call vmx_vmenter\n\t" | 2762 | "call vmx_vmenter\n\t" |
| 2771 | 2763 | ||
| 2772 | /* Set vmx->fail accordingly */ | 2764 | CC_SET(be) |
| 2773 | "setbe %c[fail](%% " _ASM_CX")\n\t" | 2765 | : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) |
| 2774 | : ASM_CALL_CONSTRAINT | 2766 | : [HOST_RSP]"r"((unsigned long)HOST_RSP), |
| 2775 | : "c"(vmx), "d"((unsigned long)HOST_RSP), | 2767 | [loaded_vmcs]"r"(vmx->loaded_vmcs), |
| 2776 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), | 2768 | [launched]"i"(offsetof(struct loaded_vmcs, launched)), |
| 2777 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | 2769 | [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), |
| 2778 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), | ||
| 2779 | [wordsize]"i"(sizeof(ulong)) | 2770 | [wordsize]"i"(sizeof(ulong)) |
| 2780 | : "rax", "cc", "memory" | 2771 | : "cc", "memory" |
| 2781 | ); | 2772 | ); |
| 2782 | 2773 | ||
| 2783 | preempt_enable(); | 2774 | preempt_enable(); |
| @@ -2787,10 +2778,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
| 2787 | if (vmx->msr_autoload.guest.nr) | 2778 | if (vmx->msr_autoload.guest.nr) |
| 2788 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); | 2779 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); |
| 2789 | 2780 | ||
| 2790 | if (vmx->fail) { | 2781 | if (vm_fail) { |
| 2791 | WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != | 2782 | WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != |
| 2792 | VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 2783 | VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
| 2793 | vmx->fail = 0; | ||
| 2794 | return 1; | 2784 | return 1; |
| 2795 | } | 2785 | } |
| 2796 | 2786 | ||
| @@ -2813,8 +2803,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
| 2813 | 2803 | ||
| 2814 | return 0; | 2804 | return 0; |
| 2815 | } | 2805 | } |
| 2816 | STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw); | ||
| 2817 | |||
| 2818 | 2806 | ||
| 2819 | static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, | 2807 | static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, |
| 2820 | struct vmcs12 *vmcs12); | 2808 | struct vmcs12 *vmcs12); |
| @@ -3031,6 +3019,15 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
| 3031 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 3019 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 3032 | 3020 | ||
| 3033 | /* | 3021 | /* |
| 3022 | * Do not start the preemption timer hrtimer until after we know | ||
| 3023 | * we are successful, so that only nested_vmx_vmexit needs to cancel | ||
| 3024 | * the timer. | ||
| 3025 | */ | ||
| 3026 | vmx->nested.preemption_timer_expired = false; | ||
| 3027 | if (nested_cpu_has_preemption_timer(vmcs12)) | ||
| 3028 | vmx_start_preemption_timer(vcpu); | ||
| 3029 | |||
| 3030 | /* | ||
| 3034 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point | 3031 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point |
| 3035 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet | 3032 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet |
| 3036 | * returned as far as L1 is concerned. It will only return (and set | 3033 | * returned as far as L1 is concerned. It will only return (and set |
| @@ -3450,13 +3447,10 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 3450 | else | 3447 | else |
| 3451 | vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; | 3448 | vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; |
| 3452 | 3449 | ||
| 3453 | if (nested_cpu_has_preemption_timer(vmcs12)) { | 3450 | if (nested_cpu_has_preemption_timer(vmcs12) && |
| 3454 | if (vmcs12->vm_exit_controls & | 3451 | vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) |
| 3455 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) | ||
| 3456 | vmcs12->vmx_preemption_timer_value = | 3452 | vmcs12->vmx_preemption_timer_value = |
| 3457 | vmx_get_preemption_timer_value(vcpu); | 3453 | vmx_get_preemption_timer_value(vcpu); |
| 3458 | hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); | ||
| 3459 | } | ||
| 3460 | 3454 | ||
| 3461 | /* | 3455 | /* |
| 3462 | * In some cases (usually, nested EPT), L2 is allowed to change its | 3456 | * In some cases (usually, nested EPT), L2 is allowed to change its |
| @@ -3864,6 +3858,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
| 3864 | 3858 | ||
| 3865 | leave_guest_mode(vcpu); | 3859 | leave_guest_mode(vcpu); |
| 3866 | 3860 | ||
| 3861 | if (nested_cpu_has_preemption_timer(vmcs12)) | ||
| 3862 | hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); | ||
| 3863 | |||
| 3867 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) | 3864 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) |
| 3868 | vcpu->arch.tsc_offset -= vmcs12->tsc_offset; | 3865 | vcpu->arch.tsc_offset -= vmcs12->tsc_offset; |
| 3869 | 3866 | ||
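Taken together, the preemption-timer hunks in this file give the timer a single owner: it is armed in nested_vmx_enter_non_root_mode() only once VM-Entry is known to proceed, and cancelled in exactly one place, nested_vmx_vmexit(), instead of in sync_vmcs12() and free_nested(). A generic sketch of that arm-late, cancel-once hrtimer pattern (names here are placeholders, not from the patch):

	static enum hrtimer_restart demo_timer_fn(struct hrtimer *timer)
	{
		/* e.g. note the expiry and kick the vCPU */
		return HRTIMER_NORESTART;
	}

	hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	timer.function = demo_timer_fn;

	/* Arm only once the operation is known to succeed ... */
	hrtimer_start(&timer, ns_to_ktime(delta_ns), HRTIMER_MODE_REL);

	/* ... so the single teardown path can cancel unconditionally;
	 * hrtimer_cancel() is safe even if the timer already fired.
	 */
	hrtimer_cancel(&timer);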
| @@ -3915,9 +3912,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
| 3915 | vmx_flush_tlb(vcpu, true); | 3912 | vmx_flush_tlb(vcpu, true); |
| 3916 | } | 3913 | } |
| 3917 | 3914 | ||
| 3918 | /* This is needed for same reason as it was needed in prepare_vmcs02 */ | ||
| 3919 | vmx->host_rsp = 0; | ||
| 3920 | |||
| 3921 | /* Unpin physical memory we referred to in vmcs02 */ | 3915 | /* Unpin physical memory we referred to in vmcs02 */ |
| 3922 | if (vmx->nested.apic_access_page) { | 3916 | if (vmx->nested.apic_access_page) { |
| 3923 | kvm_release_page_dirty(vmx->nested.apic_access_page); | 3917 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
| @@ -4035,25 +4029,50 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | |||
| 4035 | /* Addr = segment_base + offset */ | 4029 | /* Addr = segment_base + offset */ |
| 4036 | /* offset = base + [index * scale] + displacement */ | 4030 | /* offset = base + [index * scale] + displacement */ |
| 4037 | off = exit_qualification; /* holds the displacement */ | 4031 | off = exit_qualification; /* holds the displacement */ |
| 4032 | if (addr_size == 1) | ||
| 4033 | off = (gva_t)sign_extend64(off, 31); | ||
| 4034 | else if (addr_size == 0) | ||
| 4035 | off = (gva_t)sign_extend64(off, 15); | ||
| 4038 | if (base_is_valid) | 4036 | if (base_is_valid) |
| 4039 | off += kvm_register_read(vcpu, base_reg); | 4037 | off += kvm_register_read(vcpu, base_reg); |
| 4040 | if (index_is_valid) | 4038 | if (index_is_valid) |
| 4041 | off += kvm_register_read(vcpu, index_reg)<<scaling; | 4039 | off += kvm_register_read(vcpu, index_reg)<<scaling; |
| 4042 | vmx_get_segment(vcpu, &s, seg_reg); | 4040 | vmx_get_segment(vcpu, &s, seg_reg); |
| 4043 | *ret = s.base + off; | ||
| 4044 | 4041 | ||
| 4042 | /* | ||
| 4043 | * The effective address, i.e. @off, of a memory operand is truncated | ||
| 4044 | * based on the address size of the instruction. Note that this is | ||
| 4045 | * the *effective address*, i.e. the address prior to accounting for | ||
| 4046 | * the segment's base. | ||
| 4047 | */ | ||
| 4045 | if (addr_size == 1) /* 32 bit */ | 4048 | if (addr_size == 1) /* 32 bit */ |
| 4046 | *ret &= 0xffffffff; | 4049 | off &= 0xffffffff; |
| 4050 | else if (addr_size == 0) /* 16 bit */ | ||
| 4051 | off &= 0xffff; | ||
| 4047 | 4052 | ||
| 4048 | /* Checks for #GP/#SS exceptions. */ | 4053 | /* Checks for #GP/#SS exceptions. */ |
| 4049 | exn = false; | 4054 | exn = false; |
| 4050 | if (is_long_mode(vcpu)) { | 4055 | if (is_long_mode(vcpu)) { |
| 4056 | /* | ||
| 4057 | * The virtual/linear address is never truncated in 64-bit | ||
| 4058 | * mode, e.g. a 32-bit address size can yield a 64-bit virtual | ||
| 4059 | * address when using FS/GS with a non-zero base. | ||
| 4060 | */ | ||
| 4061 | *ret = s.base + off; | ||
| 4062 | |||
| 4051 | /* Long mode: #GP(0)/#SS(0) if the memory address is in a | 4063 | /* Long mode: #GP(0)/#SS(0) if the memory address is in a |
| 4052 | * non-canonical form. This is the only check on the memory | 4064 | * non-canonical form. This is the only check on the memory |
| 4053 | * destination for long mode! | 4065 | * destination for long mode! |
| 4054 | */ | 4066 | */ |
| 4055 | exn = is_noncanonical_address(*ret, vcpu); | 4067 | exn = is_noncanonical_address(*ret, vcpu); |
| 4056 | } else if (is_protmode(vcpu)) { | 4068 | } else { |
| 4069 | /* | ||
| 4070 | * When not in long mode, the virtual/linear address is | ||
| 4071 | * unconditionally truncated to 32 bits regardless of the | ||
| 4072 | * address size. | ||
| 4073 | */ | ||
| 4074 | *ret = (s.base + off) & 0xffffffff; | ||
| 4075 | |||
| 4057 | /* Protected mode: apply checks for segment validity in the | 4076 | /* Protected mode: apply checks for segment validity in the |
| 4058 | * following order: | 4077 | * following order: |
| 4059 | * - segment type check (#GP(0) may be thrown) | 4078 | * - segment type check (#GP(0) may be thrown) |
| @@ -4077,10 +4096,16 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | |||
| 4077 | /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. | 4096 | /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. |
| 4078 | */ | 4097 | */ |
| 4079 | exn = (s.unusable != 0); | 4098 | exn = (s.unusable != 0); |
| 4080 | /* Protected mode: #GP(0)/#SS(0) if the memory | 4099 | |
| 4081 | * operand is outside the segment limit. | 4100 | /* |
| 4101 | * Protected mode: #GP(0)/#SS(0) if the memory operand is | ||
| 4102 | * outside the segment limit. All CPUs that support VMX ignore | ||
| 4103 | * limit checks for flat segments, i.e. segments with base==0, | ||
| 4104 | * limit==0xffffffff and of type expand-up data or code. | ||
| 4082 | */ | 4105 | */ |
| 4083 | exn = exn || (off + sizeof(u64) > s.limit); | 4106 | if (!(s.base == 0 && s.limit == 0xffffffff && |
| 4107 | ((s.type & 8) || !(s.type & 4)))) | ||
| 4108 | exn = exn || (off + sizeof(u64) > s.limit); | ||
| 4084 | } | 4109 | } |
| 4085 | if (exn) { | 4110 | if (exn) { |
| 4086 | kvm_queue_exception_e(vcpu, | 4111 | kvm_queue_exception_e(vcpu, |
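The get_vmx_mem_address() changes above truncate the effective address (displacement plus base/index registers) to the instruction's address size before adding the segment base, and only then clamp the linear address to 32 bits outside long mode. A worked example with assumed values, 16-bit address size, DS.base = 0x1000, base register = 0xfff0, displacement = 0x20:

	off  = 0x20 + 0xfff0 = 0x10010
	off &= 0xffff        =  0x0010	/* effective address wraps at 64 KiB */
	*ret = (0x1000 + 0x0010) & 0xffffffff = 0x1010

The old code would have produced 0x11010 here, since it added the untruncated offset to the segment base and had no 16-bit case at all. The second hunk independently skips the segment-limit check for flat expand-up segments (base 0, limit 0xffffffff), matching what the comment says the hardware does.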
| @@ -4145,11 +4170,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) | |||
| 4145 | if (r < 0) | 4170 | if (r < 0) |
| 4146 | goto out_vmcs02; | 4171 | goto out_vmcs02; |
| 4147 | 4172 | ||
| 4148 | vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); | 4173 | vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); |
| 4149 | if (!vmx->nested.cached_vmcs12) | 4174 | if (!vmx->nested.cached_vmcs12) |
| 4150 | goto out_cached_vmcs12; | 4175 | goto out_cached_vmcs12; |
| 4151 | 4176 | ||
| 4152 | vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); | 4177 | vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); |
| 4153 | if (!vmx->nested.cached_shadow_vmcs12) | 4178 | if (!vmx->nested.cached_shadow_vmcs12) |
| 4154 | goto out_cached_shadow_vmcs12; | 4179 | goto out_cached_shadow_vmcs12; |
| 4155 | 4180 | ||
| @@ -5696,6 +5721,10 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) | |||
| 5696 | enable_shadow_vmcs = 0; | 5721 | enable_shadow_vmcs = 0; |
| 5697 | if (enable_shadow_vmcs) { | 5722 | if (enable_shadow_vmcs) { |
| 5698 | for (i = 0; i < VMX_BITMAP_NR; i++) { | 5723 | for (i = 0; i < VMX_BITMAP_NR; i++) { |
| 5724 | /* | ||
| 5725 | * The vmx_bitmap is not tied to a VM and so should | ||
| 5726 | * not be charged to a memcg. | ||
| 5727 | */ | ||
| 5699 | vmx_bitmap[i] = (unsigned long *) | 5728 | vmx_bitmap[i] = (unsigned long *) |
| 5700 | __get_free_page(GFP_KERNEL); | 5729 | __get_free_page(GFP_KERNEL); |
| 5701 | if (!vmx_bitmap[i]) { | 5730 | if (!vmx_bitmap[i]) { |
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 6def3ba88e3b..cb6079f8a227 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h | |||
| @@ -34,6 +34,7 @@ struct vmcs_host_state { | |||
| 34 | unsigned long cr4; /* May not match real cr4 */ | 34 | unsigned long cr4; /* May not match real cr4 */ |
| 35 | unsigned long gs_base; | 35 | unsigned long gs_base; |
| 36 | unsigned long fs_base; | 36 | unsigned long fs_base; |
| 37 | unsigned long rsp; | ||
| 37 | 38 | ||
| 38 | u16 fs_sel, gs_sel, ldt_sel; | 39 | u16 fs_sel, gs_sel, ldt_sel; |
| 39 | #ifdef CONFIG_X86_64 | 40 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index bcef2c7e9bc4..7b272738c576 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S | |||
| @@ -1,6 +1,30 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
| 3 | #include <asm/asm.h> | 3 | #include <asm/asm.h> |
| 4 | #include <asm/bitsperlong.h> | ||
| 5 | #include <asm/kvm_vcpu_regs.h> | ||
| 6 | |||
| 7 | #define WORD_SIZE (BITS_PER_LONG / 8) | ||
| 8 | |||
| 9 | #define VCPU_RAX __VCPU_REGS_RAX * WORD_SIZE | ||
| 10 | #define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE | ||
| 11 | #define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE | ||
| 12 | #define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE | ||
| 13 | /* Intentionally omit RSP as it's context switched by hardware */ | ||
| 14 | #define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE | ||
| 15 | #define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE | ||
| 16 | #define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE | ||
| 17 | |||
| 18 | #ifdef CONFIG_X86_64 | ||
| 19 | #define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE | ||
| 20 | #define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE | ||
| 21 | #define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE | ||
| 22 | #define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE | ||
| 23 | #define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE | ||
| 24 | #define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE | ||
| 25 | #define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE | ||
| 26 | #define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE | ||
| 27 | #endif | ||
| 4 | 28 | ||
| 5 | .text | 29 | .text |
| 6 | 30 | ||
| @@ -55,3 +79,146 @@ ENDPROC(vmx_vmenter) | |||
| 55 | ENTRY(vmx_vmexit) | 79 | ENTRY(vmx_vmexit) |
| 56 | ret | 80 | ret |
| 57 | ENDPROC(vmx_vmexit) | 81 | ENDPROC(vmx_vmexit) |
| 82 | |||
| 83 | /** | ||
| 84 | * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode | ||
| 85 | * @vmx: struct vcpu_vmx * | ||
| 86 | * @regs: unsigned long * (to guest registers) | ||
| 87 | * @launched: %true if the VMCS has been launched | ||
| 88 | * | ||
| 89 | * Returns: | ||
| 90 | * 0 on VM-Exit, 1 on VM-Fail | ||
| 91 | */ | ||
| 92 | ENTRY(__vmx_vcpu_run) | ||
| 93 | push %_ASM_BP | ||
| 94 | mov %_ASM_SP, %_ASM_BP | ||
| 95 | #ifdef CONFIG_X86_64 | ||
| 96 | push %r15 | ||
| 97 | push %r14 | ||
| 98 | push %r13 | ||
| 99 | push %r12 | ||
| 100 | #else | ||
| 101 | push %edi | ||
| 102 | push %esi | ||
| 103 | #endif | ||
| 104 | push %_ASM_BX | ||
| 105 | |||
| 106 | /* | ||
| 107 | * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and | ||
| 108 | * @regs is needed after VM-Exit to save the guest's register values. | ||
| 109 | */ | ||
| 110 | push %_ASM_ARG2 | ||
| 111 | |||
| 112 | /* Copy @launched to BL, _ASM_ARG3 is volatile. */ | ||
| 113 | mov %_ASM_ARG3B, %bl | ||
| 114 | |||
| 115 | /* Adjust RSP to account for the CALL to vmx_vmenter(). */ | ||
| 116 | lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 | ||
| 117 | call vmx_update_host_rsp | ||
| 118 | |||
| 119 | /* Load @regs to RAX. */ | ||
| 120 | mov (%_ASM_SP), %_ASM_AX | ||
| 121 | |||
| 122 | /* Check if vmlaunch or vmresume is needed */ | ||
| 123 | cmpb $0, %bl | ||
| 124 | |||
| 125 | /* Load guest registers. Don't clobber flags. */ | ||
| 126 | mov VCPU_RBX(%_ASM_AX), %_ASM_BX | ||
| 127 | mov VCPU_RCX(%_ASM_AX), %_ASM_CX | ||
| 128 | mov VCPU_RDX(%_ASM_AX), %_ASM_DX | ||
| 129 | mov VCPU_RSI(%_ASM_AX), %_ASM_SI | ||
| 130 | mov VCPU_RDI(%_ASM_AX), %_ASM_DI | ||
| 131 | mov VCPU_RBP(%_ASM_AX), %_ASM_BP | ||
| 132 | #ifdef CONFIG_X86_64 | ||
| 133 | mov VCPU_R8 (%_ASM_AX), %r8 | ||
| 134 | mov VCPU_R9 (%_ASM_AX), %r9 | ||
| 135 | mov VCPU_R10(%_ASM_AX), %r10 | ||
| 136 | mov VCPU_R11(%_ASM_AX), %r11 | ||
| 137 | mov VCPU_R12(%_ASM_AX), %r12 | ||
| 138 | mov VCPU_R13(%_ASM_AX), %r13 | ||
| 139 | mov VCPU_R14(%_ASM_AX), %r14 | ||
| 140 | mov VCPU_R15(%_ASM_AX), %r15 | ||
| 141 | #endif | ||
| 142 | /* Load guest RAX. This kills the vmx_vcpu pointer! */ | ||
| 143 | mov VCPU_RAX(%_ASM_AX), %_ASM_AX | ||
| 144 | |||
| 145 | /* Enter guest mode */ | ||
| 146 | call vmx_vmenter | ||
| 147 | |||
| 148 | /* Jump on VM-Fail. */ | ||
| 149 | jbe 2f | ||
| 150 | |||
| 151 | /* Temporarily save guest's RAX. */ | ||
| 152 | push %_ASM_AX | ||
| 153 | |||
| 154 | /* Reload @regs to RAX. */ | ||
| 155 | mov WORD_SIZE(%_ASM_SP), %_ASM_AX | ||
| 156 | |||
| 157 | /* Save all guest registers, including RAX from the stack */ | ||
| 158 | __ASM_SIZE(pop) VCPU_RAX(%_ASM_AX) | ||
| 159 | mov %_ASM_BX, VCPU_RBX(%_ASM_AX) | ||
| 160 | mov %_ASM_CX, VCPU_RCX(%_ASM_AX) | ||
| 161 | mov %_ASM_DX, VCPU_RDX(%_ASM_AX) | ||
| 162 | mov %_ASM_SI, VCPU_RSI(%_ASM_AX) | ||
| 163 | mov %_ASM_DI, VCPU_RDI(%_ASM_AX) | ||
| 164 | mov %_ASM_BP, VCPU_RBP(%_ASM_AX) | ||
| 165 | #ifdef CONFIG_X86_64 | ||
| 166 | mov %r8, VCPU_R8 (%_ASM_AX) | ||
| 167 | mov %r9, VCPU_R9 (%_ASM_AX) | ||
| 168 | mov %r10, VCPU_R10(%_ASM_AX) | ||
| 169 | mov %r11, VCPU_R11(%_ASM_AX) | ||
| 170 | mov %r12, VCPU_R12(%_ASM_AX) | ||
| 171 | mov %r13, VCPU_R13(%_ASM_AX) | ||
| 172 | mov %r14, VCPU_R14(%_ASM_AX) | ||
| 173 | mov %r15, VCPU_R15(%_ASM_AX) | ||
| 174 | #endif | ||
| 175 | |||
| 176 | /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ | ||
| 177 | xor %eax, %eax | ||
| 178 | |||
| 179 | /* | ||
| 180 | * Clear all general purpose registers except RSP and RAX to prevent | ||
| 181 | * speculative use of the guest's values, even those that are reloaded | ||
| 182 | * via the stack. In theory, an L1 cache miss when restoring registers | ||
| 183 | * could lead to speculative execution with the guest's values. | ||
| 184 | * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially | ||
| 185 | * free. RSP and RAX are exempt as RSP is restored by hardware during | ||
| 186 | * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. | ||
| 187 | */ | ||
| 188 | 1: xor %ebx, %ebx | ||
| 189 | xor %ecx, %ecx | ||
| 190 | xor %edx, %edx | ||
| 191 | xor %esi, %esi | ||
| 192 | xor %edi, %edi | ||
| 193 | xor %ebp, %ebp | ||
| 194 | #ifdef CONFIG_X86_64 | ||
| 195 | xor %r8d, %r8d | ||
| 196 | xor %r9d, %r9d | ||
| 197 | xor %r10d, %r10d | ||
| 198 | xor %r11d, %r11d | ||
| 199 | xor %r12d, %r12d | ||
| 200 | xor %r13d, %r13d | ||
| 201 | xor %r14d, %r14d | ||
| 202 | xor %r15d, %r15d | ||
| 203 | #endif | ||
| 204 | |||
| 205 | /* "POP" @regs. */ | ||
| 206 | add $WORD_SIZE, %_ASM_SP | ||
| 207 | pop %_ASM_BX | ||
| 208 | |||
| 209 | #ifdef CONFIG_X86_64 | ||
| 210 | pop %r12 | ||
| 211 | pop %r13 | ||
| 212 | pop %r14 | ||
| 213 | pop %r15 | ||
| 214 | #else | ||
| 215 | pop %esi | ||
| 216 | pop %edi | ||
| 217 | #endif | ||
| 218 | pop %_ASM_BP | ||
| 219 | ret | ||
| 220 | |||
| 221 | /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ | ||
| 222 | 2: mov $1, %eax | ||
| 223 | jmp 1b | ||
| 224 | ENDPROC(__vmx_vcpu_run) | ||
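__vmx_vcpu_run receives a pointer to the vcpu->arch.regs[] array (see how it is wired up in vmx.c below) and indexes it with the VCPU_* byte offsets defined at the top of this file. Assuming the usual x86 register numbering from <asm/kvm_vcpu_regs.h> (RAX=0, RCX=1, RDX=2, RBX=3, ...), WORD_SIZE is 8 on a 64-bit build, so for example VCPU_RBX = 3 * 8 = 24 and "mov VCPU_RBX(%_ASM_AX), %_ASM_BX" loads regs[3], the guest's RBX, from byte offset 24 of that array.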
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 30a6bcd735ec..c73375e01ab8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c | |||
| @@ -246,6 +246,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) | |||
| 246 | 246 | ||
| 247 | if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && | 247 | if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && |
| 248 | !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { | 248 | !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { |
| 249 | /* | ||
| 250 | * This allocation for vmx_l1d_flush_pages is not tied to a VM | ||
| 251 | * lifetime and so should not be charged to a memcg. | ||
| 252 | */ | ||
| 249 | page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); | 253 | page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); |
| 250 | if (!page) | 254 | if (!page) |
| 251 | return -ENOMEM; | 255 | return -ENOMEM; |
| @@ -2387,13 +2391,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, | |||
| 2387 | return 0; | 2391 | return 0; |
| 2388 | } | 2392 | } |
| 2389 | 2393 | ||
| 2390 | struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu) | 2394 | struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) |
| 2391 | { | 2395 | { |
| 2392 | int node = cpu_to_node(cpu); | 2396 | int node = cpu_to_node(cpu); |
| 2393 | struct page *pages; | 2397 | struct page *pages; |
| 2394 | struct vmcs *vmcs; | 2398 | struct vmcs *vmcs; |
| 2395 | 2399 | ||
| 2396 | pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); | 2400 | pages = __alloc_pages_node(node, flags, vmcs_config.order); |
| 2397 | if (!pages) | 2401 | if (!pages) |
| 2398 | return NULL; | 2402 | return NULL; |
| 2399 | vmcs = page_address(pages); | 2403 | vmcs = page_address(pages); |
| @@ -2440,7 +2444,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | |||
| 2440 | loaded_vmcs_init(loaded_vmcs); | 2444 | loaded_vmcs_init(loaded_vmcs); |
| 2441 | 2445 | ||
| 2442 | if (cpu_has_vmx_msr_bitmap()) { | 2446 | if (cpu_has_vmx_msr_bitmap()) { |
| 2443 | loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | 2447 | loaded_vmcs->msr_bitmap = (unsigned long *) |
| 2448 | __get_free_page(GFP_KERNEL_ACCOUNT); | ||
| 2444 | if (!loaded_vmcs->msr_bitmap) | 2449 | if (!loaded_vmcs->msr_bitmap) |
| 2445 | goto out_vmcs; | 2450 | goto out_vmcs; |
| 2446 | memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); | 2451 | memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); |
| @@ -2481,7 +2486,7 @@ static __init int alloc_kvm_area(void) | |||
| 2481 | for_each_possible_cpu(cpu) { | 2486 | for_each_possible_cpu(cpu) { |
| 2482 | struct vmcs *vmcs; | 2487 | struct vmcs *vmcs; |
| 2483 | 2488 | ||
| 2484 | vmcs = alloc_vmcs_cpu(false, cpu); | 2489 | vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); |
| 2485 | if (!vmcs) { | 2490 | if (!vmcs) { |
| 2486 | free_kvm_area(); | 2491 | free_kvm_area(); |
| 2487 | return -ENOMEM; | 2492 | return -ENOMEM; |
| @@ -6360,150 +6365,15 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) | |||
| 6360 | vmx->loaded_vmcs->hv_timer_armed = false; | 6365 | vmx->loaded_vmcs->hv_timer_armed = false; |
| 6361 | } | 6366 | } |
| 6362 | 6367 | ||
| 6363 | static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) | 6368 | void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) |
| 6364 | { | 6369 | { |
| 6365 | unsigned long evmcs_rsp; | 6370 | if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { |
| 6366 | 6371 | vmx->loaded_vmcs->host_state.rsp = host_rsp; | |
| 6367 | vmx->__launched = vmx->loaded_vmcs->launched; | 6372 | vmcs_writel(HOST_RSP, host_rsp); |
| 6368 | 6373 | } | |
| 6369 | evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? | ||
| 6370 | (unsigned long)¤t_evmcs->host_rsp : 0; | ||
| 6371 | |||
| 6372 | if (static_branch_unlikely(&vmx_l1d_should_flush)) | ||
| 6373 | vmx_l1d_flush(vcpu); | ||
| 6374 | |||
| 6375 | asm( | ||
| 6376 | /* Store host registers */ | ||
| 6377 | "push %%" _ASM_DX "; push %%" _ASM_BP ";" | ||
| 6378 | "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ | ||
| 6379 | "push %%" _ASM_CX " \n\t" | ||
| 6380 | "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ | ||
| 6381 | "cmp %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t" | ||
| 6382 | "je 1f \n\t" | ||
| 6383 | "mov %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t" | ||
| 6384 | /* Avoid VMWRITE when Enlightened VMCS is in use */ | ||
| 6385 | "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" | ||
| 6386 | "jz 2f \n\t" | ||
| 6387 | "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" | ||
| 6388 | "jmp 1f \n\t" | ||
| 6389 | "2: \n\t" | ||
| 6390 | __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t" | ||
| 6391 | "1: \n\t" | ||
| 6392 | "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ | ||
| 6393 | |||
| 6394 | /* Reload cr2 if changed */ | ||
| 6395 | "mov %c[cr2](%%" _ASM_CX "), %%" _ASM_AX " \n\t" | ||
| 6396 | "mov %%cr2, %%" _ASM_DX " \n\t" | ||
| 6397 | "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" | ||
| 6398 | "je 3f \n\t" | ||
| 6399 | "mov %%" _ASM_AX", %%cr2 \n\t" | ||
| 6400 | "3: \n\t" | ||
| 6401 | /* Check if vmlaunch or vmresume is needed */ | ||
| 6402 | "cmpl $0, %c[launched](%%" _ASM_CX ") \n\t" | ||
| 6403 | /* Load guest registers. Don't clobber flags. */ | ||
| 6404 | "mov %c[rax](%%" _ASM_CX "), %%" _ASM_AX " \n\t" | ||
| 6405 | "mov %c[rbx](%%" _ASM_CX "), %%" _ASM_BX " \n\t" | ||
| 6406 | "mov %c[rdx](%%" _ASM_CX "), %%" _ASM_DX " \n\t" | ||
| 6407 | "mov %c[rsi](%%" _ASM_CX "), %%" _ASM_SI " \n\t" | ||
| 6408 | "mov %c[rdi](%%" _ASM_CX "), %%" _ASM_DI " \n\t" | ||
| 6409 | "mov %c[rbp](%%" _ASM_CX "), %%" _ASM_BP " \n\t" | ||
| 6410 | #ifdef CONFIG_X86_64 | ||
| 6411 | "mov %c[r8](%%" _ASM_CX "), %%r8 \n\t" | ||
| 6412 | "mov %c[r9](%%" _ASM_CX "), %%r9 \n\t" | ||
| 6413 | "mov %c[r10](%%" _ASM_CX "), %%r10 \n\t" | ||
| 6414 | "mov %c[r11](%%" _ASM_CX "), %%r11 \n\t" | ||
| 6415 | "mov %c[r12](%%" _ASM_CX "), %%r12 \n\t" | ||
| 6416 | "mov %c[r13](%%" _ASM_CX "), %%r13 \n\t" | ||
| 6417 | "mov %c[r14](%%" _ASM_CX "), %%r14 \n\t" | ||
| 6418 | "mov %c[r15](%%" _ASM_CX "), %%r15 \n\t" | ||
| 6419 | #endif | ||
| 6420 | /* Load guest RCX. This kills the vmx_vcpu pointer! */ | ||
| 6421 | "mov %c[rcx](%%" _ASM_CX "), %%" _ASM_CX " \n\t" | ||
| 6422 | |||
| 6423 | /* Enter guest mode */ | ||
| 6424 | "call vmx_vmenter\n\t" | ||
| 6425 | |||
| 6426 | /* Save guest's RCX to the stack placeholder (see above) */ | ||
| 6427 | "mov %%" _ASM_CX ", %c[wordsize](%%" _ASM_SP ") \n\t" | ||
| 6428 | |||
| 6429 | /* Load host's RCX, i.e. the vmx_vcpu pointer */ | ||
| 6430 | "pop %%" _ASM_CX " \n\t" | ||
| 6431 | |||
| 6432 | /* Set vmx->fail based on EFLAGS.{CF,ZF} */ | ||
| 6433 | "setbe %c[fail](%%" _ASM_CX ")\n\t" | ||
| 6434 | |||
| 6435 | /* Save all guest registers, including RCX from the stack */ | ||
| 6436 | "mov %%" _ASM_AX ", %c[rax](%%" _ASM_CX ") \n\t" | ||
| 6437 | "mov %%" _ASM_BX ", %c[rbx](%%" _ASM_CX ") \n\t" | ||
| 6438 | __ASM_SIZE(pop) " %c[rcx](%%" _ASM_CX ") \n\t" | ||
| 6439 | "mov %%" _ASM_DX ", %c[rdx](%%" _ASM_CX ") \n\t" | ||
| 6440 | "mov %%" _ASM_SI ", %c[rsi](%%" _ASM_CX ") \n\t" | ||
| 6441 | "mov %%" _ASM_DI ", %c[rdi](%%" _ASM_CX ") \n\t" | ||
| 6442 | "mov %%" _ASM_BP ", %c[rbp](%%" _ASM_CX ") \n\t" | ||
| 6443 | #ifdef CONFIG_X86_64 | ||
| 6444 | "mov %%r8, %c[r8](%%" _ASM_CX ") \n\t" | ||
| 6445 | "mov %%r9, %c[r9](%%" _ASM_CX ") \n\t" | ||
| 6446 | "mov %%r10, %c[r10](%%" _ASM_CX ") \n\t" | ||
| 6447 | "mov %%r11, %c[r11](%%" _ASM_CX ") \n\t" | ||
| 6448 | "mov %%r12, %c[r12](%%" _ASM_CX ") \n\t" | ||
| 6449 | "mov %%r13, %c[r13](%%" _ASM_CX ") \n\t" | ||
| 6450 | "mov %%r14, %c[r14](%%" _ASM_CX ") \n\t" | ||
| 6451 | "mov %%r15, %c[r15](%%" _ASM_CX ") \n\t" | ||
| 6452 | /* | ||
| 6453 | * Clear host registers marked as clobbered to prevent | ||
| 6454 | * speculative use. | ||
| 6455 | */ | ||
| 6456 | "xor %%r8d, %%r8d \n\t" | ||
| 6457 | "xor %%r9d, %%r9d \n\t" | ||
| 6458 | "xor %%r10d, %%r10d \n\t" | ||
| 6459 | "xor %%r11d, %%r11d \n\t" | ||
| 6460 | "xor %%r12d, %%r12d \n\t" | ||
| 6461 | "xor %%r13d, %%r13d \n\t" | ||
| 6462 | "xor %%r14d, %%r14d \n\t" | ||
| 6463 | "xor %%r15d, %%r15d \n\t" | ||
| 6464 | #endif | ||
| 6465 | "mov %%cr2, %%" _ASM_AX " \n\t" | ||
| 6466 | "mov %%" _ASM_AX ", %c[cr2](%%" _ASM_CX ") \n\t" | ||
| 6467 | |||
| 6468 | "xor %%eax, %%eax \n\t" | ||
| 6469 | "xor %%ebx, %%ebx \n\t" | ||
| 6470 | "xor %%esi, %%esi \n\t" | ||
| 6471 | "xor %%edi, %%edi \n\t" | ||
| 6472 | "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" | ||
| 6473 | : ASM_CALL_CONSTRAINT | ||
| 6474 | : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), | ||
| 6475 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), | ||
| 6476 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | ||
| 6477 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), | ||
| 6478 | [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), | ||
| 6479 | [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), | ||
| 6480 | [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), | ||
| 6481 | [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])), | ||
| 6482 | [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])), | ||
| 6483 | [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])), | ||
| 6484 | [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])), | ||
| 6485 | #ifdef CONFIG_X86_64 | ||
| 6486 | [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])), | ||
| 6487 | [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])), | ||
| 6488 | [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])), | ||
| 6489 | [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])), | ||
| 6490 | [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])), | ||
| 6491 | [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])), | ||
| 6492 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), | ||
| 6493 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), | ||
| 6494 | #endif | ||
| 6495 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), | ||
| 6496 | [wordsize]"i"(sizeof(ulong)) | ||
| 6497 | : "cc", "memory" | ||
| 6498 | #ifdef CONFIG_X86_64 | ||
| 6499 | , "rax", "rbx", "rdi" | ||
| 6500 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" | ||
| 6501 | #else | ||
| 6502 | , "eax", "ebx", "edi" | ||
| 6503 | #endif | ||
| 6504 | ); | ||
| 6505 | } | 6374 | } |
| 6506 | STACK_FRAME_NON_STANDARD(__vmx_vcpu_run); | 6375 | |
| 6376 | bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); | ||
| 6507 | 6377 | ||
| 6508 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | 6378 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
| 6509 | { | 6379 | { |
| @@ -6572,7 +6442,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 6572 | */ | 6442 | */ |
| 6573 | x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); | 6443 | x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); |
| 6574 | 6444 | ||
| 6575 | __vmx_vcpu_run(vcpu, vmx); | 6445 | if (static_branch_unlikely(&vmx_l1d_should_flush)) |
| 6446 | vmx_l1d_flush(vcpu); | ||
| 6447 | |||
| 6448 | if (vcpu->arch.cr2 != read_cr2()) | ||
| 6449 | write_cr2(vcpu->arch.cr2); | ||
| 6450 | |||
| 6451 | vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, | ||
| 6452 | vmx->loaded_vmcs->launched); | ||
| 6453 | |||
| 6454 | vcpu->arch.cr2 = read_cr2(); | ||
| 6576 | 6455 | ||
| 6577 | /* | 6456 | /* |
| 6578 | * We do not use IBRS in the kernel. If this vCPU has used the | 6457 | * We do not use IBRS in the kernel. If this vCPU has used the |
| @@ -6657,7 +6536,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 6657 | 6536 | ||
| 6658 | static struct kvm *vmx_vm_alloc(void) | 6537 | static struct kvm *vmx_vm_alloc(void) |
| 6659 | { | 6538 | { |
| 6660 | struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx)); | 6539 | struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), |
| 6540 | GFP_KERNEL_ACCOUNT | __GFP_ZERO, | ||
| 6541 | PAGE_KERNEL); | ||
| 6661 | return &kvm_vmx->kvm; | 6542 | return &kvm_vmx->kvm; |
| 6662 | } | 6543 | } |
| 6663 | 6544 | ||
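vzalloc() takes no gfp argument, which is why charging the VM structure to a memcg means calling __vmalloc() directly rather than just changing a flag. Assuming the usual definition of vzalloc() in this era of the mm API, the before and after calls differ only in the accounting flag:

	/* before */	kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
	/* i.e.    */	kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
					    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
	/* after   */	kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
					    GFP_KERNEL_ACCOUNT | __GFP_ZERO,
					    PAGE_KERNEL);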
| @@ -6673,7 +6554,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
| 6673 | if (enable_pml) | 6554 | if (enable_pml) |
| 6674 | vmx_destroy_pml_buffer(vmx); | 6555 | vmx_destroy_pml_buffer(vmx); |
| 6675 | free_vpid(vmx->vpid); | 6556 | free_vpid(vmx->vpid); |
| 6676 | leave_guest_mode(vcpu); | ||
| 6677 | nested_vmx_free_vcpu(vcpu); | 6557 | nested_vmx_free_vcpu(vcpu); |
| 6678 | free_loaded_vmcs(vmx->loaded_vmcs); | 6558 | free_loaded_vmcs(vmx->loaded_vmcs); |
| 6679 | kfree(vmx->guest_msrs); | 6559 | kfree(vmx->guest_msrs); |
| @@ -6685,14 +6565,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
| 6685 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | 6565 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
| 6686 | { | 6566 | { |
| 6687 | int err; | 6567 | int err; |
| 6688 | struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | 6568 | struct vcpu_vmx *vmx; |
| 6689 | unsigned long *msr_bitmap; | 6569 | unsigned long *msr_bitmap; |
| 6690 | int cpu; | 6570 | int cpu; |
| 6691 | 6571 | ||
| 6572 | vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); | ||
| 6692 | if (!vmx) | 6573 | if (!vmx) |
| 6693 | return ERR_PTR(-ENOMEM); | 6574 | return ERR_PTR(-ENOMEM); |
| 6694 | 6575 | ||
| 6695 | vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL); | 6576 | vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, |
| 6577 | GFP_KERNEL_ACCOUNT); | ||
| 6696 | if (!vmx->vcpu.arch.guest_fpu) { | 6578 | if (!vmx->vcpu.arch.guest_fpu) { |
| 6697 | printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); | 6579 | printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); |
| 6698 | err = -ENOMEM; | 6580 | err = -ENOMEM; |
| @@ -6714,12 +6596,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 6714 | * for the guest, etc. | 6596 | * for the guest, etc. |
| 6715 | */ | 6597 | */ |
| 6716 | if (enable_pml) { | 6598 | if (enable_pml) { |
| 6717 | vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); | 6599 | vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
| 6718 | if (!vmx->pml_pg) | 6600 | if (!vmx->pml_pg) |
| 6719 | goto uninit_vcpu; | 6601 | goto uninit_vcpu; |
| 6720 | } | 6602 | } |
| 6721 | 6603 | ||
| 6722 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | 6604 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); |
| 6723 | BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) | 6605 | BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) |
| 6724 | > PAGE_SIZE); | 6606 | > PAGE_SIZE); |
| 6725 | 6607 | ||
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 0ac0a64c7790..1554cb45b393 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h | |||
| @@ -175,7 +175,6 @@ struct nested_vmx { | |||
| 175 | 175 | ||
| 176 | struct vcpu_vmx { | 176 | struct vcpu_vmx { |
| 177 | struct kvm_vcpu vcpu; | 177 | struct kvm_vcpu vcpu; |
| 178 | unsigned long host_rsp; | ||
| 179 | u8 fail; | 178 | u8 fail; |
| 180 | u8 msr_bitmap_mode; | 179 | u8 msr_bitmap_mode; |
| 181 | u32 exit_intr_info; | 180 | u32 exit_intr_info; |
| @@ -209,7 +208,7 @@ struct vcpu_vmx { | |||
| 209 | struct loaded_vmcs vmcs01; | 208 | struct loaded_vmcs vmcs01; |
| 210 | struct loaded_vmcs *loaded_vmcs; | 209 | struct loaded_vmcs *loaded_vmcs; |
| 211 | struct loaded_vmcs *loaded_cpu_state; | 210 | struct loaded_vmcs *loaded_cpu_state; |
| 212 | bool __launched; /* temporary, used in vmx_vcpu_run */ | 211 | |
| 213 | struct msr_autoload { | 212 | struct msr_autoload { |
| 214 | struct vmx_msrs guest; | 213 | struct vmx_msrs guest; |
| 215 | struct vmx_msrs host; | 214 | struct vmx_msrs host; |
| @@ -339,8 +338,8 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | |||
| 339 | 338 | ||
| 340 | static inline void pi_set_sn(struct pi_desc *pi_desc) | 339 | static inline void pi_set_sn(struct pi_desc *pi_desc) |
| 341 | { | 340 | { |
| 342 | return set_bit(POSTED_INTR_SN, | 341 | set_bit(POSTED_INTR_SN, |
| 343 | (unsigned long *)&pi_desc->control); | 342 | (unsigned long *)&pi_desc->control); |
| 344 | } | 343 | } |
| 345 | 344 | ||
| 346 | static inline void pi_set_on(struct pi_desc *pi_desc) | 345 | static inline void pi_set_on(struct pi_desc *pi_desc) |
| @@ -445,7 +444,8 @@ static inline u32 vmx_vmentry_ctrl(void) | |||
| 445 | { | 444 | { |
| 446 | u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; | 445 | u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; |
| 447 | if (pt_mode == PT_MODE_SYSTEM) | 446 | if (pt_mode == PT_MODE_SYSTEM) |
| 448 | vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL); | 447 | vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | |
| 448 | VM_ENTRY_LOAD_IA32_RTIT_CTL); | ||
| 449 | /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ | 449 | /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ |
| 450 | return vmentry_ctrl & | 450 | return vmentry_ctrl & |
| 451 | ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); | 451 | ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); |
| @@ -455,9 +455,10 @@ static inline u32 vmx_vmexit_ctrl(void) | |||
| 455 | { | 455 | { |
| 456 | u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; | 456 | u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; |
| 457 | if (pt_mode == PT_MODE_SYSTEM) | 457 | if (pt_mode == PT_MODE_SYSTEM) |
| 458 | vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL); | 458 | vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | |
| 459 | VM_EXIT_CLEAR_IA32_RTIT_CTL); | ||
| 459 | /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ | 460 | /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ |
| 460 | return vmcs_config.vmexit_ctrl & | 461 | return vmexit_ctrl & |
| 461 | ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); | 462 | ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); |
| 462 | } | 463 | } |
| 463 | 464 | ||
| @@ -478,7 +479,7 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) | |||
| 478 | return &(to_vmx(vcpu)->pi_desc); | 479 | return &(to_vmx(vcpu)->pi_desc); |
| 479 | } | 480 | } |
| 480 | 481 | ||
| 481 | struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu); | 482 | struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags); |
| 482 | void free_vmcs(struct vmcs *vmcs); | 483 | void free_vmcs(struct vmcs *vmcs); |
| 483 | int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); | 484 | int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); |
| 484 | void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); | 485 | void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); |
| @@ -487,7 +488,8 @@ void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs); | |||
| 487 | 488 | ||
| 488 | static inline struct vmcs *alloc_vmcs(bool shadow) | 489 | static inline struct vmcs *alloc_vmcs(bool shadow) |
| 489 | { | 490 | { |
| 490 | return alloc_vmcs_cpu(shadow, raw_smp_processor_id()); | 491 | return alloc_vmcs_cpu(shadow, raw_smp_processor_id(), |
| 492 | GFP_KERNEL_ACCOUNT); | ||
| 491 | } | 493 | } |
| 492 | 494 | ||
| 493 | u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); | 495 | u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 941f932373d0..65e4559eef2f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -3879,7 +3879,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 3879 | r = -EINVAL; | 3879 | r = -EINVAL; |
| 3880 | if (!lapic_in_kernel(vcpu)) | 3880 | if (!lapic_in_kernel(vcpu)) |
| 3881 | goto out; | 3881 | goto out; |
| 3882 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 3882 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), |
| 3883 | GFP_KERNEL_ACCOUNT); | ||
| 3883 | 3884 | ||
| 3884 | r = -ENOMEM; | 3885 | r = -ENOMEM; |
| 3885 | if (!u.lapic) | 3886 | if (!u.lapic) |
| @@ -4066,7 +4067,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 4066 | break; | 4067 | break; |
| 4067 | } | 4068 | } |
| 4068 | case KVM_GET_XSAVE: { | 4069 | case KVM_GET_XSAVE: { |
| 4069 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | 4070 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT); |
| 4070 | r = -ENOMEM; | 4071 | r = -ENOMEM; |
| 4071 | if (!u.xsave) | 4072 | if (!u.xsave) |
| 4072 | break; | 4073 | break; |
| @@ -4090,7 +4091,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 4090 | break; | 4091 | break; |
| 4091 | } | 4092 | } |
| 4092 | case KVM_GET_XCRS: { | 4093 | case KVM_GET_XCRS: { |
| 4093 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | 4094 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT); |
| 4094 | r = -ENOMEM; | 4095 | r = -ENOMEM; |
| 4095 | if (!u.xcrs) | 4096 | if (!u.xcrs) |
| 4096 | break; | 4097 | break; |
| @@ -7055,6 +7056,13 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | |||
| 7055 | 7056 | ||
| 7056 | void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) | 7057 | void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) |
| 7057 | { | 7058 | { |
| 7059 | if (!lapic_in_kernel(vcpu)) { | ||
| 7060 | WARN_ON_ONCE(vcpu->arch.apicv_active); | ||
| 7061 | return; | ||
| 7062 | } | ||
| 7063 | if (!vcpu->arch.apicv_active) | ||
| 7064 | return; | ||
| 7065 | |||
| 7058 | vcpu->arch.apicv_active = false; | 7066 | vcpu->arch.apicv_active = false; |
| 7059 | kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); | 7067 | kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); |
| 7060 | } | 7068 | } |
| @@ -9005,7 +9013,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 9005 | struct page *page; | 9013 | struct page *page; |
| 9006 | int r; | 9014 | int r; |
| 9007 | 9015 | ||
| 9008 | vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); | ||
| 9009 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 9016 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
| 9010 | if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) | 9017 | if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) |
| 9011 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 9018 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
| @@ -9026,6 +9033,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 9026 | goto fail_free_pio_data; | 9033 | goto fail_free_pio_data; |
| 9027 | 9034 | ||
| 9028 | if (irqchip_in_kernel(vcpu->kvm)) { | 9035 | if (irqchip_in_kernel(vcpu->kvm)) { |
| 9036 | vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); | ||
| 9029 | r = kvm_create_lapic(vcpu); | 9037 | r = kvm_create_lapic(vcpu); |
| 9030 | if (r < 0) | 9038 | if (r < 0) |
| 9031 | goto fail_mmu_destroy; | 9039 | goto fail_mmu_destroy; |
| @@ -9033,14 +9041,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 9033 | static_key_slow_inc(&kvm_no_apic_vcpu); | 9041 | static_key_slow_inc(&kvm_no_apic_vcpu); |
| 9034 | 9042 | ||
| 9035 | vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, | 9043 | vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, |
| 9036 | GFP_KERNEL); | 9044 | GFP_KERNEL_ACCOUNT); |
| 9037 | if (!vcpu->arch.mce_banks) { | 9045 | if (!vcpu->arch.mce_banks) { |
| 9038 | r = -ENOMEM; | 9046 | r = -ENOMEM; |
| 9039 | goto fail_free_lapic; | 9047 | goto fail_free_lapic; |
| 9040 | } | 9048 | } |
| 9041 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 9049 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
| 9042 | 9050 | ||
| 9043 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { | 9051 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, |
| 9052 | GFP_KERNEL_ACCOUNT)) { | ||
| 9044 | r = -ENOMEM; | 9053 | r = -ENOMEM; |
| 9045 | goto fail_free_mce_banks; | 9054 | goto fail_free_mce_banks; |
| 9046 | } | 9055 | } |
| @@ -9104,7 +9113,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 9104 | 9113 | ||
| 9105 | INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); | 9114 | INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); |
| 9106 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 9115 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
| 9107 | INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); | ||
| 9108 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 9116 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
| 9109 | atomic_set(&kvm->arch.noncoherent_dma_count, 0); | 9117 | atomic_set(&kvm->arch.noncoherent_dma_count, 0); |
| 9110 | 9118 | ||
| @@ -9299,13 +9307,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
| 9299 | 9307 | ||
| 9300 | slot->arch.rmap[i] = | 9308 | slot->arch.rmap[i] = |
| 9301 | kvcalloc(lpages, sizeof(*slot->arch.rmap[i]), | 9309 | kvcalloc(lpages, sizeof(*slot->arch.rmap[i]), |
| 9302 | GFP_KERNEL); | 9310 | GFP_KERNEL_ACCOUNT); |
| 9303 | if (!slot->arch.rmap[i]) | 9311 | if (!slot->arch.rmap[i]) |
| 9304 | goto out_free; | 9312 | goto out_free; |
| 9305 | if (i == 0) | 9313 | if (i == 0) |
| 9306 | continue; | 9314 | continue; |
| 9307 | 9315 | ||
| 9308 | linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL); | 9316 | linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); |
| 9309 | if (!linfo) | 9317 | if (!linfo) |
| 9310 | goto out_free; | 9318 | goto out_free; |
| 9311 | 9319 | ||
| @@ -9348,13 +9356,13 @@ out_free: | |||
| 9348 | return -ENOMEM; | 9356 | return -ENOMEM; |
| 9349 | } | 9357 | } |
| 9350 | 9358 | ||
| 9351 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) | 9359 | void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) |
| 9352 | { | 9360 | { |
| 9353 | /* | 9361 | /* |
| 9354 | * memslots->generation has been incremented. | 9362 | * memslots->generation has been incremented. |
| 9355 | * mmio generation may have reached its maximum value. | 9363 | * mmio generation may have reached its maximum value. |
| 9356 | */ | 9364 | */ |
| 9357 | kvm_mmu_invalidate_mmio_sptes(kvm, slots); | 9365 | kvm_mmu_invalidate_mmio_sptes(kvm, gen); |
| 9358 | } | 9366 | } |
| 9359 | 9367 | ||
| 9360 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 9368 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
| @@ -9462,7 +9470,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
| 9462 | 9470 | ||
| 9463 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 9471 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
| 9464 | { | 9472 | { |
| 9465 | kvm_mmu_invalidate_zap_all_pages(kvm); | 9473 | kvm_mmu_zap_all(kvm); |
| 9466 | } | 9474 | } |
| 9467 | 9475 | ||
| 9468 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | 9476 | void kvm_arch_flush_shadow_memslot(struct kvm *kvm, |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 224cd0a47568..28406aa1136d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -181,6 +181,11 @@ static inline bool emul_is_noncanonical_address(u64 la, | |||
| 181 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, | 181 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, |
| 182 | gva_t gva, gfn_t gfn, unsigned access) | 182 | gva_t gva, gfn_t gfn, unsigned access) |
| 183 | { | 183 | { |
| 184 | u64 gen = kvm_memslots(vcpu->kvm)->generation; | ||
| 185 | |||
| 186 | if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) | ||
| 187 | return; | ||
| 188 | |||
| 184 | /* | 189 | /* |
| 185 | * If this is a shadow nested page table, the "GVA" is | 190 | * If this is a shadow nested page table, the "GVA" is |
| 186 | * actually a nGPA. | 191 | * actually a nGPA. |
| @@ -188,7 +193,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, | |||
| 188 | vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; | 193 | vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; |
| 189 | vcpu->arch.access = access; | 194 | vcpu->arch.access = access; |
| 190 | vcpu->arch.mmio_gfn = gfn; | 195 | vcpu->arch.mmio_gfn = gfn; |
| 191 | vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; | 196 | vcpu->arch.mmio_gen = gen; |
| 192 | } | 197 | } |
| 193 | 198 | ||
| 194 | static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) | 199 | static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) |
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a8b20b65bd4b..aa4ec53281ce 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c | |||
| @@ -1261,6 +1261,13 @@ static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void) | |||
| 1261 | return ARCH_TIMER_PHYS_SECURE_PPI; | 1261 | return ARCH_TIMER_PHYS_SECURE_PPI; |
| 1262 | } | 1262 | } |
| 1263 | 1263 | ||
| 1264 | static void __init arch_timer_populate_kvm_info(void) | ||
| 1265 | { | ||
| 1266 | arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; | ||
| 1267 | if (is_kernel_in_hyp_mode()) | ||
| 1268 | arch_timer_kvm_info.physical_irq = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]; | ||
| 1269 | } | ||
| 1270 | |||
| 1264 | static int __init arch_timer_of_init(struct device_node *np) | 1271 | static int __init arch_timer_of_init(struct device_node *np) |
| 1265 | { | 1272 | { |
| 1266 | int i, ret; | 1273 | int i, ret; |
| @@ -1275,7 +1282,7 @@ static int __init arch_timer_of_init(struct device_node *np) | |||
| 1275 | for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) | 1282 | for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) |
| 1276 | arch_timer_ppi[i] = irq_of_parse_and_map(np, i); | 1283 | arch_timer_ppi[i] = irq_of_parse_and_map(np, i); |
| 1277 | 1284 | ||
| 1278 | arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; | 1285 | arch_timer_populate_kvm_info(); |
| 1279 | 1286 | ||
| 1280 | rate = arch_timer_get_cntfrq(); | 1287 | rate = arch_timer_get_cntfrq(); |
| 1281 | arch_timer_of_configure_rate(rate, np); | 1288 | arch_timer_of_configure_rate(rate, np); |
| @@ -1605,7 +1612,7 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table) | |||
| 1605 | arch_timer_ppi[ARCH_TIMER_HYP_PPI] = | 1612 | arch_timer_ppi[ARCH_TIMER_HYP_PPI] = |
| 1606 | acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI); | 1613 | acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI); |
| 1607 | 1614 | ||
| 1608 | arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; | 1615 | arch_timer_populate_kvm_info(); |
| 1609 | 1616 | ||
| 1610 | /* | 1617 | /* |
| 1611 | * When probing via ACPI, we have no mechanism to override the sysreg | 1618 | * When probing via ACPI, we have no mechanism to override the sysreg |
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index a0baee25134c..4159c63a5fd2 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c | |||
| @@ -1382,3 +1382,40 @@ int chsc_pnso_brinfo(struct subchannel_id schid, | |||
| 1382 | return chsc_error_from_response(brinfo_area->response.code); | 1382 | return chsc_error_from_response(brinfo_area->response.code); |
| 1383 | } | 1383 | } |
| 1384 | EXPORT_SYMBOL_GPL(chsc_pnso_brinfo); | 1384 | EXPORT_SYMBOL_GPL(chsc_pnso_brinfo); |
| 1385 | |||
| 1386 | int chsc_sgib(u32 origin) | ||
| 1387 | { | ||
| 1388 | struct { | ||
| 1389 | struct chsc_header request; | ||
| 1390 | u16 op; | ||
| 1391 | u8 reserved01[2]; | ||
| 1392 | u8 reserved02:4; | ||
| 1393 | u8 fmt:4; | ||
| 1394 | u8 reserved03[7]; | ||
| 1395 | /* operation data area begin */ | ||
| 1396 | u8 reserved04[4]; | ||
| 1397 | u32 gib_origin; | ||
| 1398 | u8 reserved05[10]; | ||
| 1399 | u8 aix; | ||
| 1400 | u8 reserved06[4029]; | ||
| 1401 | struct chsc_header response; | ||
| 1402 | u8 reserved07[4]; | ||
| 1403 | } *sgib_area; | ||
| 1404 | int ret; | ||
| 1405 | |||
| 1406 | spin_lock_irq(&chsc_page_lock); | ||
| 1407 | memset(chsc_page, 0, PAGE_SIZE); | ||
| 1408 | sgib_area = chsc_page; | ||
| 1409 | sgib_area->request.length = 0x0fe0; | ||
| 1410 | sgib_area->request.code = 0x0021; | ||
| 1411 | sgib_area->op = 0x1; | ||
| 1412 | sgib_area->gib_origin = origin; | ||
| 1413 | |||
| 1414 | ret = chsc(sgib_area); | ||
| 1415 | if (ret == 0) | ||
| 1416 | ret = chsc_error_from_response(sgib_area->response.code); | ||
| 1417 | spin_unlock_irq(&chsc_page_lock); | ||
| 1418 | |||
| 1419 | return ret; | ||
| 1420 | } | ||
| 1421 | EXPORT_SYMBOL_GPL(chsc_sgib); | ||
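chsc_sgib() above issues CHSC command 0x0021 to register a guest information block (GIB) origin with the channel subsystem. A hedged usage sketch of a hypothetical caller follows; the buffer allocation and the warning message are assumptions, only the chsc_sgib() signature comes from this patch:

        /* Hypothetical caller: hand a DMA-addressable GIB page to the CSS. */
        unsigned long gib;
        int rc;

        gib = get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!gib)
                return -ENOMEM;

        rc = chsc_sgib((u32)virt_to_phys((void *)gib));
        if (rc)
                pr_warn("registering the GIB failed (rc=%d)\n", rc);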
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 78aba8d94eec..e57d68e325a3 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h | |||
| @@ -164,6 +164,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp); | |||
| 164 | int chsc_ssqd(struct subchannel_id schid, struct chsc_ssqd_area *ssqd); | 164 | int chsc_ssqd(struct subchannel_id schid, struct chsc_ssqd_area *ssqd); |
| 165 | int chsc_sadc(struct subchannel_id schid, struct chsc_scssc_area *scssc, | 165 | int chsc_sadc(struct subchannel_id schid, struct chsc_scssc_area *scssc, |
| 166 | u64 summary_indicator_addr, u64 subchannel_indicator_addr); | 166 | u64 summary_indicator_addr, u64 subchannel_indicator_addr); |
| 167 | int chsc_sgib(u32 origin); | ||
| 167 | int chsc_error_from_response(int response); | 168 | int chsc_error_from_response(int response); |
| 168 | 169 | ||
| 169 | int chsc_siosl(struct subchannel_id schid); | 170 | int chsc_siosl(struct subchannel_id schid); |
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 349e5957c949..702967d996bb 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h | |||
| @@ -74,6 +74,7 @@ enum arch_timer_spi_nr { | |||
| 74 | struct arch_timer_kvm_info { | 74 | struct arch_timer_kvm_info { |
| 75 | struct timecounter timecounter; | 75 | struct timecounter timecounter; |
| 76 | int virtual_irq; | 76 | int virtual_irq; |
| 77 | int physical_irq; | ||
| 77 | }; | 78 | }; |
| 78 | 79 | ||
| 79 | struct arch_timer_mem_frame { | 80 | struct arch_timer_mem_frame { |
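The new physical_irq field lets the arch timer driver pass KVM the non-secure physical timer PPI alongside the virtual one; arch_timer_populate_kvm_info() above fills it only when the kernel runs at EL2 (VHE). A sketch of how a consumer might pick the IRQs up, assuming the existing arch_timer_get_kvm_info() accessor and the host_vtimer_irq/host_ptimer_irq variables introduced later in this series:

        struct arch_timer_kvm_info *info = arch_timer_get_kvm_info();

        host_vtimer_irq = info->virtual_irq;
        if (has_vhe())
                host_ptimer_irq = info->physical_irq;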
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 33771352dcd6..05a18dd265b5 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h | |||
| @@ -22,7 +22,22 @@ | |||
| 22 | #include <linux/clocksource.h> | 22 | #include <linux/clocksource.h> |
| 23 | #include <linux/hrtimer.h> | 23 | #include <linux/hrtimer.h> |
| 24 | 24 | ||
| 25 | enum kvm_arch_timers { | ||
| 26 | TIMER_PTIMER, | ||
| 27 | TIMER_VTIMER, | ||
| 28 | NR_KVM_TIMERS | ||
| 29 | }; | ||
| 30 | |||
| 31 | enum kvm_arch_timer_regs { | ||
| 32 | TIMER_REG_CNT, | ||
| 33 | TIMER_REG_CVAL, | ||
| 34 | TIMER_REG_TVAL, | ||
| 35 | TIMER_REG_CTL, | ||
| 36 | }; | ||
| 37 | |||
| 25 | struct arch_timer_context { | 38 | struct arch_timer_context { |
| 39 | struct kvm_vcpu *vcpu; | ||
| 40 | |||
| 26 | /* Registers: control register, timer value */ | 41 | /* Registers: control register, timer value */ |
| 27 | u32 cnt_ctl; | 42 | u32 cnt_ctl; |
| 28 | u64 cnt_cval; | 43 | u64 cnt_cval; |
| @@ -30,30 +45,36 @@ struct arch_timer_context { | |||
| 30 | /* Timer IRQ */ | 45 | /* Timer IRQ */ |
| 31 | struct kvm_irq_level irq; | 46 | struct kvm_irq_level irq; |
| 32 | 47 | ||
| 48 | /* Virtual offset */ | ||
| 49 | u64 cntvoff; | ||
| 50 | |||
| 51 | /* Emulated Timer (may be unused) */ | ||
| 52 | struct hrtimer hrtimer; | ||
| 53 | |||
| 33 | /* | 54 | /* |
| 34 | * We have multiple paths which can save/restore the timer state | 55 | * We have multiple paths which can save/restore the timer state onto |
| 35 | * onto the hardware, so we need some way of keeping track of | 56 | * the hardware, so we need some way of keeping track of where the |
| 36 | * where the latest state is. | 57 | * latest state is. |
| 37 | * | ||
| 38 | * loaded == true: State is loaded on the hardware registers. | ||
| 39 | * loaded == false: State is stored in memory. | ||
| 40 | */ | 58 | */ |
| 41 | bool loaded; | 59 | bool loaded; |
| 42 | 60 | ||
| 43 | /* Virtual offset */ | 61 | /* Duplicated state from arch_timer.c for convenience */ |
| 44 | u64 cntvoff; | 62 | u32 host_timer_irq; |
| 63 | u32 host_timer_irq_flags; | ||
| 64 | }; | ||
| 65 | |||
| 66 | struct timer_map { | ||
| 67 | struct arch_timer_context *direct_vtimer; | ||
| 68 | struct arch_timer_context *direct_ptimer; | ||
| 69 | struct arch_timer_context *emul_ptimer; | ||
| 45 | }; | 70 | }; |
| 46 | 71 | ||
| 47 | struct arch_timer_cpu { | 72 | struct arch_timer_cpu { |
| 48 | struct arch_timer_context vtimer; | 73 | struct arch_timer_context timers[NR_KVM_TIMERS]; |
| 49 | struct arch_timer_context ptimer; | ||
| 50 | 74 | ||
| 51 | /* Background timer used when the guest is not running */ | 75 | /* Background timer used when the guest is not running */ |
| 52 | struct hrtimer bg_timer; | 76 | struct hrtimer bg_timer; |
| 53 | 77 | ||
| 54 | /* Physical timer emulation */ | ||
| 55 | struct hrtimer phys_timer; | ||
| 56 | |||
| 57 | /* Is the timer enabled */ | 78 | /* Is the timer enabled */ |
| 58 | bool enabled; | 79 | bool enabled; |
| 59 | }; | 80 | }; |
| @@ -76,9 +97,6 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); | |||
| 76 | 97 | ||
| 77 | bool kvm_timer_is_pending(struct kvm_vcpu *vcpu); | 98 | bool kvm_timer_is_pending(struct kvm_vcpu *vcpu); |
| 78 | 99 | ||
| 79 | void kvm_timer_schedule(struct kvm_vcpu *vcpu); | ||
| 80 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu); | ||
| 81 | |||
| 82 | u64 kvm_phys_timer_read(void); | 100 | u64 kvm_phys_timer_read(void); |
| 83 | 101 | ||
| 84 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu); | 102 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu); |
| @@ -88,7 +106,19 @@ void kvm_timer_init_vhe(void); | |||
| 88 | 106 | ||
| 89 | bool kvm_arch_timer_get_input_level(int vintid); | 107 | bool kvm_arch_timer_get_input_level(int vintid); |
| 90 | 108 | ||
| 91 | #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) | 109 | #define vcpu_timer(v) (&(v)->arch.timer_cpu) |
| 92 | #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) | 110 | #define vcpu_get_timer(v,t) (&vcpu_timer(v)->timers[(t)]) |
| 111 | #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER]) | ||
| 112 | #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER]) | ||
| 113 | |||
| 114 | #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) | ||
| 115 | |||
| 116 | u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, | ||
| 117 | enum kvm_arch_timers tmr, | ||
| 118 | enum kvm_arch_timer_regs treg); | ||
| 119 | void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, | ||
| 120 | enum kvm_arch_timers tmr, | ||
| 121 | enum kvm_arch_timer_regs treg, | ||
| 122 | u64 val); | ||
| 93 | 123 | ||
| 94 | #endif | 124 | #endif |
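The per-vCPU timer contexts now live in a single timers[] array, and arch_timer_ctx_index() recovers a context's identity by pointer arithmetic against that array instead of a stored field. A small illustration (not from the patch) of how the new accessors relate:

        struct arch_timer_context *vt = vcpu_get_timer(vcpu, TIMER_VTIMER);
        struct arch_timer_context *pt = vcpu_get_timer(vcpu, TIMER_PTIMER);

        /* The index falls out of the pointer difference. */
        WARN_ON(arch_timer_ctx_index(vt) != TIMER_VTIMER);
        WARN_ON(arch_timer_ctx_index(pt) != TIMER_PTIMER);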
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c38cc5eb7e73..9d55c63db09b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -48,6 +48,27 @@ | |||
| 48 | */ | 48 | */ |
| 49 | #define KVM_MEMSLOT_INVALID (1UL << 16) | 49 | #define KVM_MEMSLOT_INVALID (1UL << 16) |
| 50 | 50 | ||
| 51 | /* | ||
| 52 | * Bit 63 of the memslot generation number is an "update in-progress flag", | ||
| 53 | * e.g. is temporarily set for the duration of install_new_memslots(). | ||
| 54 | * This flag effectively creates a unique generation number that is used to | ||
| 55 | * mark cached memslot data, e.g. MMIO accesses, as potentially being stale, | ||
| 56 | * i.e. may (or may not) have come from the previous memslots generation. | ||
| 57 | * | ||
| 58 | * This is necessary because the actual memslots update is not atomic with | ||
| 59 | * respect to the generation number update. Updating the generation number | ||
| 60 | * first would allow a vCPU to cache a spte from the old memslots using the | ||
| 61 | * new generation number, and updating the generation number after switching | ||
| 62 | * to the new memslots would allow cache hits using the old generation number | ||
| 63 | * to reference the defunct memslots. | ||
| 64 | * | ||
| 65 | * This mechanism is used to prevent getting hits in KVM's caches while a | ||
| 66 | * memslot update is in-progress, and to prevent cache hits *after* updating | ||
| 67 | * the actual generation number against accesses that were inserted into the | ||
| 68 | * cache *before* the memslots were updated. | ||
| 69 | */ | ||
| 70 | #define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(63) | ||
| 71 | |||
| 51 | /* Two fragments for cross MMIO pages. */ | 72 | /* Two fragments for cross MMIO pages. */ |
| 52 | #define KVM_MAX_MMIO_FRAGMENTS 2 | 73 | #define KVM_MAX_MMIO_FRAGMENTS 2 |
| 53 | 74 | ||
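To make the comment above concrete, here is a hedged sketch of the generation lifecycle around a memslot update; kvm, slots, old_memslots and as_id are assumed locals, and the exact sequencing in kvm_main.c may differ:

        u64 gen = old_memslots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;

        /* Publish the new memslots under a transient, easily identified value. */
        slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
        rcu_assign_pointer(kvm->memslots[as_id], slots);
        synchronize_srcu_expedited(&kvm->srcu);

        /* Only now move to the real new generation and let arch code drop
         * anything cached under an older generation. */
        slots->generation = gen + 1;
        kvm_arch_memslots_updated(kvm, gen + 1);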
| @@ -634,7 +655,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | |||
| 634 | struct kvm_memory_slot *dont); | 655 | struct kvm_memory_slot *dont); |
| 635 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | 656 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
| 636 | unsigned long npages); | 657 | unsigned long npages); |
| 637 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots); | 658 | void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); |
| 638 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 659 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
| 639 | struct kvm_memory_slot *memslot, | 660 | struct kvm_memory_slot *memslot, |
| 640 | const struct kvm_userspace_memory_region *mem, | 661 | const struct kvm_userspace_memory_region *mem, |
| @@ -1182,6 +1203,7 @@ extern bool kvm_rebooting; | |||
| 1182 | 1203 | ||
| 1183 | extern unsigned int halt_poll_ns; | 1204 | extern unsigned int halt_poll_ns; |
| 1184 | extern unsigned int halt_poll_ns_grow; | 1205 | extern unsigned int halt_poll_ns_grow; |
| 1206 | extern unsigned int halt_poll_ns_grow_start; | ||
| 1185 | extern unsigned int halt_poll_ns_shrink; | 1207 | extern unsigned int halt_poll_ns_shrink; |
| 1186 | 1208 | ||
| 1187 | struct kvm_device { | 1209 | struct kvm_device { |
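The companion halt_poll_ns_grow_start parameter gives the poll window a starting value when it grows from zero, since multiplying zero by halt_poll_ns_grow would never enable polling on its own. A hedged sketch of the growth step, roughly what grow_halt_poll_ns() in virt/kvm/kvm_main.c looks like after this series:

        static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
        {
                unsigned int val = vcpu->halt_poll_ns;
                unsigned int grow = READ_ONCE(halt_poll_ns_grow);
                unsigned int grow_start = READ_ONCE(halt_poll_ns_grow_start);

                if (!grow)
                        return;

                val *= grow;
                if (val < grow_start)
                        val = grow_start;       /* 0 * grow would stay 0 forever */

                vcpu->halt_poll_ns = val;
        }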
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6210ba41c29e..2689d1ea6d7a 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | /x86_64/platform_info_test | 3 | /x86_64/platform_info_test |
| 4 | /x86_64/set_sregs_test | 4 | /x86_64/set_sregs_test |
| 5 | /x86_64/sync_regs_test | 5 | /x86_64/sync_regs_test |
| 6 | /x86_64/vmx_close_while_nested_test | ||
| 6 | /x86_64/vmx_tsc_adjust_test | 7 | /x86_64/vmx_tsc_adjust_test |
| 7 | /x86_64/state_test | 8 | /x86_64/state_test |
| 8 | /dirty_log_test | 9 | /dirty_log_test |
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index f9a0e9938480..3c1f4bdf9000 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile | |||
| @@ -16,6 +16,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test | |||
| 16 | TEST_GEN_PROGS_x86_64 += x86_64/state_test | 16 | TEST_GEN_PROGS_x86_64 += x86_64/state_test |
| 17 | TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test | 17 | TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test |
| 18 | TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid | 18 | TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid |
| 19 | TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test | ||
| 19 | TEST_GEN_PROGS_x86_64 += dirty_log_test | 20 | TEST_GEN_PROGS_x86_64 += dirty_log_test |
| 20 | TEST_GEN_PROGS_x86_64 += clear_dirty_log_test | 21 | TEST_GEN_PROGS_x86_64 += clear_dirty_log_test |
| 21 | 22 | ||
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c new file mode 100644 index 000000000000..6edec6fd790b --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | /* | ||
| 2 | * vmx_close_while_nested | ||
| 3 | * | ||
| 4 | * Copyright (C) 2019, Red Hat, Inc. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | * Verify that nothing bad happens if a KVM user exits with open | ||
| 9 | * file descriptors while executing a nested guest. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include "test_util.h" | ||
| 13 | #include "kvm_util.h" | ||
| 14 | #include "processor.h" | ||
| 15 | #include "vmx.h" | ||
| 16 | |||
| 17 | #include <string.h> | ||
| 18 | #include <sys/ioctl.h> | ||
| 19 | |||
| 20 | #include "kselftest.h" | ||
| 21 | |||
| 22 | #define VCPU_ID 5 | ||
| 23 | |||
| 24 | enum { | ||
| 25 | PORT_L0_EXIT = 0x2000, | ||
| 26 | }; | ||
| 27 | |||
| 28 | /* The virtual machine object. */ | ||
| 29 | static struct kvm_vm *vm; | ||
| 30 | |||
| 31 | static void l2_guest_code(void) | ||
| 32 | { | ||
| 33 | /* Exit to L0 */ | ||
| 34 | asm volatile("inb %%dx, %%al" | ||
| 35 | : : [port] "d" (PORT_L0_EXIT) : "rax"); | ||
| 36 | } | ||
| 37 | |||
| 38 | static void l1_guest_code(struct vmx_pages *vmx_pages) | ||
| 39 | { | ||
| 40 | #define L2_GUEST_STACK_SIZE 64 | ||
| 41 | unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; | ||
| 42 | uint32_t control; | ||
| 43 | uintptr_t save_cr3; | ||
| 44 | |||
| 45 | GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); | ||
| 46 | GUEST_ASSERT(load_vmcs(vmx_pages)); | ||
| 47 | |||
| 48 | /* Prepare the VMCS for L2 execution. */ | ||
| 49 | prepare_vmcs(vmx_pages, l2_guest_code, | ||
| 50 | &l2_guest_stack[L2_GUEST_STACK_SIZE]); | ||
| 51 | |||
| 52 | GUEST_ASSERT(!vmlaunch()); | ||
| 53 | GUEST_ASSERT(0); | ||
| 54 | } | ||
| 55 | |||
| 56 | int main(int argc, char *argv[]) | ||
| 57 | { | ||
| 58 | struct vmx_pages *vmx_pages; | ||
| 59 | vm_vaddr_t vmx_pages_gva; | ||
| 60 | struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); | ||
| 61 | |||
| 62 | if (!(entry->ecx & CPUID_VMX)) { | ||
| 63 | fprintf(stderr, "nested VMX not enabled, skipping test\n"); | ||
| 64 | exit(KSFT_SKIP); | ||
| 65 | } | ||
| 66 | |||
| 67 | vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); | ||
| 68 | vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); | ||
| 69 | |||
| 70 | /* Allocate VMX pages and shared descriptors (vmx_pages). */ | ||
| 71 | vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); | ||
| 72 | vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); | ||
| 73 | |||
| 74 | for (;;) { | ||
| 75 | volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); | ||
| 76 | struct ucall uc; | ||
| 77 | |||
| 78 | vcpu_run(vm, VCPU_ID); | ||
| 79 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, | ||
| 80 | "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", | ||
| 81 | run->exit_reason, | ||
| 82 | exit_reason_str(run->exit_reason)); | ||
| 83 | |||
| 84 | if (run->io.port == PORT_L0_EXIT) | ||
| 85 | break; | ||
| 86 | |||
| 87 | switch (get_ucall(vm, VCPU_ID, &uc)) { | ||
| 88 | case UCALL_ABORT: | ||
| 89 | TEST_ASSERT(false, "%s", (const char *)uc.args[0]); | ||
| 90 | /* NOT REACHED */ | ||
| 91 | default: | ||
| 92 | TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | } | ||
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index b07ac4614e1c..3417f2dbc366 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | 25 | ||
| 26 | #include <clocksource/arm_arch_timer.h> | 26 | #include <clocksource/arm_arch_timer.h> |
| 27 | #include <asm/arch_timer.h> | 27 | #include <asm/arch_timer.h> |
| 28 | #include <asm/kvm_emulate.h> | ||
| 28 | #include <asm/kvm_hyp.h> | 29 | #include <asm/kvm_hyp.h> |
| 29 | 30 | ||
| 30 | #include <kvm/arm_vgic.h> | 31 | #include <kvm/arm_vgic.h> |
| @@ -34,7 +35,9 @@ | |||
| 34 | 35 | ||
| 35 | static struct timecounter *timecounter; | 36 | static struct timecounter *timecounter; |
| 36 | static unsigned int host_vtimer_irq; | 37 | static unsigned int host_vtimer_irq; |
| 38 | static unsigned int host_ptimer_irq; | ||
| 37 | static u32 host_vtimer_irq_flags; | 39 | static u32 host_vtimer_irq_flags; |
| 40 | static u32 host_ptimer_irq_flags; | ||
| 38 | 41 | ||
| 39 | static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); | 42 | static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); |
| 40 | 43 | ||
| @@ -52,12 +55,34 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); | |||
| 52 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, | 55 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, |
| 53 | struct arch_timer_context *timer_ctx); | 56 | struct arch_timer_context *timer_ctx); |
| 54 | static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); | 57 | static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); |
| 58 | static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, | ||
| 59 | struct arch_timer_context *timer, | ||
| 60 | enum kvm_arch_timer_regs treg, | ||
| 61 | u64 val); | ||
| 62 | static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, | ||
| 63 | struct arch_timer_context *timer, | ||
| 64 | enum kvm_arch_timer_regs treg); | ||
| 55 | 65 | ||
| 56 | u64 kvm_phys_timer_read(void) | 66 | u64 kvm_phys_timer_read(void) |
| 57 | { | 67 | { |
| 58 | return timecounter->cc->read(timecounter->cc); | 68 | return timecounter->cc->read(timecounter->cc); |
| 59 | } | 69 | } |
| 60 | 70 | ||
| 71 | static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) | ||
| 72 | { | ||
| 73 | if (has_vhe()) { | ||
| 74 | map->direct_vtimer = vcpu_vtimer(vcpu); | ||
| 75 | map->direct_ptimer = vcpu_ptimer(vcpu); | ||
| 76 | map->emul_ptimer = NULL; | ||
| 77 | } else { | ||
| 78 | map->direct_vtimer = vcpu_vtimer(vcpu); | ||
| 79 | map->direct_ptimer = NULL; | ||
| 80 | map->emul_ptimer = vcpu_ptimer(vcpu); | ||
| 81 | } | ||
| 82 | |||
| 83 | trace_kvm_get_timer_map(vcpu->vcpu_id, map); | ||
| 84 | } | ||
| 85 | |||
| 61 | static inline bool userspace_irqchip(struct kvm *kvm) | 86 | static inline bool userspace_irqchip(struct kvm *kvm) |
| 62 | { | 87 | { |
| 63 | return static_branch_unlikely(&userspace_irqchip_in_use) && | 88 | return static_branch_unlikely(&userspace_irqchip_in_use) && |
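get_timer_map() above captures the key split: with VHE both guest timers are backed directly by hardware, while without VHE only the virtual timer is direct and the physical timer is emulated via a hrtimer. Callers then simply branch on which map entries are non-NULL; a short sketch modelled on the save path later in this file:

        struct timer_map map;

        get_timer_map(vcpu, &map);

        timer_save_state(map.direct_vtimer);
        if (map.direct_ptimer)
                timer_save_state(map.direct_ptimer);
        if (map.emul_ptimer)            /* non-VHE only */
                soft_timer_cancel(&map.emul_ptimer->hrtimer);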
| @@ -78,20 +103,27 @@ static void soft_timer_cancel(struct hrtimer *hrt) | |||
| 78 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | 103 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) |
| 79 | { | 104 | { |
| 80 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; | 105 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; |
| 81 | struct arch_timer_context *vtimer; | 106 | struct arch_timer_context *ctx; |
| 107 | struct timer_map map; | ||
| 82 | 108 | ||
| 83 | /* | 109 | /* |
| 84 | * We may see a timer interrupt after vcpu_put() has been called which | 110 | * We may see a timer interrupt after vcpu_put() has been called which |
| 85 | * sets the CPU's vcpu pointer to NULL, because even though the timer | 111 | * sets the CPU's vcpu pointer to NULL, because even though the timer |
| 86 | * has been disabled in vtimer_save_state(), the hardware interrupt | 112 | * has been disabled in timer_save_state(), the hardware interrupt |
| 87 | * signal may not have been retired from the interrupt controller yet. | 113 | * signal may not have been retired from the interrupt controller yet. |
| 88 | */ | 114 | */ |
| 89 | if (!vcpu) | 115 | if (!vcpu) |
| 90 | return IRQ_HANDLED; | 116 | return IRQ_HANDLED; |
| 91 | 117 | ||
| 92 | vtimer = vcpu_vtimer(vcpu); | 118 | get_timer_map(vcpu, &map); |
| 93 | if (kvm_timer_should_fire(vtimer)) | 119 | |
| 94 | kvm_timer_update_irq(vcpu, true, vtimer); | 120 | if (irq == host_vtimer_irq) |
| 121 | ctx = map.direct_vtimer; | ||
| 122 | else | ||
| 123 | ctx = map.direct_ptimer; | ||
| 124 | |||
| 125 | if (kvm_timer_should_fire(ctx)) | ||
| 126 | kvm_timer_update_irq(vcpu, true, ctx); | ||
| 95 | 127 | ||
| 96 | if (userspace_irqchip(vcpu->kvm) && | 128 | if (userspace_irqchip(vcpu->kvm) && |
| 97 | !static_branch_unlikely(&has_gic_active_state)) | 129 | !static_branch_unlikely(&has_gic_active_state)) |
| @@ -122,7 +154,9 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) | |||
| 122 | 154 | ||
| 123 | static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) | 155 | static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) |
| 124 | { | 156 | { |
| 125 | return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && | 157 | WARN_ON(timer_ctx && timer_ctx->loaded); |
| 158 | return timer_ctx && | ||
| 159 | !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && | ||
| 126 | (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); | 160 | (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); |
| 127 | } | 161 | } |
| 128 | 162 | ||
| @@ -132,21 +166,22 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) | |||
| 132 | */ | 166 | */ |
| 133 | static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) | 167 | static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) |
| 134 | { | 168 | { |
| 135 | u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX; | 169 | u64 min_delta = ULLONG_MAX; |
| 136 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 170 | int i; |
| 137 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
| 138 | 171 | ||
| 139 | if (kvm_timer_irq_can_fire(vtimer)) | 172 | for (i = 0; i < NR_KVM_TIMERS; i++) { |
| 140 | min_virt = kvm_timer_compute_delta(vtimer); | 173 | struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; |
| 141 | 174 | ||
| 142 | if (kvm_timer_irq_can_fire(ptimer)) | 175 | WARN(ctx->loaded, "timer %d loaded\n", i); |
| 143 | min_phys = kvm_timer_compute_delta(ptimer); | 176 | if (kvm_timer_irq_can_fire(ctx)) |
| 177 | min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); | ||
| 178 | } | ||
| 144 | 179 | ||
| 145 | /* If none of timers can fire, then return 0 */ | 180 | /* If none of timers can fire, then return 0 */ |
| 146 | if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX)) | 181 | if (min_delta == ULLONG_MAX) |
| 147 | return 0; | 182 | return 0; |
| 148 | 183 | ||
| 149 | return min(min_virt, min_phys); | 184 | return min_delta; |
| 150 | } | 185 | } |
| 151 | 186 | ||
| 152 | static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) | 187 | static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) |
| @@ -173,41 +208,58 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) | |||
| 173 | return HRTIMER_NORESTART; | 208 | return HRTIMER_NORESTART; |
| 174 | } | 209 | } |
| 175 | 210 | ||
| 176 | static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) | 211 | static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) |
| 177 | { | 212 | { |
| 178 | struct arch_timer_context *ptimer; | 213 | struct arch_timer_context *ctx; |
| 179 | struct arch_timer_cpu *timer; | ||
| 180 | struct kvm_vcpu *vcpu; | 214 | struct kvm_vcpu *vcpu; |
| 181 | u64 ns; | 215 | u64 ns; |
| 182 | 216 | ||
| 183 | timer = container_of(hrt, struct arch_timer_cpu, phys_timer); | 217 | ctx = container_of(hrt, struct arch_timer_context, hrtimer); |
| 184 | vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); | 218 | vcpu = ctx->vcpu; |
| 185 | ptimer = vcpu_ptimer(vcpu); | 219 | |
| 220 | trace_kvm_timer_hrtimer_expire(ctx); | ||
| 186 | 221 | ||
| 187 | /* | 222 | /* |
| 188 | * Check that the timer has really expired from the guest's | 223 | * Check that the timer has really expired from the guest's |
| 189 | * PoV (NTP on the host may have forced it to expire | 224 | * PoV (NTP on the host may have forced it to expire |
| 190 | * early). If not ready, schedule for a later time. | 225 | * early). If not ready, schedule for a later time. |
| 191 | */ | 226 | */ |
| 192 | ns = kvm_timer_compute_delta(ptimer); | 227 | ns = kvm_timer_compute_delta(ctx); |
| 193 | if (unlikely(ns)) { | 228 | if (unlikely(ns)) { |
| 194 | hrtimer_forward_now(hrt, ns_to_ktime(ns)); | 229 | hrtimer_forward_now(hrt, ns_to_ktime(ns)); |
| 195 | return HRTIMER_RESTART; | 230 | return HRTIMER_RESTART; |
| 196 | } | 231 | } |
| 197 | 232 | ||
| 198 | kvm_timer_update_irq(vcpu, true, ptimer); | 233 | kvm_timer_update_irq(vcpu, true, ctx); |
| 199 | return HRTIMER_NORESTART; | 234 | return HRTIMER_NORESTART; |
| 200 | } | 235 | } |
| 201 | 236 | ||
| 202 | static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) | 237 | static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) |
| 203 | { | 238 | { |
| 239 | enum kvm_arch_timers index; | ||
| 204 | u64 cval, now; | 240 | u64 cval, now; |
| 205 | 241 | ||
| 242 | if (!timer_ctx) | ||
| 243 | return false; | ||
| 244 | |||
| 245 | index = arch_timer_ctx_index(timer_ctx); | ||
| 246 | |||
| 206 | if (timer_ctx->loaded) { | 247 | if (timer_ctx->loaded) { |
| 207 | u32 cnt_ctl; | 248 | u32 cnt_ctl = 0; |
| 249 | |||
| 250 | switch (index) { | ||
| 251 | case TIMER_VTIMER: | ||
| 252 | cnt_ctl = read_sysreg_el0(cntv_ctl); | ||
| 253 | break; | ||
| 254 | case TIMER_PTIMER: | ||
| 255 | cnt_ctl = read_sysreg_el0(cntp_ctl); | ||
| 256 | break; | ||
| 257 | case NR_KVM_TIMERS: | ||
| 258 | /* GCC is braindead */ | ||
| 259 | cnt_ctl = 0; | ||
| 260 | break; | ||
| 261 | } | ||
| 208 | 262 | ||
| 209 | /* Only the virtual timer can be loaded so far */ | ||
| 210 | cnt_ctl = read_sysreg_el0(cntv_ctl); | ||
| 211 | return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && | 263 | return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && |
| 212 | (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && | 264 | (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && |
| 213 | !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); | 265 | !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); |
| @@ -224,13 +276,13 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) | |||
| 224 | 276 | ||
| 225 | bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) | 277 | bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) |
| 226 | { | 278 | { |
| 227 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 279 | struct timer_map map; |
| 228 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
| 229 | 280 | ||
| 230 | if (kvm_timer_should_fire(vtimer)) | 281 | get_timer_map(vcpu, &map); |
| 231 | return true; | ||
| 232 | 282 | ||
| 233 | return kvm_timer_should_fire(ptimer); | 283 | return kvm_timer_should_fire(map.direct_vtimer) || |
| 284 | kvm_timer_should_fire(map.direct_ptimer) || | ||
| 285 | kvm_timer_should_fire(map.emul_ptimer); | ||
| 234 | } | 286 | } |
| 235 | 287 | ||
| 236 | /* | 288 | /* |
| @@ -269,77 +321,70 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, | |||
| 269 | } | 321 | } |
| 270 | } | 322 | } |
| 271 | 323 | ||
| 272 | /* Schedule the background timer for the emulated timer. */ | 324 | static void timer_emulate(struct arch_timer_context *ctx) |
| 273 | static void phys_timer_emulate(struct kvm_vcpu *vcpu) | ||
| 274 | { | 325 | { |
| 275 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 326 | bool should_fire = kvm_timer_should_fire(ctx); |
| 276 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 327 | |
| 328 | trace_kvm_timer_emulate(ctx, should_fire); | ||
| 329 | |||
| 330 | if (should_fire) { | ||
| 331 | kvm_timer_update_irq(ctx->vcpu, true, ctx); | ||
| 332 | return; | ||
| 333 | } | ||
| 277 | 334 | ||
| 278 | /* | 335 | /* |
| 279 | * If the timer can fire now, we don't need to have a soft timer | 336 | * If the timer can fire now, we don't need to have a soft timer |
| 280 | * scheduled for the future. If the timer cannot fire at all, | 337 | * scheduled for the future. If the timer cannot fire at all, |
| 281 | * then we also don't need a soft timer. | 338 | * then we also don't need a soft timer. |
| 282 | */ | 339 | */ |
| 283 | if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { | 340 | if (!kvm_timer_irq_can_fire(ctx)) { |
| 284 | soft_timer_cancel(&timer->phys_timer); | 341 | soft_timer_cancel(&ctx->hrtimer); |
| 285 | return; | 342 | return; |
| 286 | } | 343 | } |
| 287 | 344 | ||
| 288 | soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer)); | 345 | soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx)); |
| 289 | } | 346 | } |
| 290 | 347 | ||
| 291 | /* | 348 | static void timer_save_state(struct arch_timer_context *ctx) |
| 292 | * Check if there was a change in the timer state, so that we should either | ||
| 293 | * raise or lower the line level to the GIC or schedule a background timer to | ||
| 294 | * emulate the physical timer. | ||
| 295 | */ | ||
| 296 | static void kvm_timer_update_state(struct kvm_vcpu *vcpu) | ||
| 297 | { | 349 | { |
| 298 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 350 | struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); |
| 299 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 351 | enum kvm_arch_timers index = arch_timer_ctx_index(ctx); |
| 300 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 352 | unsigned long flags; |
| 301 | bool level; | ||
| 302 | 353 | ||
| 303 | if (unlikely(!timer->enabled)) | 354 | if (!timer->enabled) |
| 304 | return; | 355 | return; |
| 305 | 356 | ||
| 306 | /* | 357 | local_irq_save(flags); |
| 307 | * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part | ||
| 308 | * of its lifecycle is offloaded to the hardware, and we therefore may | ||
| 309 | * not have lowered the irq.level value before having to signal a new | ||
| 310 | * interrupt, but have to signal an interrupt every time the level is | ||
| 311 | * asserted. | ||
| 312 | */ | ||
| 313 | level = kvm_timer_should_fire(vtimer); | ||
| 314 | kvm_timer_update_irq(vcpu, level, vtimer); | ||
| 315 | 358 | ||
| 316 | phys_timer_emulate(vcpu); | 359 | if (!ctx->loaded) |
| 360 | goto out; | ||
| 317 | 361 | ||
| 318 | if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) | 362 | switch (index) { |
| 319 | kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); | 363 | case TIMER_VTIMER: |
| 320 | } | 364 | ctx->cnt_ctl = read_sysreg_el0(cntv_ctl); |
| 365 | ctx->cnt_cval = read_sysreg_el0(cntv_cval); | ||
| 321 | 366 | ||
| 322 | static void vtimer_save_state(struct kvm_vcpu *vcpu) | 367 | /* Disable the timer */ |
| 323 | { | 368 | write_sysreg_el0(0, cntv_ctl); |
| 324 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 369 | isb(); |
| 325 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
| 326 | unsigned long flags; | ||
| 327 | 370 | ||
| 328 | local_irq_save(flags); | 371 | break; |
| 372 | case TIMER_PTIMER: | ||
| 373 | ctx->cnt_ctl = read_sysreg_el0(cntp_ctl); | ||
| 374 | ctx->cnt_cval = read_sysreg_el0(cntp_cval); | ||
| 329 | 375 | ||
| 330 | if (!vtimer->loaded) | 376 | /* Disable the timer */ |
| 331 | goto out; | 377 | write_sysreg_el0(0, cntp_ctl); |
| 378 | isb(); | ||
| 332 | 379 | ||
| 333 | if (timer->enabled) { | 380 | break; |
| 334 | vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); | 381 | case NR_KVM_TIMERS: |
| 335 | vtimer->cnt_cval = read_sysreg_el0(cntv_cval); | 382 | BUG(); |
| 336 | } | 383 | } |
| 337 | 384 | ||
| 338 | /* Disable the virtual timer */ | 385 | trace_kvm_timer_save_state(ctx); |
| 339 | write_sysreg_el0(0, cntv_ctl); | ||
| 340 | isb(); | ||
| 341 | 386 | ||
| 342 | vtimer->loaded = false; | 387 | ctx->loaded = false; |
| 343 | out: | 388 | out: |
| 344 | local_irq_restore(flags); | 389 | local_irq_restore(flags); |
| 345 | } | 390 | } |
| @@ -349,67 +394,72 @@ out: | |||
| 349 | * thread is removed from its waitqueue and made runnable when there's a timer | 394 | * thread is removed from its waitqueue and made runnable when there's a timer |
| 350 | * interrupt to handle. | 395 | * interrupt to handle. |
| 351 | */ | 396 | */ |
| 352 | void kvm_timer_schedule(struct kvm_vcpu *vcpu) | 397 | static void kvm_timer_blocking(struct kvm_vcpu *vcpu) |
| 353 | { | 398 | { |
| 354 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 399 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 355 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 400 | struct timer_map map; |
| 356 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
| 357 | |||
| 358 | vtimer_save_state(vcpu); | ||
| 359 | 401 | ||
| 360 | /* | 402 | get_timer_map(vcpu, &map); |
| 361 | * No need to schedule a background timer if any guest timer has | ||
| 362 | * already expired, because kvm_vcpu_block will return before putting | ||
| 363 | * the thread to sleep. | ||
| 364 | */ | ||
| 365 | if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer)) | ||
| 366 | return; | ||
| 367 | 403 | ||
| 368 | /* | 404 | /* |
| 369 | * If both timers are not capable of raising interrupts (disabled or | 405 | * If no timers are capable of raising interrupts (disabled or |
| 370 | * masked), then there's no more work for us to do. | 406 | * masked), then there's no more work for us to do. |
| 371 | */ | 407 | */ |
| 372 | if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer)) | 408 | if (!kvm_timer_irq_can_fire(map.direct_vtimer) && |
| 409 | !kvm_timer_irq_can_fire(map.direct_ptimer) && | ||
| 410 | !kvm_timer_irq_can_fire(map.emul_ptimer)) | ||
| 373 | return; | 411 | return; |
| 374 | 412 | ||
| 375 | /* | 413 | /* |
| 376 | * The guest timers have not yet expired, schedule a background timer. | 414 | * At least one guest timer will expire. Schedule a background timer. |
| 377 | * Set the earliest expiration time among the guest timers. | 415 | * Set the earliest expiration time among the guest timers. |
| 378 | */ | 416 | */ |
| 379 | soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); | 417 | soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); |
| 380 | } | 418 | } |
| 381 | 419 | ||
| 382 | static void vtimer_restore_state(struct kvm_vcpu *vcpu) | 420 | static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) |
| 383 | { | 421 | { |
| 384 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 422 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 385 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 423 | |
| 424 | soft_timer_cancel(&timer->bg_timer); | ||
| 425 | } | ||
| 426 | |||
| 427 | static void timer_restore_state(struct arch_timer_context *ctx) | ||
| 428 | { | ||
| 429 | struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); | ||
| 430 | enum kvm_arch_timers index = arch_timer_ctx_index(ctx); | ||
| 386 | unsigned long flags; | 431 | unsigned long flags; |
| 387 | 432 | ||
| 433 | if (!timer->enabled) | ||
| 434 | return; | ||
| 435 | |||
| 388 | local_irq_save(flags); | 436 | local_irq_save(flags); |
| 389 | 437 | ||
| 390 | if (vtimer->loaded) | 438 | if (ctx->loaded) |
| 391 | goto out; | 439 | goto out; |
| 392 | 440 | ||
| 393 | if (timer->enabled) { | 441 | switch (index) { |
| 394 | write_sysreg_el0(vtimer->cnt_cval, cntv_cval); | 442 | case TIMER_VTIMER: |
| 443 | write_sysreg_el0(ctx->cnt_cval, cntv_cval); | ||
| 395 | isb(); | 444 | isb(); |
| 396 | write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); | 445 | write_sysreg_el0(ctx->cnt_ctl, cntv_ctl); |
| 446 | break; | ||
| 447 | case TIMER_PTIMER: | ||
| 448 | write_sysreg_el0(ctx->cnt_cval, cntp_cval); | ||
| 449 | isb(); | ||
| 450 | write_sysreg_el0(ctx->cnt_ctl, cntp_ctl); | ||
| 451 | break; | ||
| 452 | case NR_KVM_TIMERS: | ||
| 453 | BUG(); | ||
| 397 | } | 454 | } |
| 398 | 455 | ||
| 399 | vtimer->loaded = true; | 456 | trace_kvm_timer_restore_state(ctx); |
| 457 | |||
| 458 | ctx->loaded = true; | ||
| 400 | out: | 459 | out: |
| 401 | local_irq_restore(flags); | 460 | local_irq_restore(flags); |
| 402 | } | 461 | } |
| 403 | 462 | ||
| 404 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu) | ||
| 405 | { | ||
| 406 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | ||
| 407 | |||
| 408 | vtimer_restore_state(vcpu); | ||
| 409 | |||
| 410 | soft_timer_cancel(&timer->bg_timer); | ||
| 411 | } | ||
| 412 | |||
| 413 | static void set_cntvoff(u64 cntvoff) | 463 | static void set_cntvoff(u64 cntvoff) |
| 414 | { | 464 | { |
| 415 | u32 low = lower_32_bits(cntvoff); | 465 | u32 low = lower_32_bits(cntvoff); |
| @@ -425,23 +475,32 @@ static void set_cntvoff(u64 cntvoff) | |||
| 425 | kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); | 475 | kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); |
| 426 | } | 476 | } |
| 427 | 477 | ||
| 428 | static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active) | 478 | static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active) |
| 429 | { | 479 | { |
| 430 | int r; | 480 | int r; |
| 431 | r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active); | 481 | r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active); |
| 432 | WARN_ON(r); | 482 | WARN_ON(r); |
| 433 | } | 483 | } |
| 434 | 484 | ||
| 435 | static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu) | 485 | static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) |
| 436 | { | 486 | { |
| 437 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 487 | struct kvm_vcpu *vcpu = ctx->vcpu; |
| 438 | bool phys_active; | 488 | bool phys_active = false; |
| 489 | |||
| 490 | /* | ||
| 491 | * Update the timer output so that it is likely to match the | ||
| 492 | * state we're about to restore. If the timer expires between | ||
| 493 | * this point and the register restoration, we'll take the | ||
| 494 | * interrupt anyway. | ||
| 495 | */ | ||
| 496 | kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); | ||
| 439 | 497 | ||
| 440 | if (irqchip_in_kernel(vcpu->kvm)) | 498 | if (irqchip_in_kernel(vcpu->kvm)) |
| 441 | phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); | 499 | phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); |
| 442 | else | 500 | |
| 443 | phys_active = vtimer->irq.level; | 501 | phys_active |= ctx->irq.level; |
| 444 | set_vtimer_irq_phys_active(vcpu, phys_active); | 502 | |
| 503 | set_timer_irq_phys_active(ctx, phys_active); | ||
| 445 | } | 504 | } |
| 446 | 505 | ||
| 447 | static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) | 506 | static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) |
| @@ -466,28 +525,32 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) | |||
| 466 | 525 | ||
| 467 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) | 526 | void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) |
| 468 | { | 527 | { |
| 469 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 528 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 470 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 529 | struct timer_map map; |
| 471 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
| 472 | 530 | ||
| 473 | if (unlikely(!timer->enabled)) | 531 | if (unlikely(!timer->enabled)) |
| 474 | return; | 532 | return; |
| 475 | 533 | ||
| 476 | if (static_branch_likely(&has_gic_active_state)) | 534 | get_timer_map(vcpu, &map); |
| 477 | kvm_timer_vcpu_load_gic(vcpu); | 535 | |
| 478 | else | 536 | if (static_branch_likely(&has_gic_active_state)) { |
| 537 | kvm_timer_vcpu_load_gic(map.direct_vtimer); | ||
| 538 | if (map.direct_ptimer) | ||
| 539 | kvm_timer_vcpu_load_gic(map.direct_ptimer); | ||
| 540 | } else { | ||
| 479 | kvm_timer_vcpu_load_nogic(vcpu); | 541 | kvm_timer_vcpu_load_nogic(vcpu); |
| 542 | } | ||
| 480 | 543 | ||
| 481 | set_cntvoff(vtimer->cntvoff); | 544 | set_cntvoff(map.direct_vtimer->cntvoff); |
| 482 | 545 | ||
| 483 | vtimer_restore_state(vcpu); | 546 | kvm_timer_unblocking(vcpu); |
| 484 | 547 | ||
| 485 | /* Set the background timer for the physical timer emulation. */ | 548 | timer_restore_state(map.direct_vtimer); |
| 486 | phys_timer_emulate(vcpu); | 549 | if (map.direct_ptimer) |
| 550 | timer_restore_state(map.direct_ptimer); | ||
| 487 | 551 | ||
| 488 | /* If the timer fired while we weren't running, inject it now */ | 552 | if (map.emul_ptimer) |
| 489 | if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) | 553 | timer_emulate(map.emul_ptimer); |
| 490 | kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); | ||
| 491 | } | 554 | } |
| 492 | 555 | ||
| 493 | bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) | 556 | bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) |
| @@ -509,15 +572,20 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) | |||
| 509 | 572 | ||
| 510 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | 573 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) |
| 511 | { | 574 | { |
| 512 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 575 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 576 | struct timer_map map; | ||
| 513 | 577 | ||
| 514 | if (unlikely(!timer->enabled)) | 578 | if (unlikely(!timer->enabled)) |
| 515 | return; | 579 | return; |
| 516 | 580 | ||
| 517 | vtimer_save_state(vcpu); | 581 | get_timer_map(vcpu, &map); |
| 582 | |||
| 583 | timer_save_state(map.direct_vtimer); | ||
| 584 | if (map.direct_ptimer) | ||
| 585 | timer_save_state(map.direct_ptimer); | ||
| 518 | 586 | ||
| 519 | /* | 587 | /* |
| 520 | * Cancel the physical timer emulation, because the only case where we | 588 | * Cancel soft timer emulation, because the only case where we |
| 521 | * need it after a vcpu_put is in the context of a sleeping VCPU, and | 589 | * need it after a vcpu_put is in the context of a sleeping VCPU, and |
| 522 | * in that case we already factor in the deadline for the physical | 590 | * in that case we already factor in the deadline for the physical |
| 523 | * timer when scheduling the bg_timer. | 591 | * timer when scheduling the bg_timer. |
| @@ -525,7 +593,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 525 | * In any case, we re-schedule the hrtimer for the physical timer when | 593 | * In any case, we re-schedule the hrtimer for the physical timer when |
| 526 | * coming back to the VCPU thread in kvm_timer_vcpu_load(). | 594 | * coming back to the VCPU thread in kvm_timer_vcpu_load(). |
| 527 | */ | 595 | */ |
| 528 | soft_timer_cancel(&timer->phys_timer); | 596 | if (map.emul_ptimer) |
| 597 | soft_timer_cancel(&map.emul_ptimer->hrtimer); | ||
| 598 | |||
| 599 | if (swait_active(kvm_arch_vcpu_wq(vcpu))) | ||
| 600 | kvm_timer_blocking(vcpu); | ||
| 529 | 601 | ||
| 530 | /* | 602 | /* |
| 531 | * The kernel may decide to run userspace after calling vcpu_put, so | 603 | * The kernel may decide to run userspace after calling vcpu_put, so |
| @@ -534,8 +606,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 534 | * counter of non-VHE case. For VHE, the virtual counter uses a fixed | 606 | * counter of non-VHE case. For VHE, the virtual counter uses a fixed |
| 535 | * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. | 607 | * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. |
| 536 | */ | 608 | */ |
| 537 | if (!has_vhe()) | 609 | set_cntvoff(0); |
| 538 | set_cntvoff(0); | ||
| 539 | } | 610 | } |
| 540 | 611 | ||
| 541 | /* | 612 | /* |
| @@ -550,7 +621,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) | |||
| 550 | if (!kvm_timer_should_fire(vtimer)) { | 621 | if (!kvm_timer_should_fire(vtimer)) { |
| 551 | kvm_timer_update_irq(vcpu, false, vtimer); | 622 | kvm_timer_update_irq(vcpu, false, vtimer); |
| 552 | if (static_branch_likely(&has_gic_active_state)) | 623 | if (static_branch_likely(&has_gic_active_state)) |
| 553 | set_vtimer_irq_phys_active(vcpu, false); | 624 | set_timer_irq_phys_active(vtimer, false); |
| 554 | else | 625 | else |
| 555 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); | 626 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); |
| 556 | } | 627 | } |
| @@ -558,7 +629,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) | |||
| 558 | 629 | ||
| 559 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | 630 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) |
| 560 | { | 631 | { |
| 561 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 632 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 562 | 633 | ||
| 563 | if (unlikely(!timer->enabled)) | 634 | if (unlikely(!timer->enabled)) |
| 564 | return; | 635 | return; |
| @@ -569,9 +640,10 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | |||
| 569 | 640 | ||
| 570 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) | 641 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) |
| 571 | { | 642 | { |
| 572 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 643 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 573 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 644 | struct timer_map map; |
| 574 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 645 | |
| 646 | get_timer_map(vcpu, &map); | ||
| 575 | 647 | ||
| 576 | /* | 648 | /* |
| 577 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 | 649 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 |
| @@ -579,12 +651,22 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 579 | * resets the timer to be disabled and unmasked and is compliant with | 651 | * resets the timer to be disabled and unmasked and is compliant with |
| 580 | * the ARMv7 architecture. | 652 | * the ARMv7 architecture. |
| 581 | */ | 653 | */ |
| 582 | vtimer->cnt_ctl = 0; | 654 | vcpu_vtimer(vcpu)->cnt_ctl = 0; |
| 583 | ptimer->cnt_ctl = 0; | 655 | vcpu_ptimer(vcpu)->cnt_ctl = 0; |
| 584 | kvm_timer_update_state(vcpu); | ||
| 585 | 656 | ||
| 586 | if (timer->enabled && irqchip_in_kernel(vcpu->kvm)) | 657 | if (timer->enabled) { |
| 587 | kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq); | 658 | kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); |
| 659 | kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); | ||
| 660 | |||
| 661 | if (irqchip_in_kernel(vcpu->kvm)) { | ||
| 662 | kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); | ||
| 663 | if (map.direct_ptimer) | ||
| 664 | kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); | ||
| 665 | } | ||
| 666 | } | ||
| 667 | |||
| 668 | if (map.emul_ptimer) | ||
| 669 | soft_timer_cancel(&map.emul_ptimer->hrtimer); | ||
| 588 | 670 | ||
| 589 | return 0; | 671 | return 0; |
| 590 | } | 672 | } |
| @@ -610,56 +692,76 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) | |||
| 610 | 692 | ||
| 611 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) | 693 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) |
| 612 | { | 694 | { |
| 613 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 695 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 614 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 696 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); |
| 615 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 697 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); |
| 616 | 698 | ||
| 617 | /* Synchronize cntvoff across all vtimers of a VM. */ | 699 | /* Synchronize cntvoff across all vtimers of a VM. */ |
| 618 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); | 700 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); |
| 619 | vcpu_ptimer(vcpu)->cntvoff = 0; | 701 | ptimer->cntvoff = 0; |
| 620 | 702 | ||
| 621 | hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 703 | hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
| 622 | timer->bg_timer.function = kvm_bg_timer_expire; | 704 | timer->bg_timer.function = kvm_bg_timer_expire; |
| 623 | 705 | ||
| 624 | hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 706 | hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
| 625 | timer->phys_timer.function = kvm_phys_timer_expire; | 707 | hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
| 708 | vtimer->hrtimer.function = kvm_hrtimer_expire; | ||
| 709 | ptimer->hrtimer.function = kvm_hrtimer_expire; | ||
| 626 | 710 | ||
| 627 | vtimer->irq.irq = default_vtimer_irq.irq; | 711 | vtimer->irq.irq = default_vtimer_irq.irq; |
| 628 | ptimer->irq.irq = default_ptimer_irq.irq; | 712 | ptimer->irq.irq = default_ptimer_irq.irq; |
| 713 | |||
| 714 | vtimer->host_timer_irq = host_vtimer_irq; | ||
| 715 | ptimer->host_timer_irq = host_ptimer_irq; | ||
| 716 | |||
| 717 | vtimer->host_timer_irq_flags = host_vtimer_irq_flags; | ||
| 718 | ptimer->host_timer_irq_flags = host_ptimer_irq_flags; | ||
| 719 | |||
| 720 | vtimer->vcpu = vcpu; | ||
| 721 | ptimer->vcpu = vcpu; | ||
| 629 | } | 722 | } |
| 630 | 723 | ||
| 631 | static void kvm_timer_init_interrupt(void *info) | 724 | static void kvm_timer_init_interrupt(void *info) |
| 632 | { | 725 | { |
| 633 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); | 726 | enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); |
| 727 | enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); | ||
| 634 | } | 728 | } |
| 635 | 729 | ||
| 636 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | 730 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) |
| 637 | { | 731 | { |
| 638 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 732 | struct arch_timer_context *timer; |
| 639 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | 733 | bool level; |
| 640 | 734 | ||
| 641 | switch (regid) { | 735 | switch (regid) { |
| 642 | case KVM_REG_ARM_TIMER_CTL: | 736 | case KVM_REG_ARM_TIMER_CTL: |
| 643 | vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; | 737 | timer = vcpu_vtimer(vcpu); |
| 738 | kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); | ||
| 644 | break; | 739 | break; |
| 645 | case KVM_REG_ARM_TIMER_CNT: | 740 | case KVM_REG_ARM_TIMER_CNT: |
| 741 | timer = vcpu_vtimer(vcpu); | ||
| 646 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); | 742 | update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); |
| 647 | break; | 743 | break; |
| 648 | case KVM_REG_ARM_TIMER_CVAL: | 744 | case KVM_REG_ARM_TIMER_CVAL: |
| 649 | vtimer->cnt_cval = value; | 745 | timer = vcpu_vtimer(vcpu); |
| 746 | kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); | ||
| 650 | break; | 747 | break; |
| 651 | case KVM_REG_ARM_PTIMER_CTL: | 748 | case KVM_REG_ARM_PTIMER_CTL: |
| 652 | ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; | 749 | timer = vcpu_ptimer(vcpu); |
| 750 | kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); | ||
| 653 | break; | 751 | break; |
| 654 | case KVM_REG_ARM_PTIMER_CVAL: | 752 | case KVM_REG_ARM_PTIMER_CVAL: |
| 655 | ptimer->cnt_cval = value; | 753 | timer = vcpu_ptimer(vcpu); |
| 754 | kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); | ||
| 656 | break; | 755 | break; |
| 657 | 756 | ||
| 658 | default: | 757 | default: |
| 659 | return -1; | 758 | return -1; |
| 660 | } | 759 | } |
| 661 | 760 | ||
| 662 | kvm_timer_update_state(vcpu); | 761 | level = kvm_timer_should_fire(timer); |
| 762 | kvm_timer_update_irq(vcpu, level, timer); | ||
| 763 | timer_emulate(timer); | ||
| 764 | |||
| 663 | return 0; | 765 | return 0; |
| 664 | } | 766 | } |
| 665 | 767 | ||
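Each write now funnels through kvm_arm_timer_write() and then re-evaluates the line level and re-arms emulation, so a userspace save/restore of the timer registers takes effect immediately. A hedged usage sketch from the VMM side (vcpu_fd and the saved values are placeholders; the register IDs are the ones handled above):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int restore_vtimer(int vcpu_fd, uint64_t saved_cval, uint64_t saved_ctl)
{
	struct kvm_one_reg reg;

	reg.id   = KVM_REG_ARM_TIMER_CVAL;
	reg.addr = (uintptr_t)&saved_cval;
	if (ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg))
		return -1;

	reg.id   = KVM_REG_ARM_TIMER_CTL;	/* CTL after CVAL, matching a typical restore order */
	reg.addr = (uintptr_t)&saved_ctl;
	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}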
| @@ -679,26 +781,113 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) | |||
| 679 | 781 | ||
| 680 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) | 782 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) |
| 681 | { | 783 | { |
| 682 | struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); | ||
| 683 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | ||
| 684 | |||
| 685 | switch (regid) { | 784 | switch (regid) { |
| 686 | case KVM_REG_ARM_TIMER_CTL: | 785 | case KVM_REG_ARM_TIMER_CTL: |
| 687 | return read_timer_ctl(vtimer); | 786 | return kvm_arm_timer_read(vcpu, |
| 787 | vcpu_vtimer(vcpu), TIMER_REG_CTL); | ||
| 688 | case KVM_REG_ARM_TIMER_CNT: | 788 | case KVM_REG_ARM_TIMER_CNT: |
| 689 | return kvm_phys_timer_read() - vtimer->cntvoff; | 789 | return kvm_arm_timer_read(vcpu, |
| 790 | vcpu_vtimer(vcpu), TIMER_REG_CNT); | ||
| 690 | case KVM_REG_ARM_TIMER_CVAL: | 791 | case KVM_REG_ARM_TIMER_CVAL: |
| 691 | return vtimer->cnt_cval; | 792 | return kvm_arm_timer_read(vcpu, |
| 793 | vcpu_vtimer(vcpu), TIMER_REG_CVAL); | ||
| 692 | case KVM_REG_ARM_PTIMER_CTL: | 794 | case KVM_REG_ARM_PTIMER_CTL: |
| 693 | return read_timer_ctl(ptimer); | 795 | return kvm_arm_timer_read(vcpu, |
| 694 | case KVM_REG_ARM_PTIMER_CVAL: | 796 | vcpu_ptimer(vcpu), TIMER_REG_CTL); |
| 695 | return ptimer->cnt_cval; | ||
| 696 | case KVM_REG_ARM_PTIMER_CNT: | 797 | case KVM_REG_ARM_PTIMER_CNT: |
| 697 | return kvm_phys_timer_read(); | 798 | return kvm_arm_timer_read(vcpu, |
| 799 | vcpu_vtimer(vcpu), TIMER_REG_CNT); | ||
| 800 | case KVM_REG_ARM_PTIMER_CVAL: | ||
| 801 | return kvm_arm_timer_read(vcpu, | ||
| 802 | vcpu_ptimer(vcpu), TIMER_REG_CVAL); | ||
| 698 | } | 803 | } |
| 699 | return (u64)-1; | 804 | return (u64)-1; |
| 700 | } | 805 | } |
| 701 | 806 | ||
| 807 | static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, | ||
| 808 | struct arch_timer_context *timer, | ||
| 809 | enum kvm_arch_timer_regs treg) | ||
| 810 | { | ||
| 811 | u64 val; | ||
| 812 | |||
| 813 | switch (treg) { | ||
| 814 | case TIMER_REG_TVAL: | ||
| 815 | val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval; | ||
| 816 | break; | ||
| 817 | |||
| 818 | case TIMER_REG_CTL: | ||
| 819 | val = read_timer_ctl(timer); | ||
| 820 | break; | ||
| 821 | |||
| 822 | case TIMER_REG_CVAL: | ||
| 823 | val = timer->cnt_cval; | ||
| 824 | break; | ||
| 825 | |||
| 826 | case TIMER_REG_CNT: | ||
| 827 | val = kvm_phys_timer_read() - timer->cntvoff; | ||
| 828 | break; | ||
| 829 | |||
| 830 | default: | ||
| 831 | BUG(); | ||
| 832 | } | ||
| 833 | |||
| 834 | return val; | ||
| 835 | } | ||
| 836 | |||
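The CNT case makes the offset handling explicit: a context's counter view is always the physical counter minus that context's cntvoff, which is zero for the physical timer. A small worked example of the relation used above (illustrative values only):

u64 host_cnt   = 1000000;		/* kvm_phys_timer_read() at this instant */
u64 cntvoff    = 250000;		/* vtimer->cntvoff, shared across the VM */
u64 guest_cnt  = host_cnt - cntvoff;	/* TIMER_REG_CNT for the vtimer == 750000 */
u64 ptimer_cnt = host_cnt - 0;		/* ptimer->cntvoff is always 0 */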
| 837 | u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, | ||
| 838 | enum kvm_arch_timers tmr, | ||
| 839 | enum kvm_arch_timer_regs treg) | ||
| 840 | { | ||
| 841 | u64 val; | ||
| 842 | |||
| 843 | preempt_disable(); | ||
| 844 | kvm_timer_vcpu_put(vcpu); | ||
| 845 | |||
| 846 | val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg); | ||
| 847 | |||
| 848 | kvm_timer_vcpu_load(vcpu); | ||
| 849 | preempt_enable(); | ||
| 850 | |||
| 851 | return val; | ||
| 852 | } | ||
| 853 | |||
| 854 | static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, | ||
| 855 | struct arch_timer_context *timer, | ||
| 856 | enum kvm_arch_timer_regs treg, | ||
| 857 | u64 val) | ||
| 858 | { | ||
| 859 | switch (treg) { | ||
| 860 | case TIMER_REG_TVAL: | ||
| 861 | timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff; | ||
| 862 | break; | ||
| 863 | |||
| 864 | case TIMER_REG_CTL: | ||
| 865 | timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; | ||
| 866 | break; | ||
| 867 | |||
| 868 | case TIMER_REG_CVAL: | ||
| 869 | timer->cnt_cval = val; | ||
| 870 | break; | ||
| 871 | |||
| 872 | default: | ||
| 873 | BUG(); | ||
| 874 | } | ||
| 875 | } | ||
| 876 | |||
| 877 | void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, | ||
| 878 | enum kvm_arch_timers tmr, | ||
| 879 | enum kvm_arch_timer_regs treg, | ||
| 880 | u64 val) | ||
| 881 | { | ||
| 882 | preempt_disable(); | ||
| 883 | kvm_timer_vcpu_put(vcpu); | ||
| 884 | |||
| 885 | kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val); | ||
| 886 | |||
| 887 | kvm_timer_vcpu_load(vcpu); | ||
| 888 | preempt_enable(); | ||
| 889 | } | ||
| 890 | |||
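These wrappers exist for trap handlers that need the architectural view of a timer register while the VCPU's timer may currently be loaded in hardware: putting the timer first folds the live state back into the in-memory context, and loading it again afterwards re-arms everything. A hedged sketch of a caller, modeled on the arm64 sys_regs handlers (the handler name, the struct sys_reg_params plumbing and the TIMER_PTIMER enum value are assumptions, not part of this hunk):

static bool emulate_cntp_ctl(struct kvm_vcpu *vcpu, struct sys_reg_params *p)
{
	if (p->is_write)
		kvm_arm_timer_write_sysreg(vcpu, TIMER_PTIMER, TIMER_REG_CTL,
					   p->regval);
	else
		p->regval = kvm_arm_timer_read_sysreg(vcpu, TIMER_PTIMER,
						      TIMER_REG_CTL);
	return true;
}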
| 702 | static int kvm_timer_starting_cpu(unsigned int cpu) | 891 | static int kvm_timer_starting_cpu(unsigned int cpu) |
| 703 | { | 892 | { |
| 704 | kvm_timer_init_interrupt(NULL); | 893 | kvm_timer_init_interrupt(NULL); |
| @@ -724,6 +913,8 @@ int kvm_timer_hyp_init(bool has_gic) | |||
| 724 | return -ENODEV; | 913 | return -ENODEV; |
| 725 | } | 914 | } |
| 726 | 915 | ||
| 916 | /* First, do the virtual EL1 timer irq */ | ||
| 917 | |||
| 727 | if (info->virtual_irq <= 0) { | 918 | if (info->virtual_irq <= 0) { |
| 728 | kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", | 919 | kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", |
| 729 | info->virtual_irq); | 920 | info->virtual_irq); |
| @@ -734,15 +925,15 @@ int kvm_timer_hyp_init(bool has_gic) | |||
| 734 | host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); | 925 | host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); |
| 735 | if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && | 926 | if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && |
| 736 | host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { | 927 | host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { |
| 737 | kvm_err("Invalid trigger for IRQ%d, assuming level low\n", | 928 | kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", |
| 738 | host_vtimer_irq); | 929 | host_vtimer_irq); |
| 739 | host_vtimer_irq_flags = IRQF_TRIGGER_LOW; | 930 | host_vtimer_irq_flags = IRQF_TRIGGER_LOW; |
| 740 | } | 931 | } |
| 741 | 932 | ||
| 742 | err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, | 933 | err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, |
| 743 | "kvm guest timer", kvm_get_running_vcpus()); | 934 | "kvm guest vtimer", kvm_get_running_vcpus()); |
| 744 | if (err) { | 935 | if (err) { |
| 745 | kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", | 936 | kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n", |
| 746 | host_vtimer_irq, err); | 937 | host_vtimer_irq, err); |
| 747 | return err; | 938 | return err; |
| 748 | } | 939 | } |
| @@ -760,6 +951,43 @@ int kvm_timer_hyp_init(bool has_gic) | |||
| 760 | 951 | ||
| 761 | kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); | 952 | kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); |
| 762 | 953 | ||
| 954 | /* Now let's do the physical EL1 timer irq */ | ||
| 955 | |||
| 956 | if (info->physical_irq > 0) { | ||
| 957 | host_ptimer_irq = info->physical_irq; | ||
| 958 | host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); | ||
| 959 | if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && | ||
| 960 | host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { | ||
| 961 | kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", | ||
| 962 | host_ptimer_irq); | ||
| 963 | host_ptimer_irq_flags = IRQF_TRIGGER_LOW; | ||
| 964 | } | ||
| 965 | |||
| 966 | err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, | ||
| 967 | "kvm guest ptimer", kvm_get_running_vcpus()); | ||
| 968 | if (err) { | ||
| 969 | kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n", | ||
| 970 | host_ptimer_irq, err); | ||
| 971 | return err; | ||
| 972 | } | ||
| 973 | |||
| 974 | if (has_gic) { | ||
| 975 | err = irq_set_vcpu_affinity(host_ptimer_irq, | ||
| 976 | kvm_get_running_vcpus()); | ||
| 977 | if (err) { | ||
| 978 | kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); | ||
| 979 | goto out_free_irq; | ||
| 980 | } | ||
| 981 | } | ||
| 982 | |||
| 983 | kvm_debug("physical timer IRQ%d\n", host_ptimer_irq); | ||
| 984 | } else if (has_vhe()) { | ||
| 985 | kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n", | ||
| 986 | info->physical_irq); | ||
| 987 | err = -ENODEV; | ||
| 988 | goto out_free_irq; | ||
| 989 | } | ||
| 990 | |||
| 763 | cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, | 991 | cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, |
| 764 | "kvm/arm/timer:starting", kvm_timer_starting_cpu, | 992 | "kvm/arm/timer:starting", kvm_timer_starting_cpu, |
| 765 | kvm_timer_dying_cpu); | 993 | kvm_timer_dying_cpu); |
| @@ -771,7 +999,7 @@ out_free_irq: | |||
| 771 | 999 | ||
| 772 | void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) | 1000 | void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) |
| 773 | { | 1001 | { |
| 774 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 1002 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 775 | 1003 | ||
| 776 | soft_timer_cancel(&timer->bg_timer); | 1004 | soft_timer_cancel(&timer->bg_timer); |
| 777 | } | 1005 | } |
| @@ -807,16 +1035,18 @@ bool kvm_arch_timer_get_input_level(int vintid) | |||
| 807 | 1035 | ||
| 808 | if (vintid == vcpu_vtimer(vcpu)->irq.irq) | 1036 | if (vintid == vcpu_vtimer(vcpu)->irq.irq) |
| 809 | timer = vcpu_vtimer(vcpu); | 1037 | timer = vcpu_vtimer(vcpu); |
| 1038 | else if (vintid == vcpu_ptimer(vcpu)->irq.irq) | ||
| 1039 | timer = vcpu_ptimer(vcpu); | ||
| 810 | else | 1040 | else |
| 811 | BUG(); /* We only map the vtimer so far */ | 1041 | BUG(); |
| 812 | 1042 | ||
| 813 | return kvm_timer_should_fire(timer); | 1043 | return kvm_timer_should_fire(timer); |
| 814 | } | 1044 | } |
| 815 | 1045 | ||
| 816 | int kvm_timer_enable(struct kvm_vcpu *vcpu) | 1046 | int kvm_timer_enable(struct kvm_vcpu *vcpu) |
| 817 | { | 1047 | { |
| 818 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | 1048 | struct arch_timer_cpu *timer = vcpu_timer(vcpu); |
| 819 | struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); | 1049 | struct timer_map map; |
| 820 | int ret; | 1050 | int ret; |
| 821 | 1051 | ||
| 822 | if (timer->enabled) | 1052 | if (timer->enabled) |
| @@ -834,19 +1064,33 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) | |||
| 834 | return -EINVAL; | 1064 | return -EINVAL; |
| 835 | } | 1065 | } |
| 836 | 1066 | ||
| 837 | ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq, | 1067 | get_timer_map(vcpu, &map); |
| 1068 | |||
| 1069 | ret = kvm_vgic_map_phys_irq(vcpu, | ||
| 1070 | map.direct_vtimer->host_timer_irq, | ||
| 1071 | map.direct_vtimer->irq.irq, | ||
| 838 | kvm_arch_timer_get_input_level); | 1072 | kvm_arch_timer_get_input_level); |
| 839 | if (ret) | 1073 | if (ret) |
| 840 | return ret; | 1074 | return ret; |
| 841 | 1075 | ||
| 1076 | if (map.direct_ptimer) { | ||
| 1077 | ret = kvm_vgic_map_phys_irq(vcpu, | ||
| 1078 | map.direct_ptimer->host_timer_irq, | ||
| 1079 | map.direct_ptimer->irq.irq, | ||
| 1080 | kvm_arch_timer_get_input_level); | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | if (ret) | ||
| 1084 | return ret; | ||
| 1085 | |||
| 842 | no_vgic: | 1086 | no_vgic: |
| 843 | timer->enabled = 1; | 1087 | timer->enabled = 1; |
| 844 | return 0; | 1088 | return 0; |
| 845 | } | 1089 | } |
| 846 | 1090 | ||
| 847 | /* | 1091 | /* |
| 848 | * On VHE system, we only need to configure trap on physical timer and counter | 1092 | * On VHE system, we only need to configure the EL2 timer trap register once, |
| 849 | * accesses in EL0 and EL1 once, not for every world switch. | 1093 | * not for every world switch. |
| 850 | * The host kernel runs at EL2 with HCR_EL2.TGE == 1, | 1094 | * The host kernel runs at EL2 with HCR_EL2.TGE == 1, |
| 851 | * and this makes those bits have no effect for the host kernel execution. | 1095 | * and this makes those bits have no effect for the host kernel execution. |
| 852 | */ | 1096 | */ |
| @@ -857,11 +1101,11 @@ void kvm_timer_init_vhe(void) | |||
| 857 | u64 val; | 1101 | u64 val; |
| 858 | 1102 | ||
| 859 | /* | 1103 | /* |
| 860 | * Disallow physical timer access for the guest. | 1104 | * VHE systems allow the guest direct access to the EL1 physical |
| 861 | * Physical counter access is allowed. | 1105 | * timer/counter. |
| 862 | */ | 1106 | */ |
| 863 | val = read_sysreg(cnthctl_el2); | 1107 | val = read_sysreg(cnthctl_el2); |
| 864 | val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift); | 1108 | val |= (CNTHCTL_EL1PCEN << cnthctl_shift); |
| 865 | val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); | 1109 | val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); |
| 866 | write_sysreg(val, cnthctl_el2); | 1110 | write_sysreg(val, cnthctl_el2); |
| 867 | } | 1111 | } |
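This is the guest-facing half of the VHE physical timer support: with both EL1PCTEN and EL1PCEN set, EL1/EL0 accesses to the physical counter and timer no longer trap to EL2, which is what allows the physical timer to be assigned directly to the guest above. The bit layout this relies on, assumed from the architecture (cnthctl_shift itself is defined outside this diff):

/* CNTHCTL_EL2 trap controls, non-VHE (HCR_EL2.E2H == 0) layout. */
#define CNTHCTL_EL1PCTEN	(1 << 0)	/* EL1/EL0 physical counter access */
#define CNTHCTL_EL1PCEN		(1 << 1)	/* EL1/EL0 physical timer access */
/* With VHE (E2H == 1) the same controls sit higher up, hence the shift. */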
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9c486fad3f9f..99c37384ba7b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
| @@ -65,7 +65,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); | |||
| 65 | /* The VMID used in the VTTBR */ | 65 | /* The VMID used in the VTTBR */ |
| 66 | static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); | 66 | static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); |
| 67 | static u32 kvm_next_vmid; | 67 | static u32 kvm_next_vmid; |
| 68 | static unsigned int kvm_vmid_bits __read_mostly; | ||
| 69 | static DEFINE_SPINLOCK(kvm_vmid_lock); | 68 | static DEFINE_SPINLOCK(kvm_vmid_lock); |
| 70 | 69 | ||
| 71 | static bool vgic_present; | 70 | static bool vgic_present; |
| @@ -142,7 +141,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 142 | kvm_vgic_early_init(kvm); | 141 | kvm_vgic_early_init(kvm); |
| 143 | 142 | ||
| 144 | /* Mark the initial VMID generation invalid */ | 143 | /* Mark the initial VMID generation invalid */ |
| 145 | kvm->arch.vmid_gen = 0; | 144 | kvm->arch.vmid.vmid_gen = 0; |
| 146 | 145 | ||
| 147 | /* The maximum number of VCPUs is limited by the host's GIC model */ | 146 | /* The maximum number of VCPUs is limited by the host's GIC model */ |
| 148 | kvm->arch.max_vcpus = vgic_present ? | 147 | kvm->arch.max_vcpus = vgic_present ? |
| @@ -336,13 +335,11 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
| 336 | 335 | ||
| 337 | void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) | 336 | void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) |
| 338 | { | 337 | { |
| 339 | kvm_timer_schedule(vcpu); | ||
| 340 | kvm_vgic_v4_enable_doorbell(vcpu); | 338 | kvm_vgic_v4_enable_doorbell(vcpu); |
| 341 | } | 339 | } |
| 342 | 340 | ||
| 343 | void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) | 341 | void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) |
| 344 | { | 342 | { |
| 345 | kvm_timer_unschedule(vcpu); | ||
| 346 | kvm_vgic_v4_disable_doorbell(vcpu); | 343 | kvm_vgic_v4_disable_doorbell(vcpu); |
| 347 | } | 344 | } |
| 348 | 345 | ||
| @@ -472,37 +469,31 @@ void force_vm_exit(const cpumask_t *mask) | |||
| 472 | 469 | ||
| 473 | /** | 470 | /** |
| 474 | * need_new_vmid_gen - check that the VMID is still valid | 471 | * need_new_vmid_gen - check that the VMID is still valid |
| 475 | * @kvm: The VM's VMID to check | 472 | * @vmid: The VMID to check |
| 476 | * | 473 | * |
| 477 | * return true if there is a new generation of VMIDs being used | 474 | * return true if there is a new generation of VMIDs being used |
| 478 | * | 475 | * |
| 479 | * The hardware supports only 256 values with the value zero reserved for the | 476 | * The hardware supports a limited set of values with the value zero reserved |
| 480 | * host, so we check if an assigned value belongs to a previous generation, | 477 | * for the host, so we check if an assigned value belongs to a previous |
| 481 | * which which requires us to assign a new value. If we're the first to use a | 478 | * generation, which which requires us to assign a new value. If we're the |
| 482 | * VMID for the new generation, we must flush necessary caches and TLBs on all | 479 | * first to use a VMID for the new generation, we must flush necessary caches |
| 483 | * CPUs. | 480 | * and TLBs on all CPUs. |
| 484 | */ | 481 | */ |
| 485 | static bool need_new_vmid_gen(struct kvm *kvm) | 482 | static bool need_new_vmid_gen(struct kvm_vmid *vmid) |
| 486 | { | 483 | { |
| 487 | u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); | 484 | u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); |
| 488 | smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ | 485 | smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ |
| 489 | return unlikely(READ_ONCE(kvm->arch.vmid_gen) != current_vmid_gen); | 486 | return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen); |
| 490 | } | 487 | } |
| 491 | 488 | ||
| 492 | /** | 489 | /** |
| 493 | * update_vttbr - Update the VTTBR with a valid VMID before the guest runs | 490 | * update_vmid - Update the vmid with a valid VMID for the current generation |
| 494 | * @kvm The guest that we are about to run | 491 | * @kvm: The guest that struct vmid belongs to |
| 495 | * | 492 | * @vmid: The stage-2 VMID information struct |
| 496 | * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the | ||
| 497 | * VM has a valid VMID, otherwise assigns a new one and flushes corresponding | ||
| 498 | * caches and TLBs. | ||
| 499 | */ | 493 | */ |
| 500 | static void update_vttbr(struct kvm *kvm) | 494 | static void update_vmid(struct kvm_vmid *vmid) |
| 501 | { | 495 | { |
| 502 | phys_addr_t pgd_phys; | 496 | if (!need_new_vmid_gen(vmid)) |
| 503 | u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0; | ||
| 504 | |||
| 505 | if (!need_new_vmid_gen(kvm)) | ||
| 506 | return; | 497 | return; |
| 507 | 498 | ||
| 508 | spin_lock(&kvm_vmid_lock); | 499 | spin_lock(&kvm_vmid_lock); |
| @@ -512,7 +503,7 @@ static void update_vttbr(struct kvm *kvm) | |||
| 512 | * already allocated a valid vmid for this vm, then this vcpu should | 503 | * already allocated a valid vmid for this vm, then this vcpu should |
| 513 | * use the same vmid. | 504 | * use the same vmid. |
| 514 | */ | 505 | */ |
| 515 | if (!need_new_vmid_gen(kvm)) { | 506 | if (!need_new_vmid_gen(vmid)) { |
| 516 | spin_unlock(&kvm_vmid_lock); | 507 | spin_unlock(&kvm_vmid_lock); |
| 517 | return; | 508 | return; |
| 518 | } | 509 | } |
| @@ -536,18 +527,12 @@ static void update_vttbr(struct kvm *kvm) | |||
| 536 | kvm_call_hyp(__kvm_flush_vm_context); | 527 | kvm_call_hyp(__kvm_flush_vm_context); |
| 537 | } | 528 | } |
| 538 | 529 | ||
| 539 | kvm->arch.vmid = kvm_next_vmid; | 530 | vmid->vmid = kvm_next_vmid; |
| 540 | kvm_next_vmid++; | 531 | kvm_next_vmid++; |
| 541 | kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; | 532 | kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; |
| 542 | |||
| 543 | /* update vttbr to be used with the new vmid */ | ||
| 544 | pgd_phys = virt_to_phys(kvm->arch.pgd); | ||
| 545 | BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)); | ||
| 546 | vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); | ||
| 547 | kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp; | ||
| 548 | 533 | ||
| 549 | smp_wmb(); | 534 | smp_wmb(); |
| 550 | WRITE_ONCE(kvm->arch.vmid_gen, atomic64_read(&kvm_vmid_gen)); | 535 | WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen)); |
| 551 | 536 | ||
| 552 | spin_unlock(&kvm_vmid_lock); | 537 | spin_unlock(&kvm_vmid_lock); |
| 553 | } | 538 | } |
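Stage-2 VMID state is now self-contained in struct kvm_vmid, so the check is a plain generation comparison against the global counter. A compressed illustration of the slow path it guards (field names as used above; the rollover handling sits in the elided middle of this function):

/* Illustrative: a VM whose cached generation is stale must re-acquire a VMID. */
struct kvm_vmid vmid = { .vmid = 42, .vmid_gen = 3 };
atomic64_set(&kvm_vmid_gen, 4);		/* the VMID space rolled over since */
WARN_ON(!need_new_vmid_gen(&vmid));	/* true: update_vmid() assigns a new one */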
| @@ -700,7 +685,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 700 | */ | 685 | */ |
| 701 | cond_resched(); | 686 | cond_resched(); |
| 702 | 687 | ||
| 703 | update_vttbr(vcpu->kvm); | 688 | update_vmid(&vcpu->kvm->arch.vmid); |
| 704 | 689 | ||
| 705 | check_vcpu_requests(vcpu); | 690 | check_vcpu_requests(vcpu); |
| 706 | 691 | ||
| @@ -749,7 +734,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 749 | */ | 734 | */ |
| 750 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); | 735 | smp_store_mb(vcpu->mode, IN_GUEST_MODE); |
| 751 | 736 | ||
| 752 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || | 737 | if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || |
| 753 | kvm_request_pending(vcpu)) { | 738 | kvm_request_pending(vcpu)) { |
| 754 | vcpu->mode = OUTSIDE_GUEST_MODE; | 739 | vcpu->mode = OUTSIDE_GUEST_MODE; |
| 755 | isb(); /* Ensure work in x_flush_hwstate is committed */ | 740 | isb(); /* Ensure work in x_flush_hwstate is committed */ |
| @@ -775,7 +760,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 775 | ret = kvm_vcpu_run_vhe(vcpu); | 760 | ret = kvm_vcpu_run_vhe(vcpu); |
| 776 | kvm_arm_vhe_guest_exit(); | 761 | kvm_arm_vhe_guest_exit(); |
| 777 | } else { | 762 | } else { |
| 778 | ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); | 763 | ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); |
| 779 | } | 764 | } |
| 780 | 765 | ||
| 781 | vcpu->mode = OUTSIDE_GUEST_MODE; | 766 | vcpu->mode = OUTSIDE_GUEST_MODE; |
| @@ -1427,10 +1412,6 @@ static inline void hyp_cpu_pm_exit(void) | |||
| 1427 | 1412 | ||
| 1428 | static int init_common_resources(void) | 1413 | static int init_common_resources(void) |
| 1429 | { | 1414 | { |
| 1430 | /* set size of VMID supported by CPU */ | ||
| 1431 | kvm_vmid_bits = kvm_get_vmid_bits(); | ||
| 1432 | kvm_info("%d-bit VMID\n", kvm_vmid_bits); | ||
| 1433 | |||
| 1434 | kvm_set_ipa_limit(); | 1415 | kvm_set_ipa_limit(); |
| 1435 | 1416 | ||
| 1436 | return 0; | 1417 | return 0; |
| @@ -1571,6 +1552,7 @@ static int init_hyp_mode(void) | |||
| 1571 | kvm_cpu_context_t *cpu_ctxt; | 1552 | kvm_cpu_context_t *cpu_ctxt; |
| 1572 | 1553 | ||
| 1573 | cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); | 1554 | cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); |
| 1555 | kvm_init_host_cpu_context(cpu_ctxt, cpu); | ||
| 1574 | err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); | 1556 | err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); |
| 1575 | 1557 | ||
| 1576 | if (err) { | 1558 | if (err) { |
| @@ -1581,7 +1563,7 @@ static int init_hyp_mode(void) | |||
| 1581 | 1563 | ||
| 1582 | err = hyp_map_aux_data(); | 1564 | err = hyp_map_aux_data(); |
| 1583 | if (err) | 1565 | if (err) |
| 1584 | kvm_err("Cannot map host auxilary data: %d\n", err); | 1566 | kvm_err("Cannot map host auxiliary data: %d\n", err); |
| 1585 | 1567 | ||
| 1586 | return 0; | 1568 | return 0; |
| 1587 | 1569 | ||
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 9652c453480f..264d92da3240 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c | |||
| @@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | |||
| 226 | int i; | 226 | int i; |
| 227 | u32 elrsr; | 227 | u32 elrsr; |
| 228 | 228 | ||
| 229 | elrsr = read_gicreg(ICH_ELSR_EL2); | 229 | elrsr = read_gicreg(ICH_ELRSR_EL2); |
| 230 | 230 | ||
| 231 | write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); | 231 | write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); |
| 232 | 232 | ||
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index e9d28a7ca673..ffd7acdceac7 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
| @@ -908,6 +908,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, | |||
| 908 | */ | 908 | */ |
| 909 | int kvm_alloc_stage2_pgd(struct kvm *kvm) | 909 | int kvm_alloc_stage2_pgd(struct kvm *kvm) |
| 910 | { | 910 | { |
| 911 | phys_addr_t pgd_phys; | ||
| 911 | pgd_t *pgd; | 912 | pgd_t *pgd; |
| 912 | 913 | ||
| 913 | if (kvm->arch.pgd != NULL) { | 914 | if (kvm->arch.pgd != NULL) { |
| @@ -920,7 +921,12 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) | |||
| 920 | if (!pgd) | 921 | if (!pgd) |
| 921 | return -ENOMEM; | 922 | return -ENOMEM; |
| 922 | 923 | ||
| 924 | pgd_phys = virt_to_phys(pgd); | ||
| 925 | if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) | ||
| 926 | return -EINVAL; | ||
| 927 | |||
| 923 | kvm->arch.pgd = pgd; | 928 | kvm->arch.pgd = pgd; |
| 929 | kvm->arch.pgd_phys = pgd_phys; | ||
| 924 | return 0; | 930 | return 0; |
| 925 | } | 931 | } |
| 926 | 932 | ||
| @@ -1008,6 +1014,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) | |||
| 1008 | unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); | 1014 | unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); |
| 1009 | pgd = READ_ONCE(kvm->arch.pgd); | 1015 | pgd = READ_ONCE(kvm->arch.pgd); |
| 1010 | kvm->arch.pgd = NULL; | 1016 | kvm->arch.pgd = NULL; |
| 1017 | kvm->arch.pgd_phys = 0; | ||
| 1011 | } | 1018 | } |
| 1012 | spin_unlock(&kvm->mmu_lock); | 1019 | spin_unlock(&kvm->mmu_lock); |
| 1013 | 1020 | ||
| @@ -1396,14 +1403,6 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) | |||
| 1396 | return false; | 1403 | return false; |
| 1397 | } | 1404 | } |
| 1398 | 1405 | ||
| 1399 | static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) | ||
| 1400 | { | ||
| 1401 | if (kvm_vcpu_trap_is_iabt(vcpu)) | ||
| 1402 | return false; | ||
| 1403 | |||
| 1404 | return kvm_vcpu_dabt_iswrite(vcpu); | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | /** | 1406 | /** |
| 1408 | * stage2_wp_ptes - write protect PMD range | 1407 | * stage2_wp_ptes - write protect PMD range |
| 1409 | * @pmd: pointer to pmd entry | 1408 | * @pmd: pointer to pmd entry |
| @@ -1598,14 +1597,13 @@ static void kvm_send_hwpoison_signal(unsigned long address, | |||
| 1598 | static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, | 1597 | static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, |
| 1599 | unsigned long hva) | 1598 | unsigned long hva) |
| 1600 | { | 1599 | { |
| 1601 | gpa_t gpa_start, gpa_end; | 1600 | gpa_t gpa_start; |
| 1602 | hva_t uaddr_start, uaddr_end; | 1601 | hva_t uaddr_start, uaddr_end; |
| 1603 | size_t size; | 1602 | size_t size; |
| 1604 | 1603 | ||
| 1605 | size = memslot->npages * PAGE_SIZE; | 1604 | size = memslot->npages * PAGE_SIZE; |
| 1606 | 1605 | ||
| 1607 | gpa_start = memslot->base_gfn << PAGE_SHIFT; | 1606 | gpa_start = memslot->base_gfn << PAGE_SHIFT; |
| 1608 | gpa_end = gpa_start + size; | ||
| 1609 | 1607 | ||
| 1610 | uaddr_start = memslot->userspace_addr; | 1608 | uaddr_start = memslot->userspace_addr; |
| 1611 | uaddr_end = uaddr_start + size; | 1609 | uaddr_end = uaddr_start + size; |
| @@ -2353,7 +2351,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
| 2353 | return 0; | 2351 | return 0; |
| 2354 | } | 2352 | } |
| 2355 | 2353 | ||
| 2356 | void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) | 2354 | void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) |
| 2357 | { | 2355 | { |
| 2358 | } | 2356 | } |
| 2359 | 2357 | ||
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 3828beab93f2..204d210d01c2 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) | 2 | #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) |
| 3 | #define _TRACE_KVM_H | 3 | #define _TRACE_KVM_H |
| 4 | 4 | ||
| 5 | #include <kvm/arm_arch_timer.h> | ||
| 5 | #include <linux/tracepoint.h> | 6 | #include <linux/tracepoint.h> |
| 6 | 7 | ||
| 7 | #undef TRACE_SYSTEM | 8 | #undef TRACE_SYSTEM |
| @@ -262,10 +263,114 @@ TRACE_EVENT(kvm_timer_update_irq, | |||
| 262 | __entry->vcpu_id, __entry->irq, __entry->level) | 263 | __entry->vcpu_id, __entry->irq, __entry->level) |
| 263 | ); | 264 | ); |
| 264 | 265 | ||
| 266 | TRACE_EVENT(kvm_get_timer_map, | ||
| 267 | TP_PROTO(unsigned long vcpu_id, struct timer_map *map), | ||
| 268 | TP_ARGS(vcpu_id, map), | ||
| 269 | |||
| 270 | TP_STRUCT__entry( | ||
| 271 | __field( unsigned long, vcpu_id ) | ||
| 272 | __field( int, direct_vtimer ) | ||
| 273 | __field( int, direct_ptimer ) | ||
| 274 | __field( int, emul_ptimer ) | ||
| 275 | ), | ||
| 276 | |||
| 277 | TP_fast_assign( | ||
| 278 | __entry->vcpu_id = vcpu_id; | ||
| 279 | __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); | ||
| 280 | __entry->direct_ptimer = | ||
| 281 | (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; | ||
| 282 | __entry->emul_ptimer = | ||
| 283 | (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; | ||
| 284 | ), | ||
| 285 | |||
| 286 | TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", | ||
| 287 | __entry->vcpu_id, | ||
| 288 | __entry->direct_vtimer, | ||
| 289 | __entry->direct_ptimer, | ||
| 290 | __entry->emul_ptimer) | ||
| 291 | ); | ||
| 292 | |||
| 293 | TRACE_EVENT(kvm_timer_save_state, | ||
| 294 | TP_PROTO(struct arch_timer_context *ctx), | ||
| 295 | TP_ARGS(ctx), | ||
| 296 | |||
| 297 | TP_STRUCT__entry( | ||
| 298 | __field( unsigned long, ctl ) | ||
| 299 | __field( unsigned long long, cval ) | ||
| 300 | __field( int, timer_idx ) | ||
| 301 | ), | ||
| 302 | |||
| 303 | TP_fast_assign( | ||
| 304 | __entry->ctl = ctx->cnt_ctl; | ||
| 305 | __entry->cval = ctx->cnt_cval; | ||
| 306 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
| 307 | ), | ||
| 308 | |||
| 309 | TP_printk(" CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", | ||
| 310 | __entry->ctl, | ||
| 311 | __entry->cval, | ||
| 312 | __entry->timer_idx) | ||
| 313 | ); | ||
| 314 | |||
| 315 | TRACE_EVENT(kvm_timer_restore_state, | ||
| 316 | TP_PROTO(struct arch_timer_context *ctx), | ||
| 317 | TP_ARGS(ctx), | ||
| 318 | |||
| 319 | TP_STRUCT__entry( | ||
| 320 | __field( unsigned long, ctl ) | ||
| 321 | __field( unsigned long long, cval ) | ||
| 322 | __field( int, timer_idx ) | ||
| 323 | ), | ||
| 324 | |||
| 325 | TP_fast_assign( | ||
| 326 | __entry->ctl = ctx->cnt_ctl; | ||
| 327 | __entry->cval = ctx->cnt_cval; | ||
| 328 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
| 329 | ), | ||
| 330 | |||
| 331 | TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", | ||
| 332 | __entry->ctl, | ||
| 333 | __entry->cval, | ||
| 334 | __entry->timer_idx) | ||
| 335 | ); | ||
| 336 | |||
| 337 | TRACE_EVENT(kvm_timer_hrtimer_expire, | ||
| 338 | TP_PROTO(struct arch_timer_context *ctx), | ||
| 339 | TP_ARGS(ctx), | ||
| 340 | |||
| 341 | TP_STRUCT__entry( | ||
| 342 | __field( int, timer_idx ) | ||
| 343 | ), | ||
| 344 | |||
| 345 | TP_fast_assign( | ||
| 346 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
| 347 | ), | ||
| 348 | |||
| 349 | TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx) | ||
| 350 | ); | ||
| 351 | |||
| 352 | TRACE_EVENT(kvm_timer_emulate, | ||
| 353 | TP_PROTO(struct arch_timer_context *ctx, bool should_fire), | ||
| 354 | TP_ARGS(ctx, should_fire), | ||
| 355 | |||
| 356 | TP_STRUCT__entry( | ||
| 357 | __field( int, timer_idx ) | ||
| 358 | __field( bool, should_fire ) | ||
| 359 | ), | ||
| 360 | |||
| 361 | TP_fast_assign( | ||
| 362 | __entry->timer_idx = arch_timer_ctx_index(ctx); | ||
| 363 | __entry->should_fire = should_fire; | ||
| 364 | ), | ||
| 365 | |||
| 366 | TP_printk("arch_timer_ctx_index: %d (should_fire: %d)", | ||
| 367 | __entry->timer_idx, __entry->should_fire) | ||
| 368 | ); | ||
| 369 | |||
| 265 | #endif /* _TRACE_KVM_H */ | 370 | #endif /* _TRACE_KVM_H */ |
| 266 | 371 | ||
| 267 | #undef TRACE_INCLUDE_PATH | 372 | #undef TRACE_INCLUDE_PATH |
| 268 | #define TRACE_INCLUDE_PATH ../../../virt/kvm/arm | 373 | #define TRACE_INCLUDE_PATH ../../virt/kvm/arm |
| 269 | #undef TRACE_INCLUDE_FILE | 374 | #undef TRACE_INCLUDE_FILE |
| 270 | #define TRACE_INCLUDE_FILE trace | 375 | #define TRACE_INCLUDE_FILE trace |
| 271 | 376 | ||
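The new tracepoints register under the kvm trace system, so they can be toggled at runtime like the existing kvm_timer_update_irq event. A small hedged sketch of enabling one from a test harness (the tracefs path assumes the usual debugfs mount point):

#include <fcntl.h>
#include <unistd.h>

static void enable_timer_emulate_trace(void)
{
	int fd = open("/sys/kernel/debug/tracing/events/kvm/kvm_timer_emulate/enable",
		      O_WRONLY);
	if (fd >= 0) {
		write(fd, "1", 1);
		close(fd);
	}
}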
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 4ee0aeb9a905..408a78eb6a97 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c | |||
| @@ -589,7 +589,7 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); | |||
| 589 | */ | 589 | */ |
| 590 | int vgic_v3_probe(const struct gic_kvm_info *info) | 590 | int vgic_v3_probe(const struct gic_kvm_info *info) |
| 591 | { | 591 | { |
| 592 | u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); | 592 | u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); |
| 593 | int ret; | 593 | int ret; |
| 594 | 594 | ||
| 595 | /* | 595 | /* |
| @@ -679,7 +679,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) | |||
| 679 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | 679 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
| 680 | 680 | ||
| 681 | if (likely(cpu_if->vgic_sre)) | 681 | if (likely(cpu_if->vgic_sre)) |
| 682 | cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); | 682 | cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); |
| 683 | 683 | ||
| 684 | kvm_call_hyp(__vgic_v3_save_aprs, vcpu); | 684 | kvm_call_hyp(__vgic_v3_save_aprs, vcpu); |
| 685 | 685 | ||
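The kvm_call_hyp() to kvm_call_hyp_ret() conversions here and above in arm.c track a split made elsewhere in this series between hyp calls whose return value is consumed and those where it is ignored; the call sites only change spelling, not behaviour. A minimal sketch of the assumed convention (the macro definitions themselves are not part of this diff):

kvm_call_hyp(__kvm_flush_vm_context);			/* return value ignored */
int ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);	/* return value consumed */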
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 6855cce3e528..5294abb3f178 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
| @@ -144,7 +144,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | |||
| 144 | if (zone->pio != 1 && zone->pio != 0) | 144 | if (zone->pio != 1 && zone->pio != 0) |
| 145 | return -EINVAL; | 145 | return -EINVAL; |
| 146 | 146 | ||
| 147 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | 147 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), |
| 148 | GFP_KERNEL_ACCOUNT); | ||
| 148 | if (!dev) | 149 | if (!dev) |
| 149 | return -ENOMEM; | 150 | return -ENOMEM; |
| 150 | 151 | ||
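This and the following GFP_KERNEL to GFP_KERNEL_ACCOUNT conversions make every VM-lifetime allocation chargeable to the kernel-memory controller of the cgroup that owns the VM's process; freeing stays a plain kfree()/kvfree(). The flag itself is just GFP_KERNEL with accounting added:

/* From include/linux/gfp.h: */
#define GFP_KERNEL_ACCOUNT	(GFP_KERNEL | __GFP_ACCOUNT)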
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b20b751286fc..4325250afd72 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
| @@ -297,7 +297,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 297 | if (!kvm_arch_intc_initialized(kvm)) | 297 | if (!kvm_arch_intc_initialized(kvm)) |
| 298 | return -EAGAIN; | 298 | return -EAGAIN; |
| 299 | 299 | ||
| 300 | irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); | 300 | irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT); |
| 301 | if (!irqfd) | 301 | if (!irqfd) |
| 302 | return -ENOMEM; | 302 | return -ENOMEM; |
| 303 | 303 | ||
| @@ -345,7 +345,8 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 345 | } | 345 | } |
| 346 | 346 | ||
| 347 | if (!irqfd->resampler) { | 347 | if (!irqfd->resampler) { |
| 348 | resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); | 348 | resampler = kzalloc(sizeof(*resampler), |
| 349 | GFP_KERNEL_ACCOUNT); | ||
| 349 | if (!resampler) { | 350 | if (!resampler) { |
| 350 | ret = -ENOMEM; | 351 | ret = -ENOMEM; |
| 351 | mutex_unlock(&kvm->irqfds.resampler_lock); | 352 | mutex_unlock(&kvm->irqfds.resampler_lock); |
| @@ -797,7 +798,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, | |||
| 797 | if (IS_ERR(eventfd)) | 798 | if (IS_ERR(eventfd)) |
| 798 | return PTR_ERR(eventfd); | 799 | return PTR_ERR(eventfd); |
| 799 | 800 | ||
| 800 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 801 | p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT); |
| 801 | if (!p) { | 802 | if (!p) { |
| 802 | ret = -ENOMEM; | 803 | ret = -ENOMEM; |
| 803 | goto fail; | 804 | goto fail; |
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b1286c4e0712..3547b0d8c91e 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c | |||
| @@ -196,7 +196,7 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
| 196 | nr_rt_entries += 1; | 196 | nr_rt_entries += 1; |
| 197 | 197 | ||
| 198 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), | 198 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)), |
| 199 | GFP_KERNEL); | 199 | GFP_KERNEL_ACCOUNT); |
| 200 | 200 | ||
| 201 | if (!new) | 201 | if (!new) |
| 202 | return -ENOMEM; | 202 | return -ENOMEM; |
| @@ -208,7 +208,7 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
| 208 | 208 | ||
| 209 | for (i = 0; i < nr; ++i) { | 209 | for (i = 0; i < nr; ++i) { |
| 210 | r = -ENOMEM; | 210 | r = -ENOMEM; |
| 211 | e = kzalloc(sizeof(*e), GFP_KERNEL); | 211 | e = kzalloc(sizeof(*e), GFP_KERNEL_ACCOUNT); |
| 212 | if (!e) | 212 | if (!e) |
| 213 | goto out; | 213 | goto out; |
| 214 | 214 | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d237d3350a99..f25aa98a94df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -81,6 +81,11 @@ unsigned int halt_poll_ns_grow = 2; | |||
| 81 | module_param(halt_poll_ns_grow, uint, 0644); | 81 | module_param(halt_poll_ns_grow, uint, 0644); |
| 82 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); | 82 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); |
| 83 | 83 | ||
| 84 | /* The start value to grow halt_poll_ns from */ | ||
| 85 | unsigned int halt_poll_ns_grow_start = 10000; /* 10us */ | ||
| 86 | module_param(halt_poll_ns_grow_start, uint, 0644); | ||
| 87 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start); | ||
| 88 | |||
| 84 | /* Default resets per-vcpu halt_poll_ns . */ | 89 | /* Default resets per-vcpu halt_poll_ns . */ |
| 85 | unsigned int halt_poll_ns_shrink; | 90 | unsigned int halt_poll_ns_shrink; |
| 86 | module_param(halt_poll_ns_shrink, uint, 0644); | 91 | module_param(halt_poll_ns_shrink, uint, 0644); |
| @@ -525,7 +530,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) | |||
| 525 | int i; | 530 | int i; |
| 526 | struct kvm_memslots *slots; | 531 | struct kvm_memslots *slots; |
| 527 | 532 | ||
| 528 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 533 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); |
| 529 | if (!slots) | 534 | if (!slots) |
| 530 | return NULL; | 535 | return NULL; |
| 531 | 536 | ||
| @@ -601,12 +606,12 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) | |||
| 601 | 606 | ||
| 602 | kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, | 607 | kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, |
| 603 | sizeof(*kvm->debugfs_stat_data), | 608 | sizeof(*kvm->debugfs_stat_data), |
| 604 | GFP_KERNEL); | 609 | GFP_KERNEL_ACCOUNT); |
| 605 | if (!kvm->debugfs_stat_data) | 610 | if (!kvm->debugfs_stat_data) |
| 606 | return -ENOMEM; | 611 | return -ENOMEM; |
| 607 | 612 | ||
| 608 | for (p = debugfs_entries; p->name; p++) { | 613 | for (p = debugfs_entries; p->name; p++) { |
| 609 | stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); | 614 | stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); |
| 610 | if (!stat_data) | 615 | if (!stat_data) |
| 611 | return -ENOMEM; | 616 | return -ENOMEM; |
| 612 | 617 | ||
| @@ -656,12 +661,8 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
| 656 | struct kvm_memslots *slots = kvm_alloc_memslots(); | 661 | struct kvm_memslots *slots = kvm_alloc_memslots(); |
| 657 | if (!slots) | 662 | if (!slots) |
| 658 | goto out_err_no_srcu; | 663 | goto out_err_no_srcu; |
| 659 | /* | 664 | /* Generations must be different for each address space. */ |
| 660 | * Generations must be different for each address space. | 665 | slots->generation = i; |
| 661 | * Init kvm generation close to the maximum to easily test the | ||
| 662 | * code of handling generation number wrap-around. | ||
| 663 | */ | ||
| 664 | slots->generation = i * 2 - 150; | ||
| 665 | rcu_assign_pointer(kvm->memslots[i], slots); | 666 | rcu_assign_pointer(kvm->memslots[i], slots); |
| 666 | } | 667 | } |
| 667 | 668 | ||
| @@ -671,7 +672,7 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
| 671 | goto out_err_no_irq_srcu; | 672 | goto out_err_no_irq_srcu; |
| 672 | for (i = 0; i < KVM_NR_BUSES; i++) { | 673 | for (i = 0; i < KVM_NR_BUSES; i++) { |
| 673 | rcu_assign_pointer(kvm->buses[i], | 674 | rcu_assign_pointer(kvm->buses[i], |
| 674 | kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); | 675 | kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); |
| 675 | if (!kvm->buses[i]) | 676 | if (!kvm->buses[i]) |
| 676 | goto out_err; | 677 | goto out_err; |
| 677 | } | 678 | } |
| @@ -789,7 +790,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
| 789 | { | 790 | { |
| 790 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); | 791 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); |
| 791 | 792 | ||
| 792 | memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); | 793 | memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT); |
| 793 | if (!memslot->dirty_bitmap) | 794 | if (!memslot->dirty_bitmap) |
| 794 | return -ENOMEM; | 795 | return -ENOMEM; |
| 795 | 796 | ||
| @@ -874,31 +875,34 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
| 874 | int as_id, struct kvm_memslots *slots) | 875 | int as_id, struct kvm_memslots *slots) |
| 875 | { | 876 | { |
| 876 | struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); | 877 | struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); |
| 878 | u64 gen = old_memslots->generation; | ||
| 877 | 879 | ||
| 878 | /* | 880 | WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); |
| 879 | * Set the low bit in the generation, which disables SPTE caching | 881 | slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; |
| 880 | * until the end of synchronize_srcu_expedited. | ||
| 881 | */ | ||
| 882 | WARN_ON(old_memslots->generation & 1); | ||
| 883 | slots->generation = old_memslots->generation + 1; | ||
| 884 | 882 | ||
| 885 | rcu_assign_pointer(kvm->memslots[as_id], slots); | 883 | rcu_assign_pointer(kvm->memslots[as_id], slots); |
| 886 | synchronize_srcu_expedited(&kvm->srcu); | 884 | synchronize_srcu_expedited(&kvm->srcu); |
| 887 | 885 | ||
| 888 | /* | 886 | /* |
| 889 | * Increment the new memslot generation a second time. This prevents | 887 | * Increment the new memslot generation a second time, dropping the |
| 890 | * vm exits that race with memslot updates from caching a memslot | 888 | * update in-progress flag and incrementing then generation based on |
| 891 | * generation that will (potentially) be valid forever. | 889 | * the number of address spaces. This provides a unique and easily |
| 892 | * | 890 | * identifiable generation number while the memslots are in flux. |
| 891 | */ | ||
| 892 | gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; | ||
| 893 | |||
| 894 | /* | ||
| 893 | * Generations must be unique even across address spaces. We do not need | 895 | * Generations must be unique even across address spaces. We do not need |
| 894 | * a global counter for that, instead the generation space is evenly split | 896 | * a global counter for that, instead the generation space is evenly split |
| 895 | * across address spaces. For example, with two address spaces, address | 897 | * across address spaces. For example, with two address spaces, address |
| 896 | * space 0 will use generations 0, 4, 8, ... while * address space 1 will | 898 | * space 0 will use generations 0, 2, 4, ... while address space 1 will |
| 897 | * use generations 2, 6, 10, 14, ... | 899 | * use generations 1, 3, 5, ... |
| 898 | */ | 900 | */ |
| 899 | slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; | 901 | gen += KVM_ADDRESS_SPACE_NUM; |
| 902 | |||
| 903 | kvm_arch_memslots_updated(kvm, gen); | ||
| 900 | 904 | ||
| 901 | kvm_arch_memslots_updated(kvm, slots); | 905 | slots->generation = gen; |
| 902 | 906 | ||
| 903 | return old_memslots; | 907 | return old_memslots; |
| 904 | } | 908 | } |
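The generation now doubles as a state word: while the new memslots are being published it carries the update-in-progress flag, and once the switch-over completes the flag is dropped and the counter advances by the number of address spaces, keeping generations unique per space. A worked example with two address spaces, assuming the in-progress flag is a single reserved high bit as defined in kvm_host.h:

/* Address space 0, previously at generation 6. */
u64 gen = 6 | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;	/* visible while in flux */
/* ... synchronize_srcu_expedited() and the arch notification happen here ... */
gen &= ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
gen += KVM_ADDRESS_SPACE_NUM;				/* 8: still even, still unique */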
| @@ -1018,7 +1022,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 1018 | goto out_free; | 1022 | goto out_free; |
| 1019 | } | 1023 | } |
| 1020 | 1024 | ||
| 1021 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 1025 | slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); |
| 1022 | if (!slots) | 1026 | if (!slots) |
| 1023 | goto out_free; | 1027 | goto out_free; |
| 1024 | memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); | 1028 | memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); |
| @@ -1201,11 +1205,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, | |||
| 1201 | mask = xchg(&dirty_bitmap[i], 0); | 1205 | mask = xchg(&dirty_bitmap[i], 0); |
| 1202 | dirty_bitmap_buffer[i] = mask; | 1206 | dirty_bitmap_buffer[i] = mask; |
| 1203 | 1207 | ||
| 1204 | if (mask) { | 1208 | offset = i * BITS_PER_LONG; |
| 1205 | offset = i * BITS_PER_LONG; | 1209 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, |
| 1206 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, | 1210 | offset, mask); |
| 1207 | offset, mask); | ||
| 1208 | } | ||
| 1209 | } | 1211 | } |
| 1210 | spin_unlock(&kvm->mmu_lock); | 1212 | spin_unlock(&kvm->mmu_lock); |
| 1211 | } | 1213 | } |
| @@ -2185,20 +2187,23 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) | |||
| 2185 | 2187 | ||
| 2186 | static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) | 2188 | static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) |
| 2187 | { | 2189 | { |
| 2188 | unsigned int old, val, grow; | 2190 | unsigned int old, val, grow, grow_start; |
| 2189 | 2191 | ||
| 2190 | old = val = vcpu->halt_poll_ns; | 2192 | old = val = vcpu->halt_poll_ns; |
| 2193 | grow_start = READ_ONCE(halt_poll_ns_grow_start); | ||
| 2191 | grow = READ_ONCE(halt_poll_ns_grow); | 2194 | grow = READ_ONCE(halt_poll_ns_grow); |
| 2192 | /* 10us base */ | 2195 | if (!grow) |
| 2193 | if (val == 0 && grow) | 2196 | goto out; |
| 2194 | val = 10000; | 2197 | |
| 2195 | else | 2198 | val *= grow; |
| 2196 | val *= grow; | 2199 | if (val < grow_start) |
| 2200 | val = grow_start; | ||
| 2197 | 2201 | ||
| 2198 | if (val > halt_poll_ns) | 2202 | if (val > halt_poll_ns) |
| 2199 | val = halt_poll_ns; | 2203 | val = halt_poll_ns; |
| 2200 | 2204 | ||
| 2201 | vcpu->halt_poll_ns = val; | 2205 | vcpu->halt_poll_ns = val; |
| 2206 | out: | ||
| 2202 | trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); | 2207 | trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); |
| 2203 | } | 2208 | } |
| 2204 | 2209 | ||
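The rewritten grow path starts polling at halt_poll_ns_grow_start instead of a hard-coded 10us, and a zero grow factor now disables growth outright. With the defaults above (grow = 2, grow_start = 10000) and assuming the global halt_poll_ns cap is 500000, successive growths of a VCPU's interval proceed as:

/* val starts at 0 for a VCPU that has never polled. */
/* 0 -> 10000 -> 20000 -> 40000 -> 80000 -> 160000 -> 320000 -> 500000 (clamped) */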
| @@ -2683,7 +2688,7 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
| 2683 | struct kvm_regs *kvm_regs; | 2688 | struct kvm_regs *kvm_regs; |
| 2684 | 2689 | ||
| 2685 | r = -ENOMEM; | 2690 | r = -ENOMEM; |
| 2686 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); | 2691 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT); |
| 2687 | if (!kvm_regs) | 2692 | if (!kvm_regs) |
| 2688 | goto out; | 2693 | goto out; |
| 2689 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); | 2694 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); |
| @@ -2711,7 +2716,8 @@ out_free1: | |||
| 2711 | break; | 2716 | break; |
| 2712 | } | 2717 | } |
| 2713 | case KVM_GET_SREGS: { | 2718 | case KVM_GET_SREGS: { |
| 2714 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); | 2719 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), |
| 2720 | GFP_KERNEL_ACCOUNT); | ||
| 2715 | r = -ENOMEM; | 2721 | r = -ENOMEM; |
| 2716 | if (!kvm_sregs) | 2722 | if (!kvm_sregs) |
| 2717 | goto out; | 2723 | goto out; |
| @@ -2803,7 +2809,7 @@ out_free1: | |||
| 2803 | break; | 2809 | break; |
| 2804 | } | 2810 | } |
| 2805 | case KVM_GET_FPU: { | 2811 | case KVM_GET_FPU: { |
| 2806 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); | 2812 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT); |
| 2807 | r = -ENOMEM; | 2813 | r = -ENOMEM; |
| 2808 | if (!fpu) | 2814 | if (!fpu) |
| 2809 | goto out; | 2815 | goto out; |
| @@ -2980,7 +2986,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, | |||
| 2980 | if (test) | 2986 | if (test) |
| 2981 | return 0; | 2987 | return 0; |
| 2982 | 2988 | ||
| 2983 | dev = kzalloc(sizeof(*dev), GFP_KERNEL); | 2989 | dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT); |
| 2984 | if (!dev) | 2990 | if (!dev) |
| 2985 | return -ENOMEM; | 2991 | return -ENOMEM; |
| 2986 | 2992 | ||
| @@ -3625,6 +3631,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
| 3625 | r = __kvm_io_bus_write(vcpu, bus, &range, val); | 3631 | r = __kvm_io_bus_write(vcpu, bus, &range, val); |
| 3626 | return r < 0 ? r : 0; | 3632 | return r < 0 ? r : 0; |
| 3627 | } | 3633 | } |
| 3634 | EXPORT_SYMBOL_GPL(kvm_io_bus_write); | ||
| 3628 | 3635 | ||
| 3629 | /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ | 3636 | /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ |
| 3630 | int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, | 3637 | int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, |
| @@ -3675,7 +3682,6 @@ static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, | |||
| 3675 | 3682 | ||
| 3676 | return -EOPNOTSUPP; | 3683 | return -EOPNOTSUPP; |
| 3677 | } | 3684 | } |
| 3678 | EXPORT_SYMBOL_GPL(kvm_io_bus_write); | ||
| 3679 | 3685 | ||
| 3680 | /* kvm_io_bus_read - called under kvm->slots_lock */ | 3686 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
| 3681 | int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | 3687 | int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, |
| @@ -3697,7 +3703,6 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
| 3697 | return r < 0 ? r : 0; | 3703 | return r < 0 ? r : 0; |
| 3698 | } | 3704 | } |
| 3699 | 3705 | ||
| 3700 | |||
| 3701 | /* Caller must hold slots_lock. */ | 3706 | /* Caller must hold slots_lock. */ |
| 3702 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 3707 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
| 3703 | int len, struct kvm_io_device *dev) | 3708 | int len, struct kvm_io_device *dev) |
| @@ -3714,8 +3719,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
| 3714 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) | 3719 | if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) |
| 3715 | return -ENOSPC; | 3720 | return -ENOSPC; |
| 3716 | 3721 | ||
| 3717 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * | 3722 | new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), |
| 3718 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3723 | GFP_KERNEL_ACCOUNT); |
| 3719 | if (!new_bus) | 3724 | if (!new_bus) |
| 3720 | return -ENOMEM; | 3725 | return -ENOMEM; |
| 3721 | 3726 | ||
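Both bus reallocation sites now size the flexible kvm_io_range[] tail with struct_size(), which checks the count multiplication for overflow instead of open-coding it. The equivalence, for a structure shaped like kvm_io_bus:

/* struct_size(bus, range, n) == sizeof(*bus) + n * sizeof(bus->range[0]),
 * saturating to SIZE_MAX on overflow so the following kmalloc() fails cleanly. */
new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), GFP_KERNEL_ACCOUNT);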
| @@ -3760,8 +3765,8 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
| 3760 | if (i == bus->dev_count) | 3765 | if (i == bus->dev_count) |
| 3761 | return; | 3766 | return; |
| 3762 | 3767 | ||
| 3763 | new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * | 3768 | new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), |
| 3764 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3769 | GFP_KERNEL_ACCOUNT); |
| 3765 | if (!new_bus) { | 3770 | if (!new_bus) { |
| 3766 | pr_err("kvm: failed to shrink bus, removing it completely\n"); | 3771 | pr_err("kvm: failed to shrink bus, removing it completely\n"); |
| 3767 | goto broken; | 3772 | goto broken; |
| @@ -4029,7 +4034,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) | |||
| 4029 | active = kvm_active_vms; | 4034 | active = kvm_active_vms; |
| 4030 | spin_unlock(&kvm_lock); | 4035 | spin_unlock(&kvm_lock); |
| 4031 | 4036 | ||
| 4032 | env = kzalloc(sizeof(*env), GFP_KERNEL); | 4037 | env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); |
| 4033 | if (!env) | 4038 | if (!env) |
| 4034 | return; | 4039 | return; |
| 4035 | 4040 | ||
| @@ -4045,7 +4050,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) | |||
| 4045 | add_uevent_var(env, "PID=%d", kvm->userspace_pid); | 4050 | add_uevent_var(env, "PID=%d", kvm->userspace_pid); |
| 4046 | 4051 | ||
| 4047 | if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { | 4052 | if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { |
| 4048 | char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); | 4053 | char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); |
| 4049 | 4054 | ||
| 4050 | if (p) { | 4055 | if (p) { |
| 4051 | tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); | 4056 | tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); |
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d99850c462a1..524cbd20379f 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c | |||
| @@ -219,7 +219,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) | |||
| 219 | } | 219 | } |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); | 222 | kvg = kzalloc(sizeof(*kvg), GFP_KERNEL_ACCOUNT); |
| 223 | if (!kvg) { | 223 | if (!kvg) { |
| 224 | mutex_unlock(&kv->lock); | 224 | mutex_unlock(&kv->lock); |
| 225 | kvm_vfio_group_put_external_user(vfio_group); | 225 | kvm_vfio_group_put_external_user(vfio_group); |
| @@ -405,7 +405,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) | |||
| 405 | if (tmp->ops == &kvm_vfio_ops) | 405 | if (tmp->ops == &kvm_vfio_ops) |
| 406 | return -EBUSY; | 406 | return -EBUSY; |
| 407 | 407 | ||
| 408 | kv = kzalloc(sizeof(*kv), GFP_KERNEL); | 408 | kv = kzalloc(sizeof(*kv), GFP_KERNEL_ACCOUNT); |
| 409 | if (!kv) | 409 | if (!kv) |
| 410 | return -ENOMEM; | 410 | return -ENOMEM; |
| 411 | 411 | ||
