author    Linus Torvalds <torvalds@linux-foundation.org>    2019-03-31 11:55:59 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2019-03-31 11:55:59 -0400
commit    63fc9c23488d6cf34e4c233e24ba59b7e5548412 (patch)
tree      08188f35d8625be520730e4ae106e8af2ee7b058
parent    915ee0da5ecb7ac7fd023ae36f01c47ce47a45d1 (diff)
parent    690edec54cbaa0e98dc592aae6864272f48f3c84 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
"A collection of x86 and ARM bugfixes, and some improvements to
documentation.
On top of this, a cleanup of kvm_para.h headers, which were exported
by some architectures even though they do not support KVM at all. This is
responsible for all the Kbuild changes in the diffstat"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
Documentation: kvm: clarify KVM_SET_USER_MEMORY_REGION
KVM: doc: Document the life cycle of a VM and its resources
KVM: selftests: complete IO before migrating guest state
KVM: selftests: disable stack protector for all KVM tests
KVM: selftests: explicitly disable PIE for tests
KVM: selftests: assert on exit reason in CR4/cpuid sync test
KVM: x86: update %rip after emulating IO
x86/kvm/hyper-v: avoid spurious pending stimer on vCPU init
kvm/x86: Move MSR_IA32_ARCH_CAPABILITIES to array emulated_msrs
KVM: x86: Emulate MSR_IA32_ARCH_CAPABILITIES on AMD hosts
kvm: don't redefine flags as something else
kvm: mmu: Used range based flushing in slot_handle_level_range
KVM: export <linux/kvm_para.h> and <asm/kvm_para.h> iif KVM is supported
KVM: x86: remove check on nr_mmu_pages in kvm_arch_commit_memory_region()
kvm: nVMX: Add a vmentry check for HOST_SYSENTER_ESP and HOST_SYSENTER_EIP fields
KVM: SVM: Workaround errata#1096 (insn_len maybe zero on SMAP violation)
KVM: Reject device ioctls from processes other than the VM's creator
KVM: doc: Fix incorrect word ordering regarding supported use of APIs
KVM: x86: fix handling of role.cr4_pae and rename it to 'gpte_size'
KVM: nVMX: Do not inherit quadrant and invalid for the root shadow EPT
...
60 files changed, 409 insertions, 201 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 7de9eee73fcd..67068c47c591 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -5,25 +5,32 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation
 ----------------------
 
 The kvm API is a set of ioctls that are issued to control various aspects
-of a virtual machine. The ioctls belong to three classes
+of a virtual machine. The ioctls belong to three classes:
 
 - System ioctls: These query and set global attributes which affect the
   whole kvm subsystem. In addition a system ioctl is used to create
-  virtual machines
+  virtual machines.
 
 - VM ioctls: These query and set attributes that affect an entire virtual
   machine, for example memory layout. In addition a VM ioctl is used to
-  create virtual cpus (vcpus).
+  create virtual cpus (vcpus) and devices.
 
-  Only run VM ioctls from the same process (address space) that was used
-  to create the VM.
+  VM ioctls must be issued from the same process (address space) that was
+  used to create the VM.
 
 - vcpu ioctls: These query and set attributes that control the operation
   of a single virtual cpu.
 
-  Only run vcpu ioctls from the same thread that was used to create the
-  vcpu.
+  vcpu ioctls should be issued from the same thread that was used to create
+  the vcpu, except for asynchronous vcpu ioctl that are marked as such in
+  the documentation. Otherwise, the first ioctl after switching threads
+  could see a performance impact.
 
+- device ioctls: These query and set attributes that control the operation
+  of a single device.
+
+  device ioctls must be issued from the same process (address space) that
+  was used to create the VM.
 
 2. File descriptors
 -------------------
@@ -32,17 +39,34 @@ The kvm API is centered around file descriptors. An initial
 open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
 can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
 handle will create a VM file descriptor which can be used to issue VM
-ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
-and return a file descriptor pointing to it. Finally, ioctls on a vcpu
-fd can be used to control the vcpu, including the important task of
-actually running guest code.
+ioctls. A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will
+create a virtual cpu or device and return a file descriptor pointing to
+the new resource. Finally, ioctls on a vcpu or device fd can be used
+to control the vcpu or device. For vcpus, this includes the important
+task of actually running guest code.
 
 In general file descriptors can be migrated among processes by means
 of fork() and the SCM_RIGHTS facility of unix domain socket. These
 kinds of tricks are explicitly not supported by kvm. While they will
 not cause harm to the host, their actual behavior is not guaranteed by
-the API. The only supported use is one virtual machine per process,
-and one vcpu per thread.
+the API. See "General description" for details on the ioctl usage
+model that is supported by KVM.
+
+It is important to note that althought VM ioctls may only be issued from
+the process that created the VM, a VM's lifecycle is associated with its
+file descriptor, not its creator (process). In other words, the VM and
+its resources, *including the associated address space*, are not freed
+until the last reference to the VM's file descriptor has been released.
+For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will
+not be freed until both the parent (original) process and its child have
+put their references to the VM's file descriptor.
+
+Because a VM's resources are not freed until the last reference to its
+file descriptor is released, creating additional references to a VM via
+via fork(), dup(), etc... without careful consideration is strongly
+discouraged and may have unwanted side effects, e.g. memory allocated
+by and on behalf of the VM's process may not be freed/unaccounted when
+the VM is shut down.
 
 
 It is important to note that althought VM ioctls may only be issued from
@@ -515,11 +539,15 @@ c) KVM_INTERRUPT_SET_LEVEL
 Note that any value for 'irq' other than the ones stated above is invalid
 and incurs unexpected behavior.
 
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
 MIPS:
 
 Queues an external interrupt to be injected into the virtual CPU. A negative
 interrupt number dequeues the interrupt.
 
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
 
 4.17 KVM_DEBUG_GUEST
 
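The phrase "asynchronous vcpu ioctl" introduced by the hunk above means the call need not come from the vcpu's own thread. As a minimal user-space sketch (illustrative only, not part of the patch; vcpu_fd is assumed to come from KVM_CREATE_VCPU, and on x86 KVM_INTERRUPT additionally assumes a user-space irqchip):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* May be called from a thread other than the one running KVM_RUN,
     * per the documentation change above. */
    static int inject_irq(int vcpu_fd, unsigned int irq_line)
    {
        struct kvm_interrupt irq = { .irq = irq_line };

        return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
    }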
@@ -1086,14 +1114,12 @@ struct kvm_userspace_memory_region {
 #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
 #define KVM_MEM_READONLY	(1UL << 1)
 
-This ioctl allows the user to create or modify a guest physical memory
-slot. When changing an existing slot, it may be moved in the guest
-physical memory space, or its flags may be modified. It may not be
-resized. Slots may not overlap in guest physical address space.
-Bits 0-15 of "slot" specifies the slot id and this value should be
-less than the maximum number of user memory slots supported per VM.
-The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
-if this capability is supported by the architecture.
+This ioctl allows the user to create, modify or delete a guest physical
+memory slot. Bits 0-15 of "slot" specify the slot id and this value
+should be less than the maximum number of user memory slots supported per
+VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
+if this capability is supported by the architecture. Slots may not
+overlap in guest physical address space.
 
 If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
 specifies the address space which is being modified. They must be
@@ -1102,6 +1128,10 @@ KVM_CAP_MULTI_ADDRESS_SPACE capability. Slots in separate address spaces
 are unrelated; the restriction on overlapping slots only applies within
 each address space.
 
+Deleting a slot is done by passing zero for memory_size. When changing
+an existing slot, it may be moved in the guest physical memory space,
+or its flags may be modified, but it may not be resized.
+
 Memory for the region is taken starting at the address denoted by the
 field userspace_addr, which must point at user addressable memory for
 the entire memory slot size. Any object may back this memory, including
@@ -2493,7 +2523,7 @@ KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm,
                            machine checks needing further payload are not
                            supported by this ioctl)
 
-Note that the vcpu ioctl is asynchronous to vcpu execution.
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
 
 4.78 KVM_PPC_GET_HTAB_FD
 
@@ -3042,8 +3072,7 @@ KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
 KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
 KVM_S390_MCHK - machine check interrupt; parameters in .mchk
 
-
-Note that the vcpu ioctl is asynchronous to vcpu execution.
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
 
 4.94 KVM_S390_GET_IRQ_STATE
 
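To make the KVM_SET_USER_MEMORY_REGION semantics documented in the api.txt hunks above concrete: a slot is created (or moved) with a non-zero memory_size and deleted with memory_size == 0. A minimal user-space sketch (illustrative only, not part of the patch; vm_fd is assumed to come from KVM_CREATE_VM, and error handling is elided):

    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <linux/kvm.h>

    static void memslot_example(int vm_fd)
    {
        void *mem = mmap(NULL, 0x100000, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        struct kvm_userspace_memory_region region = {
            .slot            = 0,            /* bits 0-15: slot id */
            .guest_phys_addr = 0x100000,
            .memory_size     = 0x100000,     /* non-zero: create the slot */
            .userspace_addr  = (unsigned long)mem,
        };

        ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);  /* create */

        region.memory_size = 0;              /* zero: delete the slot */
        ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
    }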
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index f365102c80f5..2efe0efc516e 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -142,7 +142,7 @@ Shadow pages contain the following information:
     If clear, this page corresponds to a guest page table denoted by the gfn
     field.
   role.quadrant:
-    When role.cr4_pae=0, the guest uses 32-bit gptes while the host uses 64-bit
+    When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit
     sptes. That means a guest page table contains more ptes than the host,
     so multiple shadow pages are needed to shadow one guest page.
     For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
@@ -158,9 +158,9 @@ Shadow pages contain the following information:
     The page is invalid and should not be used. It is a root page that is
     currently pinned (by a cpu hardware register pointing to it); once it is
     unpinned it will be destroyed.
-  role.cr4_pae:
-    Contains the value of cr4.pae for which the page is valid (e.g. whether
-    32-bit or 64-bit gptes are in use).
+  role.gpte_is_8_bytes:
+    Reflects the size of the guest PTE for which the page is valid, i.e. '1'
+    if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
   role.nxe:
     Contains the value of efer.nxe for which the page is valid.
   role.cr0_wp:
@@ -173,6 +173,9 @@ Shadow pages contain the following information:
     Contains the value of cr4.smap && !cr0.wp for which the page is valid
     (pages for which this is true are different from other pages; see the
     treatment of cr0.wp=0 below).
+  role.ept_sp:
+    This is a virtual flag to denote a shadowed nested EPT page. ept_sp
+    is true if "cr0_wp && smap_andnot_wp", an otherwise invalid combination.
   role.smm:
     Is 1 if the page is valid in system management mode. This field
     determines which of the kvm_memslots array was used to build this
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index dc0ab28baca1..70b783333965 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += exec.h
 generic-y += export.h
 generic-y += fb.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
diff --git a/arch/alpha/include/uapi/asm/kvm_para.h b/arch/alpha/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/alpha/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index b41f8881ecc8..decc306a3b52 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += hardirq.h
 generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 2de96a180166..31de4ab93005 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -381,6 +381,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm,
 	return ret;
 }
 
+static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
+				       const void *data, unsigned long len)
+{
+	int srcu_idx = srcu_read_lock(&kvm->srcu);
+	int ret = kvm_write_guest(kvm, gpa, data, len);
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	return ret;
+}
+
 static inline void *kvm_get_hyp_vector(void)
 {
 	switch(read_cpuid_part()) {
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
index de2089501b8b..9e11dce55e06 100644
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -75,6 +75,8 @@ static inline bool kvm_stage2_has_pud(struct kvm *kvm)
 
 #define S2_PMD_MASK				PMD_MASK
 #define S2_PMD_SIZE				PMD_SIZE
+#define S2_PUD_MASK				PUD_MASK
+#define S2_PUD_SIZE				PUD_SIZE
 
 static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
 {
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 23b4464c0995..ce8573157774 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -3,3 +3,4 @@
 generated-y += unistd-common.h
 generated-y += unistd-oabi.h
 generated-y += unistd-eabi.h
+generic-y += kvm_para.h
diff --git a/arch/arm/include/uapi/asm/kvm_para.h b/arch/arm/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/arm/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b0742a16c6c9..ebeefcf835e8 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -445,6 +445,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm,
 	return ret;
 }
 
+static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
+				       const void *data, unsigned long len)
+{
+	int srcu_idx = srcu_read_lock(&kvm->srcu);
+	int ret = kvm_write_guest(kvm, gpa, data, len);
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	return ret;
+}
+
 #ifdef CONFIG_KVM_INDIRECT_VECTORS
 /*
  * EL2 vectors can be mapped and rerouted in a number of ways,
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f16a5f8ff2b4..e2a0500cd7a2 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -123,6 +123,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	int ret = -EINVAL;
 	bool loaded;
 
+	/* Reset PMU outside of the non-preemptible section */
+	kvm_pmu_vcpu_reset(vcpu);
+
 	preempt_disable();
 	loaded = (vcpu->cpu != -1);
 	if (loaded)
@@ -170,9 +173,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 		vcpu->arch.reset_state.reset = false;
 	}
 
-	/* Reset PMU */
-	kvm_pmu_vcpu_reset(vcpu);
-
 	/* Default workaround setup is enabled (if supported) */
 	if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
 		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 63b4a1705182..249c9f6f26dc 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 3e7c8ecf151e..e3dead402e5f 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index b25fd42aa0f4..d046e8ccdf78 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/hexagon/include/uapi/asm/kvm_para.h b/arch/hexagon/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/hexagon/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 43e21fe3499c..11f191689c9e 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -2,6 +2,7 @@ generated-y += syscall_table.h
 generic-y += compat.h
 generic-y += exec.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 20018cb883a9..62a9522af51e 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
 generated-y += unistd_64.h
-generic-y += kvm_para.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 95f8f631c4df..2c359d9e80f6 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 8a7ad40be463..7417847dc438 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
 generated-y += unistd_32.h
-generic-y += kvm_para.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 791cc8d54d0a..1a8285c3f693 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -17,6 +17,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 3ce84fbb2678..13f59631c576 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -1,3 +1,2 @@
 generated-y += unistd_32.h
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 8fde4fa2c34f..88a667d12aaa 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 5a73e2956ac4..22aa97136c01 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -20,6 +20,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 6f49e77d82a2..9bcd0c903dbb 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 22fdbd08cdc8..2bd5b392277c 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -1,3 +1,2 @@
 generated-y += unistd_32.h
 generated-y += unistd_64.h
-generic-y += kvm_para.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index a6ef3fee5f85..7bf2cb680d32 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index ecfbd40924dd..b8812c74c1de 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 generated-y += unistd_32.h
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index b82f64e28f55..a22cfd5c0ee8 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += exec.h
 generic-y += export.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/sparc/include/uapi/asm/kvm_para.h b/arch/sparc/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/sparc/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 1d1544b6ca74..d77d953c04c1 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a5db4475e72d..159b5988292f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -253,14 +253,14 @@ struct kvm_mmu_memory_cache {
 * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
 * by indirect shadow page can not be more than 15 bits.
 *
- * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
+ * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
 * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
 */
 union kvm_mmu_page_role {
 	u32 word;
 	struct {
 		unsigned level:4;
-		unsigned cr4_pae:1;
+		unsigned gpte_is_8_bytes:1;
 		unsigned quadrant:2;
 		unsigned direct:1;
 		unsigned access:3;
@@ -350,6 +350,7 @@ struct kvm_mmu_page {
 };
 
 struct kvm_pio_request {
+	unsigned long linear_rip;
 	unsigned long count;
 	int in;
 	int port;
@@ -568,6 +569,7 @@ struct kvm_vcpu_arch {
 	bool tpr_access_reporting;
 	u64 ia32_xss;
 	u64 microcode_version;
+	u64 arch_capabilities;
 
 	/*
 	 * Paging state of the vcpu
@@ -1192,6 +1194,8 @@ struct kvm_x86_ops {
 	int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
 				   uint16_t *vmcs_version);
 	uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
+
+	bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1252,7 +1256,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				   gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
+unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
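For readers who have not seen the pattern behind kvm_mmu_page_role, the following stand-alone sketch (illustrative only; the field set is trimmed, not the kernel definition) shows how bitfields overlaid on a backing word let code such as mmu_base_role_mask select role bits by name while still comparing or masking the packed value as a whole:

    #include <stdio.h>

    union page_role {
        unsigned int word;              /* aliases the bitfields below */
        struct {
            unsigned int level:4;
            unsigned int gpte_is_8_bytes:1;
            unsigned int quadrant:2;
        };
    };

    int main(void)
    {
        union page_role r = { .word = 0 };

        r.level = 4;
        r.gpte_is_8_bytes = 1;
        /* 0x14 on ABIs that allocate bitfields from the low bits up */
        printf("packed word: %#x\n", r.word);
        return 0;
    }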
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 27c43525a05f..421899f6ad7b 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -526,7 +526,9 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
 		new_config.enable = 0;
 	stimer->config.as_uint64 = new_config.as_uint64;
 
-	stimer_mark_pending(stimer, false);
+	if (stimer->config.enable)
+		stimer_mark_pending(stimer, false);
+
 	return 0;
 }
 
@@ -542,7 +544,10 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
 		stimer->config.enable = 0;
 	else if (stimer->config.auto_enable)
 		stimer->config.enable = 1;
-	stimer_mark_pending(stimer, false);
+
+	if (stimer->config.enable)
+		stimer_mark_pending(stimer, false);
+
 	return 0;
 }
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7837ab001d80..eee455a8a612 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -182,7 +182,7 @@ struct kvm_shadow_walk_iterator {
 
 static const union kvm_mmu_page_role mmu_base_role_mask = {
 	.cr0_wp = 1,
-	.cr4_pae = 1,
+	.gpte_is_8_bytes = 1,
 	.nxe = 1,
 	.smep_andnot_wp = 1,
 	.smap_andnot_wp = 1,
@@ -2205,6 +2205,7 @@ static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);
 
+
 #define for_each_valid_sp(_kvm, _sp, _gfn)				\
 	hlist_for_each_entry(_sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
@@ -2215,12 +2216,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	for_each_valid_sp(_kvm, _sp, _gfn)				\
 		if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
 
+static inline bool is_ept_sp(struct kvm_mmu_page *sp)
+{
+	return sp->role.cr0_wp && sp->role.smap_andnot_wp;
+}
+
 /* @sp->gfn should be write-protected at the call site */
 static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			    struct list_head *invalid_list)
 {
-	if (sp->role.cr4_pae != !!is_pae(vcpu)
-	    || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
+	if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) ||
+	    vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
 		return false;
 	}
@@ -2423,7 +2429,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	role.level = level;
 	role.direct = direct;
 	if (role.direct)
-		role.cr4_pae = 0;
+		role.gpte_is_8_bytes = true;
 	role.access = access;
 	if (!vcpu->arch.mmu->direct_map
 	    && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
@@ -4794,7 +4800,6 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
 
 	role.base.access = ACC_ALL;
 	role.base.nxe = !!is_nx(vcpu);
-	role.base.cr4_pae = !!is_pae(vcpu);
 	role.base.cr0_wp = is_write_protection(vcpu);
 	role.base.smm = is_smm(vcpu);
 	role.base.guest_mode = is_guest_mode(vcpu);
@@ -4815,6 +4820,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 	role.base.ad_disabled = (shadow_accessed_mask == 0);
 	role.base.level = kvm_x86_ops->get_tdp_level(vcpu);
 	role.base.direct = true;
+	role.base.gpte_is_8_bytes = true;
 
 	return role;
 }
@@ -4879,6 +4885,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 	role.base.smap_andnot_wp = role.ext.cr4_smap &&
 		!is_write_protection(vcpu);
 	role.base.direct = !is_paging(vcpu);
+	role.base.gpte_is_8_bytes = !!is_pae(vcpu);
 
 	if (!is_long_mode(vcpu))
 		role.base.level = PT32E_ROOT_LEVEL;
@@ -4918,18 +4925,26 @@ static union kvm_mmu_role
 kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
 				   bool execonly)
 {
-	union kvm_mmu_role role;
+	union kvm_mmu_role role = {0};
 
-	/* Base role is inherited from root_mmu */
-	role.base.word = vcpu->arch.root_mmu.mmu_role.base.word;
-	role.ext = kvm_calc_mmu_role_ext(vcpu);
+	/* SMM flag is inherited from root_mmu */
+	role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
 
 	role.base.level = PT64_ROOT_4LEVEL;
+	role.base.gpte_is_8_bytes = true;
 	role.base.direct = false;
 	role.base.ad_disabled = !accessed_dirty;
 	role.base.guest_mode = true;
 	role.base.access = ACC_ALL;
 
+	/*
+	 * WP=1 and NOT_WP=1 is an impossible combination, use WP and the
+	 * SMAP variation to denote shadow EPT entries.
+	 */
+	role.base.cr0_wp = true;
+	role.base.smap_andnot_wp = true;
+
+	role.ext = kvm_calc_mmu_role_ext(vcpu);
 	role.ext.execonly = execonly;
 
 	return role;
@@ -5179,7 +5194,7 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
 		 gpa, bytes, sp->role.word);
 
 	offset = offset_in_page(gpa);
-	pte_size = sp->role.cr4_pae ? 8 : 4;
+	pte_size = sp->role.gpte_is_8_bytes ? 8 : 4;
 
 	/*
 	 * Sometimes, the OS only writes the last one bytes to update status
@@ -5203,7 +5218,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
 	page_offset = offset_in_page(gpa);
 	level = sp->role.level;
 	*nspte = 1;
-	if (!sp->role.cr4_pae) {
+	if (!sp->role.gpte_is_8_bytes) {
 		page_offset <<= 1;	/* 32->64 */
 		/*
 		 * A 32-bit pde maps 4MB while the shadow pdes map
@@ -5393,10 +5408,12 @@ emulate:
 	 * This can happen if a guest gets a page-fault on data access but the HW
 	 * table walker is not able to read the instruction page (e.g instruction
 	 * page is not present in memory). In those cases we simply restart the
-	 * guest.
+	 * guest, with the exception of AMD Erratum 1096 which is unrecoverable.
 	 */
-	if (unlikely(insn && !insn_len))
-		return 1;
+	if (unlikely(insn && !insn_len)) {
+		if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu))
+			return 1;
+	}
 
 	er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
 
@@ -5509,7 +5526,9 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
 			if (flush && lock_flush_tlb) {
-				kvm_flush_remote_tlbs(kvm);
+				kvm_flush_remote_tlbs_with_address(kvm,
+						start_gfn,
+						iterator.gfn - start_gfn + 1);
 				flush = false;
 			}
 			cond_resched_lock(&kvm->mmu_lock);
@@ -5517,7 +5536,8 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	}
 
 	if (flush && lock_flush_tlb) {
-		kvm_flush_remote_tlbs(kvm);
+		kvm_flush_remote_tlbs_with_address(kvm, start_gfn,
+						   end_gfn - start_gfn + 1);
 		flush = false;
 	}
 
@@ -6011,7 +6031,7 @@ out:
 /*
 * Calculate mmu pages needed for kvm.
 */
-unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
+unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
 {
 	unsigned int nr_mmu_pages;
 	unsigned int nr_pages = 0;
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 9f6c855a0043..dd30dccd2ad5 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -29,10 +29,10 @@
 									\
 	role.word = __entry->role;					\
 									\
-	trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s"		\
+	trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s"	\
 			 " %snxe %sad root %u %s%c",			\
 			 __entry->gfn, role.level,			\
-			 role.cr4_pae ? " pae" : "",			\
+			 role.gpte_is_8_bytes ? 8 : 4,			\
 			 role.quadrant,					\
 			 role.direct ? " direct" : "",			\
 			 access_str[role.access],			\
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b5b128a0a051..426039285fd1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7098,6 +7098,36 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 	return -ENODEV;
 }
 
+static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+{
+	bool is_user, smap;
+
+	is_user = svm_get_cpl(vcpu) == 3;
+	smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+
+	/*
+	 * Detect and workaround Errata 1096 Fam_17h_00_0Fh
+	 *
+	 * In non SEV guest, hypervisor will be able to read the guest
+	 * memory to decode the instruction pointer when insn_len is zero
+	 * so we return true to indicate that decoding is possible.
+	 *
+	 * But in the SEV guest, the guest memory is encrypted with the
+	 * guest specific key and hypervisor will not be able to decode the
+	 * instruction pointer so we will not able to workaround it. Lets
+	 * print the error and request to kill the guest.
+	 */
+	if (is_user && smap) {
+		if (!sev_guest(vcpu->kvm))
+			return true;
+
+		pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n");
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+	}
+
+	return false;
+}
+
 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -7231,6 +7261,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
 	.nested_enable_evmcs = nested_enable_evmcs,
 	.nested_get_evmcs_version = nested_get_evmcs_version,
+
+	.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f24a2c225070..153e539c29c9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2585,6 +2585,11 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu,
 	    !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
 	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
 		return -EINVAL;
+
+	if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) ||
+	    is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))
+		return -EINVAL;
+
 	/*
 	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
 	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c73375e01ab8..ab432a930ae8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1683,12 +1683,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		msr_info->data = to_vmx(vcpu)->spec_ctrl;
 		break;
-	case MSR_IA32_ARCH_CAPABILITIES:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
-			return 1;
-		msr_info->data = to_vmx(vcpu)->arch_capabilities;
-		break;
 	case MSR_IA32_SYSENTER_CS:
 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
 		break;
@@ -1895,11 +1889,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
 					      MSR_TYPE_W);
 		break;
-	case MSR_IA32_ARCH_CAPABILITIES:
-		if (!msr_info->host_initiated)
-			return 1;
-		vmx->arch_capabilities = data;
-		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
 			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -4088,8 +4077,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 			++vmx->nmsrs;
 	}
 
-	vmx->arch_capabilities = kvm_get_arch_capabilities();
-
 	vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
 
 	/* 22.2.1, 20.8.1 */
@@ -7409,6 +7396,11 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 static __init int hardware_setup(void)
 {
 	unsigned long host_bndcfgs;
@@ -7711,6 +7703,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
7711 | .set_nested_state = NULL, | 7703 | .set_nested_state = NULL, |
7712 | .get_vmcs12_pages = NULL, | 7704 | .get_vmcs12_pages = NULL, |
7713 | .nested_enable_evmcs = NULL, | 7705 | .nested_enable_evmcs = NULL, |
7706 | .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, | ||
7714 | }; | 7707 | }; |
7715 | 7708 | ||
7716 | static void vmx_cleanup_l1d_flush(void) | 7709 | static void vmx_cleanup_l1d_flush(void) |
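
The stub above wires VMX into the new kvm_x86_ops hook introduced for the SVM errata #1096 workaround. A hedged sketch of the hook's shape, with stand-in types and the SVM-side condition elided:

#include <stdbool.h>
#include <stdio.h>

struct vcpu;	/* opaque stand-in */

struct x86_ops_sketch {
	bool (*need_emulation_on_page_fault)(struct vcpu *vcpu);
};

static bool vmx_need_emulation(struct vcpu *vcpu)
{
	(void)vcpu;
	return false;	/* VMX: never force emulation on a #PF exit */
}

static bool svm_need_emulation(struct vcpu *vcpu)
{
	(void)vcpu;
	/* SVM would return true when errata #1096 leaves insn_len == 0
	 * on a SMAP violation; the real condition is elided here. */
	return true;
}

int main(void)
{
	struct x86_ops_sketch vmx = { vmx_need_emulation };
	struct x86_ops_sketch svm = { svm_need_emulation };

	printf("vmx=%d svm=%d\n", vmx.need_emulation_on_page_fault(NULL),
	       svm.need_emulation_on_page_fault(NULL));
	return 0;
}
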
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 1554cb45b393..a1e00d0a2482 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h | |||
@@ -190,7 +190,6 @@ struct vcpu_vmx { | |||
190 | u64 msr_guest_kernel_gs_base; | 190 | u64 msr_guest_kernel_gs_base; |
191 | #endif | 191 | #endif |
192 | 192 | ||
193 | u64 arch_capabilities; | ||
194 | u64 spec_ctrl; | 193 | u64 spec_ctrl; |
195 | 194 | ||
196 | u32 vm_entry_controls_shadow; | 195 | u32 vm_entry_controls_shadow; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65e4559eef2f..099b851dabaf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -1125,7 +1125,7 @@ static u32 msrs_to_save[] = { | |||
1125 | #endif | 1125 | #endif |
1126 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, | 1126 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, |
1127 | MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, | 1127 | MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, |
1128 | MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES, | 1128 | MSR_IA32_SPEC_CTRL, |
1129 | MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, | 1129 | MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, |
1130 | MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, | 1130 | MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, |
1131 | MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, | 1131 | MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, |
@@ -1158,6 +1158,7 @@ static u32 emulated_msrs[] = { | |||
1158 | 1158 | ||
1159 | MSR_IA32_TSC_ADJUST, | 1159 | MSR_IA32_TSC_ADJUST, |
1160 | MSR_IA32_TSCDEADLINE, | 1160 | MSR_IA32_TSCDEADLINE, |
1161 | MSR_IA32_ARCH_CAPABILITIES, | ||
1161 | MSR_IA32_MISC_ENABLE, | 1162 | MSR_IA32_MISC_ENABLE, |
1162 | MSR_IA32_MCG_STATUS, | 1163 | MSR_IA32_MCG_STATUS, |
1163 | MSR_IA32_MCG_CTL, | 1164 | MSR_IA32_MCG_CTL, |
@@ -2443,6 +2444,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2443 | if (msr_info->host_initiated) | 2444 | if (msr_info->host_initiated) |
2444 | vcpu->arch.microcode_version = data; | 2445 | vcpu->arch.microcode_version = data; |
2445 | break; | 2446 | break; |
2447 | case MSR_IA32_ARCH_CAPABILITIES: | ||
2448 | if (!msr_info->host_initiated) | ||
2449 | return 1; | ||
2450 | vcpu->arch.arch_capabilities = data; | ||
2451 | break; | ||
2446 | case MSR_EFER: | 2452 | case MSR_EFER: |
2447 | return set_efer(vcpu, data); | 2453 | return set_efer(vcpu, data); |
2448 | case MSR_K7_HWCR: | 2454 | case MSR_K7_HWCR: |
@@ -2747,6 +2753,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2747 | case MSR_IA32_UCODE_REV: | 2753 | case MSR_IA32_UCODE_REV: |
2748 | msr_info->data = vcpu->arch.microcode_version; | 2754 | msr_info->data = vcpu->arch.microcode_version; |
2749 | break; | 2755 | break; |
2756 | case MSR_IA32_ARCH_CAPABILITIES: | ||
2757 | if (!msr_info->host_initiated && | ||
2758 | !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) | ||
2759 | return 1; | ||
2760 | msr_info->data = vcpu->arch.arch_capabilities; | ||
2761 | break; | ||
2750 | case MSR_IA32_TSC: | 2762 | case MSR_IA32_TSC: |
2751 | msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; | 2763 | msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; |
2752 | break; | 2764 | break; |
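
Taken together, the x86.c hunks move MSR_IA32_ARCH_CAPABILITIES handling out of VMX-only code into common code (so AMD hosts emulate it too) and list it under emulated_msrs, since it no longer depends on hardware support. A minimal sketch of the access policy the moved handlers implement, with illustrative names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vcpu_sketch {
	uint64_t arch_capabilities;
	bool cpuid_has_arch_caps;	/* X86_FEATURE_ARCH_CAPABILITIES */
};

static int arch_caps_get(struct vcpu_sketch *v, bool host_initiated,
			 uint64_t *data)
{
	if (!host_initiated && !v->cpuid_has_arch_caps)
		return 1;			/* guest read faults */
	*data = v->arch_capabilities;
	return 0;
}

static int arch_caps_set(struct vcpu_sketch *v, bool host_initiated,
			 uint64_t data)
{
	if (!host_initiated)
		return 1;			/* guest writes always fault */
	v->arch_capabilities = data;
	return 0;
}

int main(void)
{
	struct vcpu_sketch v = { 0, false };
	uint64_t data;

	printf("%d\n", arch_caps_set(&v, true, 0x8));	 /* 0: host write ok */
	printf("%d\n", arch_caps_get(&v, false, &data)); /* 1: no CPUID bit  */
	return 0;
}
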
@@ -6523,14 +6535,27 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, | |||
6523 | } | 6535 | } |
6524 | EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); | 6536 | EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); |
6525 | 6537 | ||
6538 | static int complete_fast_pio_out(struct kvm_vcpu *vcpu) | ||
6539 | { | ||
6540 | vcpu->arch.pio.count = 0; | ||
6541 | |||
6542 | if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) | ||
6543 | return 1; | ||
6544 | |||
6545 | return kvm_skip_emulated_instruction(vcpu); | ||
6546 | } | ||
6547 | |||
6526 | static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, | 6548 | static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, |
6527 | unsigned short port) | 6549 | unsigned short port) |
6528 | { | 6550 | { |
6529 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 6551 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
6530 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, | 6552 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, |
6531 | size, port, &val, 1); | 6553 | size, port, &val, 1); |
6532 | /* do not return to emulator after return from userspace */ | 6554 | |
6533 | vcpu->arch.pio.count = 0; | 6555 | if (!ret) { |
6556 | vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); | ||
6557 | vcpu->arch.complete_userspace_io = complete_fast_pio_out; | ||
6558 | } | ||
6534 | return ret; | 6559 | return ret; |
6535 | } | 6560 | } |
6536 | 6561 | ||
@@ -6541,6 +6566,11 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) | |||
6541 | /* We should only ever be called with arch.pio.count equal to 1 */ | 6566 | /* We should only ever be called with arch.pio.count equal to 1 */ |
6542 | BUG_ON(vcpu->arch.pio.count != 1); | 6567 | BUG_ON(vcpu->arch.pio.count != 1); |
6543 | 6568 | ||
6569 | if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) { | ||
6570 | vcpu->arch.pio.count = 0; | ||
6571 | return 1; | ||
6572 | } | ||
6573 | |||
6544 | /* For size less than 4 we merge, else we zero extend */ | 6574 | /* For size less than 4 we merge, else we zero extend */ |
6545 | val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) | 6575 | val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) |
6546 | : 0; | 6576 | : 0; |
@@ -6553,7 +6583,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) | |||
6553 | vcpu->arch.pio.port, &val, 1); | 6583 | vcpu->arch.pio.port, &val, 1); |
6554 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | 6584 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); |
6555 | 6585 | ||
6556 | return 1; | 6586 | return kvm_skip_emulated_instruction(vcpu); |
6557 | } | 6587 | } |
6558 | 6588 | ||
6559 | static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, | 6589 | static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, |
@@ -6572,6 +6602,7 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, | |||
6572 | return ret; | 6602 | return ret; |
6573 | } | 6603 | } |
6574 | 6604 | ||
6605 | vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); | ||
6575 | vcpu->arch.complete_userspace_io = complete_fast_pio_in; | 6606 | vcpu->arch.complete_userspace_io = complete_fast_pio_in; |
6576 | 6607 | ||
6577 | return 0; | 6608 | return 0; |
@@ -6579,16 +6610,13 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, | |||
6579 | 6610 | ||
6580 | int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) | 6611 | int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) |
6581 | { | 6612 | { |
6582 | int ret = kvm_skip_emulated_instruction(vcpu); | 6613 | int ret; |
6583 | 6614 | ||
6584 | /* | ||
6585 | * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered | ||
6586 | * KVM_EXIT_DEBUG here. | ||
6587 | */ | ||
6588 | if (in) | 6615 | if (in) |
6589 | return kvm_fast_pio_in(vcpu, size, port) && ret; | 6616 | ret = kvm_fast_pio_in(vcpu, size, port); |
6590 | else | 6617 | else |
6591 | return kvm_fast_pio_out(vcpu, size, port) && ret; | 6618 | ret = kvm_fast_pio_out(vcpu, size, port); |
6619 | return ret && kvm_skip_emulated_instruction(vcpu); | ||
6592 | } | 6620 | } |
6593 | EXPORT_SYMBOL_GPL(kvm_fast_pio); | 6621 | EXPORT_SYMBOL_GPL(kvm_fast_pio); |
6594 | 6622 | ||
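
The net effect of the fast-PIO rework: instead of advancing RIP before exiting to userspace, KVM records the linear RIP of the IN/OUT and defers the skip to a completion callback that runs when userspace re-enters KVM_RUN, so vCPU state saved in between stays consistent. A standalone sketch of that flow, with stand-in types:

#include <stdint.h>
#include <stdio.h>

struct pio_sketch {
	uint64_t linear_rip;	/* RIP recorded when the exit was queued */
	unsigned int count;	/* outstanding transactions */
};

/* At exit time: remember where the IN/OUT lives instead of skipping it. */
static void queue_fast_pio(struct pio_sketch *s, uint64_t rip)
{
	s->linear_rip = rip;
	s->count = 1;
}

/* At completion time: skip only if nothing moved RIP in the meantime. */
static int complete_fast_pio(struct pio_sketch *s, uint64_t *rip, int insn_len)
{
	s->count = 0;
	if (*rip != s->linear_rip)
		return 1;	/* something rewrote state; don't touch RIP */
	*rip += insn_len;	/* commit the skip now that the I/O is done */
	return 1;
}

int main(void)
{
	struct pio_sketch s;
	uint64_t rip = 0x1000;

	queue_fast_pio(&s, rip);
	complete_fast_pio(&s, &rip, 2);	/* "out imm8, al" is two bytes */
	printf("rip=%#lx\n", (unsigned long)rip);
	return 0;
}
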
@@ -8733,6 +8761,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
8733 | 8761 | ||
8734 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | 8762 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) |
8735 | { | 8763 | { |
8764 | vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); | ||
8736 | vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; | 8765 | vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; |
8737 | kvm_vcpu_mtrr_init(vcpu); | 8766 | kvm_vcpu_mtrr_init(vcpu); |
8738 | vcpu_load(vcpu); | 8767 | vcpu_load(vcpu); |
@@ -9429,13 +9458,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
9429 | const struct kvm_memory_slot *new, | 9458 | const struct kvm_memory_slot *new, |
9430 | enum kvm_mr_change change) | 9459 | enum kvm_mr_change change) |
9431 | { | 9460 | { |
9432 | int nr_mmu_pages = 0; | ||
9433 | |||
9434 | if (!kvm->arch.n_requested_mmu_pages) | 9461 | if (!kvm->arch.n_requested_mmu_pages) |
9435 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 9462 | kvm_mmu_change_mmu_pages(kvm, |
9436 | 9463 | kvm_mmu_calculate_default_mmu_pages(kvm)); | |
9437 | if (nr_mmu_pages) | ||
9438 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | ||
9439 | 9464 | ||
9440 | /* | 9465 | /* |
9441 | * Dirty logging tracks sptes in 4k granularity, meaning that large | 9466 | * Dirty logging tracks sptes in 4k granularity, meaning that large |
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 42b6cb3d16f7..3843198e03d4 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild | |||
@@ -15,6 +15,7 @@ generic-y += irq_work.h | |||
15 | generic-y += kdebug.h | 15 | generic-y += kdebug.h |
16 | generic-y += kmap_types.h | 16 | generic-y += kmap_types.h |
17 | generic-y += kprobes.h | 17 | generic-y += kprobes.h |
18 | generic-y += kvm_para.h | ||
18 | generic-y += local.h | 19 | generic-y += local.h |
19 | generic-y += local64.h | 20 | generic-y += local64.h |
20 | generic-y += mcs_spinlock.h | 21 | generic-y += mcs_spinlock.h |
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild index 8a7ad40be463..7417847dc438 100644 --- a/arch/xtensa/include/uapi/asm/Kbuild +++ b/arch/xtensa/include/uapi/asm/Kbuild | |||
@@ -1,2 +1 @@ | |||
1 | generated-y += unistd_32.h | 1 | generated-y += unistd_32.h |
2 | generic-y += kvm_para.h | ||
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5f24b50c9e88..059dc2bedaf6 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild | |||
@@ -7,5 +7,7 @@ no-export-headers += kvm.h | |||
7 | endif | 7 | endif |
8 | 8 | ||
9 | ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),) | 9 | ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),) |
10 | ifeq ($(wildcard $(objtree)/arch/$(SRCARCH)/include/generated/uapi/asm/kvm_para.h),) | ||
10 | no-export-headers += kvm_para.h | 11 | no-export-headers += kvm_para.h |
11 | endif | 12 | endif |
13 | endif | ||
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3c1f4bdf9000..7514fcea91a7 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile | |||
@@ -29,8 +29,8 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) | |||
29 | INSTALL_HDR_PATH = $(top_srcdir)/usr | 29 | INSTALL_HDR_PATH = $(top_srcdir)/usr |
30 | LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ | 30 | LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ |
31 | LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include | 31 | LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include |
32 | CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. | 32 | CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. |
33 | LDFLAGS += -pthread | 33 | LDFLAGS += -pthread -no-pie |
34 | 34 | ||
35 | # After inclusion, $(OUTPUT) is defined and | 35 | # After inclusion, $(OUTPUT) is defined and |
36 | # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ | 36 | # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ |
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index a84785b02557..07b71ad9734a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h | |||
@@ -102,6 +102,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); | |||
102 | struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); | 102 | struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); |
103 | void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); | 103 | void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); |
104 | int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); | 104 | int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); |
105 | void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); | ||
105 | void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, | 106 | void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, |
106 | struct kvm_mp_state *mp_state); | 107 | struct kvm_mp_state *mp_state); |
107 | void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); | 108 | void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); |
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b52cfdefecbf..efa0aad8b3c6 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c | |||
@@ -1121,6 +1121,22 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) | |||
1121 | return rc; | 1121 | return rc; |
1122 | } | 1122 | } |
1123 | 1123 | ||
1124 | void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid) | ||
1125 | { | ||
1126 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1127 | int ret; | ||
1128 | |||
1129 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1130 | |||
1131 | vcpu->state->immediate_exit = 1; | ||
1132 | ret = ioctl(vcpu->fd, KVM_RUN, NULL); | ||
1133 | vcpu->state->immediate_exit = 0; | ||
1134 | |||
1135 | TEST_ASSERT(ret == -1 && errno == EINTR, | ||
1136 | "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", | ||
1137 | ret, errno); | ||
1138 | } | ||
1139 | |||
1124 | /* | 1140 | /* |
1125 | * VM VCPU Set MP State | 1141 | * VM VCPU Set MP State |
1126 | * | 1142 | * |
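
vcpu_run_complete_io() builds on the documented immediate_exit semantics: with run->immediate_exit set, KVM_RUN completes any pending I/O (committing guest RIP) and returns -1 with errno == EINTR without entering the guest. A minimal sketch of the same pattern against the raw API, assuming an already-open vcpu fd and its mmap'ed kvm_run:

#include <assert.h>
#include <errno.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static void complete_pending_io(int vcpu_fd, struct kvm_run *run)
{
	int ret;

	run->immediate_exit = 1;
	ret = ioctl(vcpu_fd, KVM_RUN, NULL);
	run->immediate_exit = 0;

	/* The vcpu must not have actually entered the guest. */
	assert(ret == -1 && errno == EINTR);
}
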
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index d503a51fad30..7c2c4d4055a8 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c | |||
@@ -87,22 +87,25 @@ int main(int argc, char *argv[]) | |||
87 | while (1) { | 87 | while (1) { |
88 | rc = _vcpu_run(vm, VCPU_ID); | 88 | rc = _vcpu_run(vm, VCPU_ID); |
89 | 89 | ||
90 | if (run->exit_reason == KVM_EXIT_IO) { | 90 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, |
91 | switch (get_ucall(vm, VCPU_ID, &uc)) { | 91 | "Unexpected exit reason: %u (%s),\n", |
92 | case UCALL_SYNC: | 92 | run->exit_reason, |
93 | /* emulate hypervisor clearing CR4.OSXSAVE */ | 93 | exit_reason_str(run->exit_reason)); |
94 | vcpu_sregs_get(vm, VCPU_ID, &sregs); | 94 | |
95 | sregs.cr4 &= ~X86_CR4_OSXSAVE; | 95 | switch (get_ucall(vm, VCPU_ID, &uc)) { |
96 | vcpu_sregs_set(vm, VCPU_ID, &sregs); | 96 | case UCALL_SYNC: |
97 | break; | 97 | /* emulate hypervisor clearing CR4.OSXSAVE */ |
98 | case UCALL_ABORT: | 98 | vcpu_sregs_get(vm, VCPU_ID, &sregs); |
99 | TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); | 99 | sregs.cr4 &= ~X86_CR4_OSXSAVE; |
100 | break; | 100 | vcpu_sregs_set(vm, VCPU_ID, &sregs); |
101 | case UCALL_DONE: | 101 | break; |
102 | goto done; | 102 | case UCALL_ABORT: |
103 | default: | 103 | TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); |
104 | TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); | 104 | break; |
105 | } | 105 | case UCALL_DONE: |
106 | goto done; | ||
107 | default: | ||
108 | TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); | ||
106 | } | 109 | } |
107 | } | 110 | } |
108 | 111 | ||
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4b3f556265f1..30f75856cf39 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c | |||
@@ -134,6 +134,11 @@ int main(int argc, char *argv[]) | |||
134 | 134 | ||
135 | struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); | 135 | struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); |
136 | 136 | ||
137 | if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { | ||
138 | fprintf(stderr, "immediate_exit not available, skipping test\n"); | ||
139 | exit(KSFT_SKIP); | ||
140 | } | ||
141 | |||
137 | /* Create VM */ | 142 | /* Create VM */ |
138 | vm = vm_create_default(VCPU_ID, 0, guest_code); | 143 | vm = vm_create_default(VCPU_ID, 0, guest_code); |
139 | vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); | 144 | vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); |
@@ -156,8 +161,6 @@ int main(int argc, char *argv[]) | |||
156 | stage, run->exit_reason, | 161 | stage, run->exit_reason, |
157 | exit_reason_str(run->exit_reason)); | 162 | exit_reason_str(run->exit_reason)); |
158 | 163 | ||
159 | memset(®s1, 0, sizeof(regs1)); | ||
160 | vcpu_regs_get(vm, VCPU_ID, ®s1); | ||
161 | switch (get_ucall(vm, VCPU_ID, &uc)) { | 164 | switch (get_ucall(vm, VCPU_ID, &uc)) { |
162 | case UCALL_ABORT: | 165 | case UCALL_ABORT: |
163 | TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], | 166 | TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], |
@@ -176,6 +179,17 @@ int main(int argc, char *argv[]) | |||
176 | uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", | 179 | uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", |
177 | stage, (ulong)uc.args[1]); | 180 | stage, (ulong)uc.args[1]); |
178 | 181 | ||
182 | /* | ||
183 | * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees | ||
184 | * guest state is consistent only after userspace re-enters the | ||
185 | * kernel with KVM_RUN. Complete IO prior to migrating state | ||
186 | * to a new VM. | ||
187 | */ | ||
188 | vcpu_run_complete_io(vm, VCPU_ID); | ||
189 | |||
190 | memset(®s1, 0, sizeof(regs1)); | ||
191 | vcpu_regs_get(vm, VCPU_ID, ®s1); | ||
192 | |||
179 | state = vcpu_save_state(vm, VCPU_ID); | 193 | state = vcpu_save_state(vm, VCPU_ID); |
180 | kvm_vm_release(vm); | 194 | kvm_vm_release(vm); |
181 | 195 | ||
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 264d92da3240..370bd6c5e6cb 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c | |||
@@ -222,7 +222,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | |||
222 | } | 222 | } |
223 | } | 223 | } |
224 | 224 | ||
225 | if (used_lrs) { | 225 | if (used_lrs || cpu_if->its_vpe.its_vm) { |
226 | int i; | 226 | int i; |
227 | u32 elrsr; | 227 | u32 elrsr; |
228 | 228 | ||
@@ -247,7 +247,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) | |||
247 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | 247 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; |
248 | int i; | 248 | int i; |
249 | 249 | ||
250 | if (used_lrs) { | 250 | if (used_lrs || cpu_if->its_vpe.its_vm) { |
251 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); | 251 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); |
252 | 252 | ||
253 | for (i = 0; i < used_lrs; i++) | 253 | for (i = 0; i < used_lrs; i++) |
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ffd7acdceac7..27c958306449 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
@@ -102,8 +102,7 @@ static bool kvm_is_device_pfn(unsigned long pfn) | |||
102 | * @addr: IPA | 102 | * @addr: IPA |
103 | * @pmd: pmd pointer for IPA | 103 | * @pmd: pmd pointer for IPA |
104 | * | 104 | * |
105 | * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all | 105 | * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. |
106 | * pages in the range dirty. | ||
107 | */ | 106 | */ |
108 | static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) | 107 | static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) |
109 | { | 108 | { |
@@ -121,8 +120,7 @@ static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) | |||
121 | * @addr: IPA | 120 | * @addr: IPA |
122 | * @pud: pud pointer for IPA | 121 | * @pud: pud pointer for IPA |
123 | * | 122 | * |
124 | * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. Marks all | 123 | * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. |
125 | * pages in the range dirty. | ||
126 | */ | 124 | */ |
127 | static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) | 125 | static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) |
128 | { | 126 | { |
@@ -899,9 +897,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, | |||
899 | * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. | 897 | * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. |
900 | * @kvm: The KVM struct pointer for the VM. | 898 | * @kvm: The KVM struct pointer for the VM. |
901 | * | 899 | * |
902 | * Allocates only the stage-2 HW PGD level table(s) (can support either full | 900 | * Allocates only the stage-2 HW PGD level table(s) of size defined by |
903 | * 40-bit input addresses or limited to 32-bit input addresses). Clears the | 901 | * stage2_pgd_size(kvm). |
904 | * allocated pages. | ||
905 | * | 902 | * |
906 | * Note we don't need locking here as this is only called when the VM is | 903 | * Note we don't need locking here as this is only called when the VM is |
907 | * created, which can only be done once. | 904 | * created, which can only be done once. |
@@ -1067,25 +1064,43 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache | |||
1067 | { | 1064 | { |
1068 | pmd_t *pmd, old_pmd; | 1065 | pmd_t *pmd, old_pmd; |
1069 | 1066 | ||
1067 | retry: | ||
1070 | pmd = stage2_get_pmd(kvm, cache, addr); | 1068 | pmd = stage2_get_pmd(kvm, cache, addr); |
1071 | VM_BUG_ON(!pmd); | 1069 | VM_BUG_ON(!pmd); |
1072 | 1070 | ||
1073 | old_pmd = *pmd; | 1071 | old_pmd = *pmd; |
1072 | /* | ||
1073 | * Multiple vcpus faulting on the same PMD entry, can | ||
1074 | * lead to them sequentially updating the PMD with the | ||
1075 | * same value. Following the break-before-make | ||
1076 | * (pmd_clear() followed by tlb_flush()) process can | ||
1077 | * hinder forward progress due to refaults generated | ||
1078 | * on missing translations. | ||
1079 | * | ||
1080 | * Skip updating the page table if the entry is | ||
1081 | * unchanged. | ||
1082 | */ | ||
1083 | if (pmd_val(old_pmd) == pmd_val(*new_pmd)) | ||
1084 | return 0; | ||
1085 | |||
1074 | if (pmd_present(old_pmd)) { | 1086 | if (pmd_present(old_pmd)) { |
1075 | /* | 1087 | /* |
1076 | * Multiple vcpus faulting on the same PMD entry, can | 1088 | * If we already have PTE level mapping for this block, |
1077 | * lead to them sequentially updating the PMD with the | 1089 | * we must unmap it to avoid inconsistent TLB state and |
1078 | * same value. Following the break-before-make | 1090 | * leaking the table page. We could end up in this situation |
1079 | * (pmd_clear() followed by tlb_flush()) process can | 1091 | * if the memory slot was marked for dirty logging and was |
1080 | * hinder forward progress due to refaults generated | 1092 | * reverted, leaving PTE level mappings for the pages accessed |
1081 | * on missing translations. | 1093 | * during the period. So, unmap the PTE level mapping for this |
1094 | * block and retry, as we could have released the upper level | ||
1095 | * table in the process. | ||
1082 | * | 1096 | * |
1083 | * Skip updating the page table if the entry is | 1097 | * Normal THP split/merge follows mmu_notifier callbacks and do |
1084 | * unchanged. | 1098 | * get handled accordingly. |
1085 | */ | 1099 | */ |
1086 | if (pmd_val(old_pmd) == pmd_val(*new_pmd)) | 1100 | if (!pmd_thp_or_huge(old_pmd)) { |
1087 | return 0; | 1101 | unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); |
1088 | 1102 | goto retry; | |
1103 | } | ||
1089 | /* | 1104 | /* |
1090 | * Mapping in huge pages should only happen through a | 1105 | * Mapping in huge pages should only happen through a |
1091 | * fault. If a page is merged into a transparent huge | 1106 | * fault. If a page is merged into a transparent huge |
@@ -1097,8 +1112,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache | |||
1097 | * should become splitting first, unmapped, merged, | 1112 | * should become splitting first, unmapped, merged, |
1098 | * and mapped back in on-demand. | 1113 | * and mapped back in on-demand. |
1099 | */ | 1114 | */ |
1100 | VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); | 1115 | WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); |
1101 | |||
1102 | pmd_clear(pmd); | 1116 | pmd_clear(pmd); |
1103 | kvm_tlb_flush_vmid_ipa(kvm, addr); | 1117 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
1104 | } else { | 1118 | } else { |
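
The restructured stage2_set_pmd_huge() now (1) skips identical updates before anything else, (2) unmaps and retries when a stale PTE-level table occupies the slot (left behind when dirty logging is reverted), and (3) only then performs break-before-make on a changed block entry. A toy model of that control flow, with stand-in page-table helpers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t pmd_t;
#define PMD_BLOCK_BIT	0x1ULL	/* fake "this is a block mapping" bit */

static bool pmd_present(pmd_t p)  { return p != 0; }
static bool pmd_is_block(pmd_t p) { return p & PMD_BLOCK_BIT; }

/* Stand-ins: the real code unmaps the range (freeing the PTE table)
 * and issues a TLB invalidation for the IPA. */
static void unmap_pte_table(pmd_t *p) { *p = 0; }
static void clear_and_flush(pmd_t *p) { *p = 0; }

static int set_pmd_huge(pmd_t *pmd, pmd_t new_pmd)
{
retry:
	if (*pmd == new_pmd)
		return 0;	/* concurrent refault: skip identical update */

	if (pmd_present(*pmd)) {
		if (!pmd_is_block(*pmd)) {
			/* Stale PTE-level table: unmap and re-walk, since
			 * upper levels may be freed in the process. */
			unmap_pte_table(pmd);
			goto retry;
		}
		clear_and_flush(pmd);	/* break before make */
	}
	*pmd = new_pmd;			/* make */
	return 0;
}

int main(void)
{
	pmd_t pmd = 0x2;		/* present but not a block: a table */

	set_pmd_huge(&pmd, 0x4000 | PMD_BLOCK_BIT);
	printf("pmd=%#lx\n", (unsigned long)pmd);
	return 0;
}
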
@@ -1114,6 +1128,7 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac | |||
1114 | { | 1128 | { |
1115 | pud_t *pudp, old_pud; | 1129 | pud_t *pudp, old_pud; |
1116 | 1130 | ||
1131 | retry: | ||
1117 | pudp = stage2_get_pud(kvm, cache, addr); | 1132 | pudp = stage2_get_pud(kvm, cache, addr); |
1118 | VM_BUG_ON(!pudp); | 1133 | VM_BUG_ON(!pudp); |
1119 | 1134 | ||
@@ -1121,14 +1136,23 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac | |||
1121 | 1136 | ||
1122 | /* | 1137 | /* |
1123 | * A large number of vcpus faulting on the same stage 2 entry, | 1138 | * A large number of vcpus faulting on the same stage 2 entry, |
1124 | * can lead to a refault due to the | 1139 | * can lead to a refault due to the stage2_pud_clear()/tlb_flush(). |
1125 | * stage2_pud_clear()/tlb_flush(). Skip updating the page | 1140 | * Skip updating the page tables if there is no change. |
1126 | * tables if there is no change. | ||
1127 | */ | 1141 | */ |
1128 | if (pud_val(old_pud) == pud_val(*new_pudp)) | 1142 | if (pud_val(old_pud) == pud_val(*new_pudp)) |
1129 | return 0; | 1143 | return 0; |
1130 | 1144 | ||
1131 | if (stage2_pud_present(kvm, old_pud)) { | 1145 | if (stage2_pud_present(kvm, old_pud)) { |
1146 | /* | ||
1147 | * If we already have table level mapping for this block, unmap | ||
1148 | * the range for this block and retry. | ||
1149 | */ | ||
1150 | if (!stage2_pud_huge(kvm, old_pud)) { | ||
1151 | unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); | ||
1152 | goto retry; | ||
1153 | } | ||
1154 | |||
1155 | WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); | ||
1132 | stage2_pud_clear(kvm, pudp); | 1156 | stage2_pud_clear(kvm, pudp); |
1133 | kvm_tlb_flush_vmid_ipa(kvm, addr); | 1157 | kvm_tlb_flush_vmid_ipa(kvm, addr); |
1134 | } else { | 1158 | } else { |
@@ -1451,13 +1475,11 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, | |||
1451 | } | 1475 | } |
1452 | 1476 | ||
1453 | /** | 1477 | /** |
1454 | * stage2_wp_puds - write protect PGD range | 1478 | * stage2_wp_puds - write protect PGD range |
1455 | * @pgd: pointer to pgd entry | 1479 | * @pgd: pointer to pgd entry |
1456 | * @addr: range start address | 1480 | * @addr: range start address |
1457 | * @end: range end address | 1481 | * @end: range end address |
1458 | * | 1482 | */ |
1459 | * Process PUD entries, for a huge PUD we cause a panic. | ||
1460 | */ | ||
1461 | static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, | 1483 | static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, |
1462 | phys_addr_t addr, phys_addr_t end) | 1484 | phys_addr_t addr, phys_addr_t end) |
1463 | { | 1485 | { |
@@ -1594,8 +1616,9 @@ static void kvm_send_hwpoison_signal(unsigned long address, | |||
1594 | send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); | 1616 | send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); |
1595 | } | 1617 | } |
1596 | 1618 | ||
1597 | static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, | 1619 | static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, |
1598 | unsigned long hva) | 1620 | unsigned long hva, |
1621 | unsigned long map_size) | ||
1599 | { | 1622 | { |
1600 | gpa_t gpa_start; | 1623 | gpa_t gpa_start; |
1601 | hva_t uaddr_start, uaddr_end; | 1624 | hva_t uaddr_start, uaddr_end; |
@@ -1610,34 +1633,34 @@ static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, | |||
1610 | 1633 | ||
1611 | /* | 1634 | /* |
1612 | * Pages belonging to memslots that don't have the same alignment | 1635 | * Pages belonging to memslots that don't have the same alignment |
1613 | * within a PMD for userspace and IPA cannot be mapped with stage-2 | 1636 | * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2 |
1614 | * PMD entries, because we'll end up mapping the wrong pages. | 1637 | * PMD/PUD entries, because we'll end up mapping the wrong pages. |
1615 | * | 1638 | * |
1616 | * Consider a layout like the following: | 1639 | * Consider a layout like the following: |
1617 | * | 1640 | * |
1618 | * memslot->userspace_addr: | 1641 | * memslot->userspace_addr: |
1619 | * +-----+--------------------+--------------------+---+ | 1642 | * +-----+--------------------+--------------------+---+ |
1620 | * |abcde|fgh Stage-1 PMD | Stage-1 PMD tv|xyz| | 1643 | * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| |
1621 | * +-----+--------------------+--------------------+---+ | 1644 | * +-----+--------------------+--------------------+---+ |
1622 | * | 1645 | * |
1623 | * memslot->base_gfn << PAGE_SIZE: | 1646 | * memslot->base_gfn << PAGE_SIZE: |
1624 | * +---+--------------------+--------------------+-----+ | 1647 | * +---+--------------------+--------------------+-----+ |
1625 | * |abc|def Stage-2 PMD | Stage-2 PMD |tvxyz| | 1648 | * |abc|def Stage-2 block | Stage-2 block |tvxyz| |
1626 | * +---+--------------------+--------------------+-----+ | 1649 | * +---+--------------------+--------------------+-----+ |
1627 | * | 1650 | * |
1628 | * If we create those stage-2 PMDs, we'll end up with this incorrect | 1651 | * If we create those stage-2 blocks, we'll end up with this incorrect |
1629 | * mapping: | 1652 | * mapping: |
1630 | * d -> f | 1653 | * d -> f |
1631 | * e -> g | 1654 | * e -> g |
1632 | * f -> h | 1655 | * f -> h |
1633 | */ | 1656 | */ |
1634 | if ((gpa_start & ~S2_PMD_MASK) != (uaddr_start & ~S2_PMD_MASK)) | 1657 | if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1))) |
1635 | return false; | 1658 | return false; |
1636 | 1659 | ||
1637 | /* | 1660 | /* |
1638 | * Next, let's make sure we're not trying to map anything not covered | 1661 | * Next, let's make sure we're not trying to map anything not covered |
1639 | * by the memslot. This means we have to prohibit PMD size mappings | 1662 | * by the memslot. This means we have to prohibit block size mappings |
1640 | * for the beginning and end of a non-PMD aligned and non-PMD sized | 1663 | * for the beginning and end of a non-block aligned and non-block sized |
1641 | * memory slot (illustrated by the head and tail parts of the | 1664 | * memory slot (illustrated by the head and tail parts of the |
1642 | * userspace view above containing pages 'abcde' and 'xyz', | 1665 | * userspace view above containing pages 'abcde' and 'xyz', |
1643 | * respectively). | 1666 | * respectively). |
@@ -1646,8 +1669,8 @@ static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, | |||
1646 | * userspace_addr or the base_gfn, as both are equally aligned (per | 1669 | * userspace_addr or the base_gfn, as both are equally aligned (per |
1647 | * the check above) and equally sized. | 1670 | * the check above) and equally sized. |
1648 | */ | 1671 | */ |
1649 | return (hva & S2_PMD_MASK) >= uaddr_start && | 1672 | return (hva & ~(map_size - 1)) >= uaddr_start && |
1650 | (hva & S2_PMD_MASK) + S2_PMD_SIZE <= uaddr_end; | 1673 | (hva & ~(map_size - 1)) + map_size <= uaddr_end; |
1651 | } | 1674 | } |
1652 | 1675 | ||
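
fault_supports_stage2_huge_mapping() generalizes the old PMD-only test to any block size: gpa and hva must be congruent modulo map_size, and the block containing hva must lie wholly inside the memslot. A standalone illustration with hypothetical addresses and a 2M block:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool supports_huge(uint64_t gpa_start, uint64_t uaddr_start,
			  uint64_t uaddr_end, uint64_t hva, uint64_t map_size)
{
	/* Same offset within a block on both sides? */
	if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
		return false;

	/* Does the block containing hva fit entirely in the memslot? */
	return (hva & ~(map_size - 1)) >= uaddr_start &&
	       (hva & ~(map_size - 1)) + map_size <= uaddr_end;
}

int main(void)
{
	/* 2M-aligned on both sides: block mapping is safe (prints 1). */
	printf("%d\n", supports_huge(0x80200000ULL, 0x7f0000200000ULL,
				     0x7f0000600000ULL, 0x7f0000200000ULL,
				     0x200000ULL));
	/* gpa offset by 4K within the 2M block: unsafe (prints 0). */
	printf("%d\n", supports_huge(0x80201000ULL, 0x7f0000200000ULL,
				     0x7f0000600000ULL, 0x7f0000200000ULL,
				     0x200000ULL));
	return 0;
}
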
1653 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | 1676 | static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, |
@@ -1676,12 +1699,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1676 | return -EFAULT; | 1699 | return -EFAULT; |
1677 | } | 1700 | } |
1678 | 1701 | ||
1679 | if (!fault_supports_stage2_pmd_mappings(memslot, hva)) | ||
1680 | force_pte = true; | ||
1681 | |||
1682 | if (logging_active) | ||
1683 | force_pte = true; | ||
1684 | |||
1685 | /* Let's check if we will get back a huge page backed by hugetlbfs */ | 1702 | /* Let's check if we will get back a huge page backed by hugetlbfs */ |
1686 | down_read(¤t->mm->mmap_sem); | 1703 | down_read(¤t->mm->mmap_sem); |
1687 | vma = find_vma_intersection(current->mm, hva, hva + 1); | 1704 | vma = find_vma_intersection(current->mm, hva, hva + 1); |
@@ -1692,6 +1709,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1692 | } | 1709 | } |
1693 | 1710 | ||
1694 | vma_pagesize = vma_kernel_pagesize(vma); | 1711 | vma_pagesize = vma_kernel_pagesize(vma); |
1712 | if (logging_active || | ||
1713 | !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { | ||
1714 | force_pte = true; | ||
1715 | vma_pagesize = PAGE_SIZE; | ||
1716 | } | ||
1717 | |||
1695 | /* | 1718 | /* |
1696 | * The stage2 has a minimum of 2 level table (For arm64 see | 1719 | * The stage2 has a minimum of 2 level table (For arm64 see |
1697 | * kvm_arm_setup_stage2()). Hence, we are guaranteed that we can | 1720 | * kvm_arm_setup_stage2()). Hence, we are guaranteed that we can |
@@ -1699,11 +1722,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, | |||
1699 | * As for PUD huge maps, we must make sure that we have at least | 1722 | * As for PUD huge maps, we must make sure that we have at least |
1700 | * 3 levels, i.e, PMD is not folded. | 1723 | * 3 levels, i.e, PMD is not folded. |
1701 | */ | 1724 | */ |
1702 | if ((vma_pagesize == PMD_SIZE || | 1725 | if (vma_pagesize == PMD_SIZE || |
1703 | (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm))) && | 1726 | (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm))) |
1704 | !force_pte) { | ||
1705 | gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; | 1727 | gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; |
1706 | } | ||
1707 | up_read(¤t->mm->mmap_sem); | 1728 | up_read(¤t->mm->mmap_sem); |
1708 | 1729 | ||
1709 | /* We need minimum second+third level pages */ | 1730 | /* We need minimum second+third level pages */ |
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index ab3f47745d9c..44ceaccb18cf 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c | |||
@@ -754,8 +754,9 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, | |||
754 | u64 indirect_ptr, type = GITS_BASER_TYPE(baser); | 754 | u64 indirect_ptr, type = GITS_BASER_TYPE(baser); |
755 | phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); | 755 | phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); |
756 | int esz = GITS_BASER_ENTRY_SIZE(baser); | 756 | int esz = GITS_BASER_ENTRY_SIZE(baser); |
757 | int index; | 757 | int index, idx; |
758 | gfn_t gfn; | 758 | gfn_t gfn; |
759 | bool ret; | ||
759 | 760 | ||
760 | switch (type) { | 761 | switch (type) { |
761 | case GITS_BASER_TYPE_DEVICE: | 762 | case GITS_BASER_TYPE_DEVICE: |
@@ -782,7 +783,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, | |||
782 | 783 | ||
783 | if (eaddr) | 784 | if (eaddr) |
784 | *eaddr = addr; | 785 | *eaddr = addr; |
785 | return kvm_is_visible_gfn(its->dev->kvm, gfn); | 786 | |
787 | goto out; | ||
786 | } | 788 | } |
787 | 789 | ||
788 | /* calculate and check the index into the 1st level */ | 790 | /* calculate and check the index into the 1st level */ |
@@ -812,7 +814,12 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, | |||
812 | 814 | ||
813 | if (eaddr) | 815 | if (eaddr) |
814 | *eaddr = indirect_ptr; | 816 | *eaddr = indirect_ptr; |
815 | return kvm_is_visible_gfn(its->dev->kvm, gfn); | 817 | |
818 | out: | ||
819 | idx = srcu_read_lock(&its->dev->kvm->srcu); | ||
820 | ret = kvm_is_visible_gfn(its->dev->kvm, gfn); | ||
821 | srcu_read_unlock(&its->dev->kvm->srcu, idx); | ||
822 | return ret; | ||
816 | } | 823 | } |
817 | 824 | ||
818 | static int vgic_its_alloc_collection(struct vgic_its *its, | 825 | static int vgic_its_alloc_collection(struct vgic_its *its, |
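
Both return paths of vgic_its_check_id() now funnel through a common tail that brackets kvm_is_visible_gfn() with the kvm->srcu read side, which memslot lookups require. The shape of that fix, with a plain rwlock standing in for SRCU (the real code uses srcu_read_lock()/srcu_read_unlock(); the stubbed visibility test is illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t memslots_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Stub for kvm_is_visible_gfn(): must only run under the read side. */
static bool gfn_visible(unsigned long gfn)
{
	return gfn < 0x100000;
}

static bool check_id_tail(unsigned long gfn)
{
	bool ret;

	pthread_rwlock_rdlock(&memslots_lock);	/* srcu_read_lock(&kvm->srcu)   */
	ret = gfn_visible(gfn);
	pthread_rwlock_unlock(&memslots_lock);	/* srcu_read_unlock(&kvm->srcu) */
	return ret;
}

int main(void)
{
	printf("%d\n", check_id_tail(0x1234));
	return 0;
}
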
@@ -1729,8 +1736,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev) | |||
1729 | kfree(its); | 1736 | kfree(its); |
1730 | } | 1737 | } |
1731 | 1738 | ||
1732 | int vgic_its_has_attr_regs(struct kvm_device *dev, | 1739 | static int vgic_its_has_attr_regs(struct kvm_device *dev, |
1733 | struct kvm_device_attr *attr) | 1740 | struct kvm_device_attr *attr) |
1734 | { | 1741 | { |
1735 | const struct vgic_register_region *region; | 1742 | const struct vgic_register_region *region; |
1736 | gpa_t offset = attr->attr; | 1743 | gpa_t offset = attr->attr; |
@@ -1750,9 +1757,9 @@ int vgic_its_has_attr_regs(struct kvm_device *dev, | |||
1750 | return 0; | 1757 | return 0; |
1751 | } | 1758 | } |
1752 | 1759 | ||
1753 | int vgic_its_attr_regs_access(struct kvm_device *dev, | 1760 | static int vgic_its_attr_regs_access(struct kvm_device *dev, |
1754 | struct kvm_device_attr *attr, | 1761 | struct kvm_device_attr *attr, |
1755 | u64 *reg, bool is_write) | 1762 | u64 *reg, bool is_write) |
1756 | { | 1763 | { |
1757 | const struct vgic_register_region *region; | 1764 | const struct vgic_register_region *region; |
1758 | struct vgic_its *its; | 1765 | struct vgic_its *its; |
@@ -1919,7 +1926,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, | |||
1919 | ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | | 1926 | ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | |
1920 | ite->collection->collection_id; | 1927 | ite->collection->collection_id; |
1921 | val = cpu_to_le64(val); | 1928 | val = cpu_to_le64(val); |
1922 | return kvm_write_guest(kvm, gpa, &val, ite_esz); | 1929 | return kvm_write_guest_lock(kvm, gpa, &val, ite_esz); |
1923 | } | 1930 | } |
1924 | 1931 | ||
1925 | /** | 1932 | /** |
@@ -2066,7 +2073,7 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, | |||
2066 | (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | | 2073 | (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | |
2067 | (dev->num_eventid_bits - 1)); | 2074 | (dev->num_eventid_bits - 1)); |
2068 | val = cpu_to_le64(val); | 2075 | val = cpu_to_le64(val); |
2069 | return kvm_write_guest(kvm, ptr, &val, dte_esz); | 2076 | return kvm_write_guest_lock(kvm, ptr, &val, dte_esz); |
2070 | } | 2077 | } |
2071 | 2078 | ||
2072 | /** | 2079 | /** |
@@ -2246,7 +2253,7 @@ static int vgic_its_save_cte(struct vgic_its *its, | |||
2246 | ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | | 2253 | ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | |
2247 | collection->collection_id); | 2254 | collection->collection_id); |
2248 | val = cpu_to_le64(val); | 2255 | val = cpu_to_le64(val); |
2249 | return kvm_write_guest(its->dev->kvm, gpa, &val, esz); | 2256 | return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz); |
2250 | } | 2257 | } |
2251 | 2258 | ||
2252 | static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) | 2259 | static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) |
@@ -2317,7 +2324,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) | |||
2317 | */ | 2324 | */ |
2318 | val = 0; | 2325 | val = 0; |
2319 | BUG_ON(cte_esz > sizeof(val)); | 2326 | BUG_ON(cte_esz > sizeof(val)); |
2320 | ret = kvm_write_guest(its->dev->kvm, gpa, &val, cte_esz); | 2327 | ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz); |
2321 | return ret; | 2328 | return ret; |
2322 | } | 2329 | } |
2323 | 2330 | ||
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 408a78eb6a97..9f87e58dbd4a 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c | |||
@@ -358,7 +358,7 @@ retry: | |||
358 | if (status) { | 358 | if (status) { |
359 | /* clear consumed data */ | 359 | /* clear consumed data */ |
360 | val &= ~(1 << bit_nr); | 360 | val &= ~(1 << bit_nr); |
361 | ret = kvm_write_guest(kvm, ptr, &val, 1); | 361 | ret = kvm_write_guest_lock(kvm, ptr, &val, 1); |
362 | if (ret) | 362 | if (ret) |
363 | return ret; | 363 | return ret; |
364 | } | 364 | } |
@@ -409,7 +409,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) | |||
409 | else | 409 | else |
410 | val &= ~(1 << bit_nr); | 410 | val &= ~(1 << bit_nr); |
411 | 411 | ||
412 | ret = kvm_write_guest(kvm, ptr, &val, 1); | 412 | ret = kvm_write_guest_lock(kvm, ptr, &val, 1); |
413 | if (ret) | 413 | if (ret) |
414 | return ret; | 414 | return ret; |
415 | } | 415 | } |
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index abd9c7352677..3af69f2a3866 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c | |||
@@ -867,15 +867,21 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | |||
867 | * either observe the new interrupt before or after doing this check, | 867 | * either observe the new interrupt before or after doing this check, |
868 | * and introducing additional synchronization mechanism doesn't change | 868 | * and introducing additional synchronization mechanism doesn't change |
869 | * this. | 869 | * this. |
870 | * | ||
871 | * Note that we still need to go through the whole thing if anything | ||
872 | * can be directly injected (GICv4). | ||
870 | */ | 873 | */ |
871 | if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) | 874 | if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && |
875 | !vgic_supports_direct_msis(vcpu->kvm)) | ||
872 | return; | 876 | return; |
873 | 877 | ||
874 | DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); | 878 | DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); |
875 | 879 | ||
876 | raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); | 880 | if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { |
877 | vgic_flush_lr_state(vcpu); | 881 | raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); |
878 | raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); | 882 | vgic_flush_lr_state(vcpu); |
883 | raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); | ||
884 | } | ||
879 | 885 | ||
880 | if (can_access_vgic_from_kernel()) | 886 | if (can_access_vgic_from_kernel()) |
881 | vgic_restore_state(vcpu); | 887 | vgic_restore_state(vcpu); |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 4325250afd72..001aeda4c154 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -214,9 +214,9 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) | |||
214 | 214 | ||
215 | if (flags & EPOLLHUP) { | 215 | if (flags & EPOLLHUP) { |
216 | /* The eventfd is closing, detach from KVM */ | 216 | /* The eventfd is closing, detach from KVM */ |
217 | unsigned long flags; | 217 | unsigned long iflags; |
218 | 218 | ||
219 | spin_lock_irqsave(&kvm->irqfds.lock, flags); | 219 | spin_lock_irqsave(&kvm->irqfds.lock, iflags); |
220 | 220 | ||
221 | /* | 221 | /* |
222 | * We must check if someone deactivated the irqfd before | 222 | * We must check if someone deactivated the irqfd before |
@@ -230,7 +230,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) | |||
230 | if (irqfd_is_active(irqfd)) | 230 | if (irqfd_is_active(irqfd)) |
231 | irqfd_deactivate(irqfd); | 231 | irqfd_deactivate(irqfd); |
232 | 232 | ||
233 | spin_unlock_irqrestore(&kvm->irqfds.lock, flags); | 233 | spin_unlock_irqrestore(&kvm->irqfds.lock, iflags); |
234 | } | 234 | } |
235 | 235 | ||
236 | return 0; | 236 | return 0; |
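
The rename fixes a shadowing wart: the inner 'flags' used for spin_lock_irqsave() hid the function's epoll-flags parameter of the same name. A minimal reproduction of the hazard (gcc -Wshadow flags it):

#include <stdio.h>

static void wakeup(unsigned long flags)		/* epoll event mask */
{
	if (flags & 0x10) {
		unsigned long flags;		/* shadows the parameter */

		flags = 0xdead;			/* meant to hold IRQ state */
		printf("inner flags=%#lx\n", flags);
	}
	printf("outer flags=%#lx\n", flags);	/* mask is intact, but any
						 * use above meant the wrong
						 * variable */
}

int main(void)
{
	wakeup(0x10);
	return 0;
}
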
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f25aa98a94df..55fe8e20d8fd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -2905,6 +2905,9 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, | |||
2905 | { | 2905 | { |
2906 | struct kvm_device *dev = filp->private_data; | 2906 | struct kvm_device *dev = filp->private_data; |
2907 | 2907 | ||
2908 | if (dev->kvm->mm != current->mm) | ||
2909 | return -EIO; | ||
2910 | |||
2908 | switch (ioctl) { | 2911 | switch (ioctl) { |
2909 | case KVM_SET_DEVICE_ATTR: | 2912 | case KVM_SET_DEVICE_ATTR: |
2910 | return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); | 2913 | return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); |
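
This check makes every device ioctl verify that the caller shares the mm of the process that created the VM, rejecting cross-process calls with -EIO. A toy model of the ownership test, with stand-in types (the kernel compares dev->kvm->mm against current->mm):

#include <errno.h>
#include <stdio.h>

struct mm { int id; };
struct kvm { struct mm *mm; };
struct kvm_device { struct kvm *kvm; };

static int device_ioctl(struct kvm_device *dev, struct mm *current_mm)
{
	if (dev->kvm->mm != current_mm)
		return -EIO;	/* wrong process: reject outright */
	return 0;		/* fall through to the real handler */
}

int main(void)
{
	struct mm owner = { 1 }, other = { 2 };
	struct kvm vm = { &owner };
	struct kvm_device dev = { &vm };

	printf("%d %d\n", device_ioctl(&dev, &owner),	/* 0      */
	       device_ioctl(&dev, &other));		/* -EIO   */
	return 0;
}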