60 files changed, 2449 insertions, 1369 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 93413ce96883..27e0488d54d2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1201,6 +1201,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1201 | [KVM,Intel] Disable FlexPriority feature (TPR shadow). | 1201 | [KVM,Intel] Disable FlexPriority feature (TPR shadow). |
1202 | Default is 1 (enabled) | 1202 | Default is 1 (enabled) |
1203 | 1203 | ||
1204 | kvm-intel.nested= | ||
1205 | [KVM,Intel] Enable VMX nesting (nVMX). | ||
1206 | Default is 0 (disabled) | ||
1207 | |||
1204 | kvm-intel.unrestricted_guest= | 1208 | kvm-intel.unrestricted_guest= |
1205 | [KVM,Intel] Disable unrestricted guest feature | 1209 | [KVM,Intel] Disable unrestricted guest feature |
1206 | (virtualized real and unpaged mode) on capable | 1210 | (virtualized real and unpaged mode) on capable |
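For illustration only (the patch itself just documents the switch): like other KVM module parameters listed here, it would typically be given on the kernel command line as kvm-intel.nested=1, or at module load time as nested=1 for the kvm_intel module.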
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index b0e4b9cd6a66..7945b0bd35e2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -175,10 +175,30 @@ Parameters: vcpu id (apic id on x86)
175 | Returns: vcpu fd on success, -1 on error | 175 | Returns: vcpu fd on success, -1 on error |
176 | 176 | ||
177 | This API adds a vcpu to a virtual machine. The vcpu id is a small integer | 177 | This API adds a vcpu to a virtual machine. The vcpu id is a small integer |
178 | in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the | 178 | in the range [0, max_vcpus). |
179 | KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time. | 179 | |
180 | The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of | ||
181 | the KVM_CHECK_EXTENSION ioctl() at run-time. | ||
182 | The maximum possible value for max_vcpus can be retrieved using the | ||
183 | KVM_CAP_MAX_VCPUS of the KVM_CHECK_EXTENSION ioctl() at run-time. | ||
184 | |||
180 | If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 | 185 | If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 |
181 | cpus max. | 186 | cpus max. |
187 | If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is | ||
188 | same as the value returned from KVM_CAP_NR_VCPUS. | ||
189 | |||
190 | On powerpc using book3s_hv mode, the vcpus are mapped onto virtual | ||
191 | threads in one or more virtual CPU cores. (This is because the | ||
192 | hardware requires all the hardware threads in a CPU core to be in the | ||
193 | same partition.) The KVM_CAP_PPC_SMT capability indicates the number | ||
194 | of vcpus per virtual core (vcore). The vcore id is obtained by | ||
195 | dividing the vcpu id by the number of vcpus per vcore. The vcpus in a | ||
196 | given vcore will always be in the same physical core as each other | ||
197 | (though that might be a different physical core from time to time). | ||
198 | Userspace can control the threading (SMT) mode of the guest by its | ||
199 | allocation of vcpu ids. For example, if userspace wants | ||
200 | single-threaded guest vcpus, it should make all vcpu ids be a multiple | ||
201 | of the number of vcpus per vcore. | ||
182 | 202 | ||
183 | On powerpc using book3s_hv mode, the vcpus are mapped onto virtual | 203 | On powerpc using book3s_hv mode, the vcpus are mapped onto virtual |
184 | threads in one or more virtual CPU cores. (This is because the | 204 | threads in one or more virtual CPU cores. (This is because the |
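As a rough illustration of the discovery sequence described in the new text, the sketch below (untested C; the open()/printf() scaffolding and the choice of vcpu_id are assumptions for the example, not part of the patch) queries the recommended and maximum vcpu counts and shows how a book3s_hv vcore id falls out of a vcpu id:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);

	/* Recommended and absolute vcpu limits, with the documented fallbacks. */
	int nr_vcpus  = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
	int max_vcpus = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

	if (nr_vcpus <= 0)
		nr_vcpus = 4;		/* KVM_CAP_NR_VCPUS absent: assume 4 */
	if (max_vcpus <= 0)
		max_vcpus = nr_vcpus;	/* KVM_CAP_MAX_VCPUS absent: same as NR_VCPUS */

	/* book3s_hv only: vcpus whose vcpu_id / threads_per_vcore is equal
	 * end up in the same virtual core, hence the same physical core. */
	int threads_per_vcore = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);
	int vcpu_id = 0;		/* chosen by userspace */
	int vcore_id = threads_per_vcore > 0 ? vcpu_id / threads_per_vcore : vcpu_id;

	printf("nr=%d max=%d vcore=%d\n", nr_vcpus, max_vcpus, vcore_id);
	return 0;
}

Making every vcpu_id a multiple of threads_per_vcore is what yields the single-threaded guest layout mentioned above.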
@@ -1633,3 +1653,50 @@ developer registration required to access it).
1633 | char padding[256]; | 1653 | char padding[256]; |
1634 | }; | 1654 | }; |
1635 | }; | 1655 | }; |
1656 | |||
1657 | 6. Capabilities that can be enabled | ||
1658 | |||
1659 | There are certain capabilities that change the behavior of the virtual CPU when | ||
1660 | enabled. To enable them, please see section 4.37. Below you can find a list of | ||
1661 | capabilities and what their effect on the vCPU is when enabling them. | ||
1662 | |||
1663 | The following information is provided along with the description: | ||
1664 | |||
1665 | Architectures: which instruction set architectures provide this ioctl. | ||
1666 | x86 includes both i386 and x86_64. | ||
1667 | |||
1668 | Parameters: what parameters are accepted by the capability. | ||
1669 | |||
1670 | Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) | ||
1671 | are not detailed, but errors with specific meanings are. | ||
1672 | |||
1673 | 6.1 KVM_CAP_PPC_OSI | ||
1674 | |||
1675 | Architectures: ppc | ||
1676 | Parameters: none | ||
1677 | Returns: 0 on success; -1 on error | ||
1678 | |||
1679 | This capability enables interception of OSI hypercalls that otherwise would | ||
1680 | be treated as normal system calls to be injected into the guest. OSI hypercalls | ||
1681 | were invented by Mac-on-Linux to have a standardized communication mechanism | ||
1682 | between the guest and the host. | ||
1683 | |||
1684 | When this capability is enabled, KVM_EXIT_OSI can occur. | ||
1685 | |||
1686 | 6.2 KVM_CAP_PPC_PAPR | ||
1687 | |||
1688 | Architectures: ppc | ||
1689 | Parameters: none | ||
1690 | Returns: 0 on success; -1 on error | ||
1691 | |||
1692 | This capability enables interception of PAPR hypercalls. PAPR hypercalls are | ||
1693 | done using the hypercall instruction "sc 1". | ||
1694 | |||
1695 | It also sets the guest privilege level to "supervisor" mode. Usually the guest | ||
1696 | runs in "hypervisor" privilege mode with a few missing features. | ||
1697 | |||
1698 | In addition to the above, it changes the semantics of SDR1. In this mode, the | ||
1699 | HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the | ||
1700 | HTAB invisible to the guest. | ||
1701 | |||
1702 | When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur. | ||
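A minimal sketch of turning on one of these capabilities from userspace with the KVM_ENABLE_CAP ioctl of section 4.37 (vcpu_fd and the absent error handling are assumptions for the example, not something the patch prescribes):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vcpu_fd comes from an earlier KVM_CREATE_VCPU call. */
static int enable_papr_mode(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_PPC_PAPR;	/* or KVM_CAP_PPC_OSI for 6.1 */

	/* Once this succeeds, the run loop must expect KVM_EXIT_PAPR_HCALL
	 * exits (KVM_EXIT_OSI in the OSI case). */
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}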
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index a4f6c85431f8..08fe69edcd10 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -149,6 +149,12 @@ struct kvm_regs {
149 | #define KVM_SREGS_E_UPDATE_DBSR (1 << 3) | 149 | #define KVM_SREGS_E_UPDATE_DBSR (1 << 3) |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Book3S special bits to indicate contents in the struct by maintaining | ||
153 | * backwards compatibility with older structs. If adding a new field, | ||
154 | * please make sure to add a flag for that new field */ | ||
155 | #define KVM_SREGS_S_HIOR (1 << 0) | ||
156 | |||
157 | /* | ||
152 | * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a | 158 | * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a |
153 | * previous KVM_GET_REGS. | 159 | * previous KVM_GET_REGS. |
154 | * | 160 | * |
@@ -173,6 +179,8 @@ struct kvm_sregs {
173 | __u64 ibat[8]; | 179 | __u64 ibat[8]; |
174 | __u64 dbat[8]; | 180 | __u64 dbat[8]; |
175 | } ppc32; | 181 | } ppc32; |
182 | __u64 flags; /* KVM_SREGS_S_ */ | ||
183 | __u64 hior; | ||
176 | } s; | 184 | } s; |
177 | struct { | 185 | struct { |
178 | union { | 186 | union { |
@@ -276,6 +284,11 @@ struct kvm_guest_debug_arch {
276 | #define KVM_INTERRUPT_UNSET -2U | 284 | #define KVM_INTERRUPT_UNSET -2U |
277 | #define KVM_INTERRUPT_SET_LEVEL -3U | 285 | #define KVM_INTERRUPT_SET_LEVEL -3U |
278 | 286 | ||
287 | #define KVM_CPU_440 1 | ||
288 | #define KVM_CPU_E500V2 2 | ||
289 | #define KVM_CPU_3S_32 3 | ||
290 | #define KVM_CPU_3S_64 4 | ||
291 | |||
279 | /* for KVM_CAP_SPAPR_TCE */ | 292 | /* for KVM_CAP_SPAPR_TCE */ |
280 | struct kvm_create_spapr_tce { | 293 | struct kvm_create_spapr_tce { |
281 | __u64 liobn; | 294 | __u64 liobn; |
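For illustration, one plausible way userspace could use the KVM_SREGS_S_HIOR flag and hior field added above (a sketch against the powerpc headers; the u.s.* field path and vcpu_fd are inferred for the example rather than spelled out by the patch):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_guest_hior(int vcpu_fd, __u64 hior)
{
	struct kvm_sregs sregs;

	if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
		return -1;

	/* Per the comment above, the flag marks the new field as valid, so
	 * older userspace that leaves it clear keeps the old PVR-derived HIOR. */
	sregs.u.s.flags |= KVM_SREGS_S_HIOR;
	sregs.u.s.hior = hior;

	return ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);
}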
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 98da010252a3..a384ffdf33de 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s {
90 | #endif | 90 | #endif |
91 | int context_id[SID_CONTEXTS]; | 91 | int context_id[SID_CONTEXTS]; |
92 | 92 | ||
93 | bool hior_sregs; /* HIOR is set by SREGS, not PVR */ | ||
94 | |||
93 | struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; | 95 | struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; |
94 | struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; | 96 | struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; |
95 | struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; | 97 | struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; |
@@ -139,15 +141,14 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
139 | extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); | 141 | extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); |
140 | extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); | 142 | extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); |
141 | 143 | ||
142 | extern void kvmppc_handler_lowmem_trampoline(void); | 144 | extern void kvmppc_entry_trampoline(void); |
143 | extern void kvmppc_handler_trampoline_enter(void); | ||
144 | extern void kvmppc_rmcall(ulong srr0, ulong srr1); | ||
145 | extern void kvmppc_hv_entry_trampoline(void); | 145 | extern void kvmppc_hv_entry_trampoline(void); |
146 | extern void kvmppc_load_up_fpu(void); | 146 | extern void kvmppc_load_up_fpu(void); |
147 | extern void kvmppc_load_up_altivec(void); | 147 | extern void kvmppc_load_up_altivec(void); |
148 | extern void kvmppc_load_up_vsx(void); | 148 | extern void kvmppc_load_up_vsx(void); |
149 | extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); | 149 | extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); |
150 | extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); | 150 | extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); |
151 | extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); | ||
151 | 152 | ||
152 | static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) | 153 | static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) |
153 | { | 154 | { |
@@ -382,6 +383,39 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
382 | } | 383 | } |
383 | #endif | 384 | #endif |
384 | 385 | ||
386 | static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, | ||
387 | unsigned long pte_index) | ||
388 | { | ||
389 | unsigned long rb, va_low; | ||
390 | |||
391 | rb = (v & ~0x7fUL) << 16; /* AVA field */ | ||
392 | va_low = pte_index >> 3; | ||
393 | if (v & HPTE_V_SECONDARY) | ||
394 | va_low = ~va_low; | ||
395 | /* xor vsid from AVA */ | ||
396 | if (!(v & HPTE_V_1TB_SEG)) | ||
397 | va_low ^= v >> 12; | ||
398 | else | ||
399 | va_low ^= v >> 24; | ||
400 | va_low &= 0x7ff; | ||
401 | if (v & HPTE_V_LARGE) { | ||
402 | rb |= 1; /* L field */ | ||
403 | if (cpu_has_feature(CPU_FTR_ARCH_206) && | ||
404 | (r & 0xff000)) { | ||
405 | /* non-16MB large page, must be 64k */ | ||
406 | /* (masks depend on page size) */ | ||
407 | rb |= 0x1000; /* page encoding in LP field */ | ||
408 | rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ | ||
409 | rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ | ||
410 | } | ||
411 | } else { | ||
412 | /* 4kB page */ | ||
413 | rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */ | ||
414 | } | ||
415 | rb |= (v >> 54) & 0x300; /* B field */ | ||
416 | return rb; | ||
417 | } | ||
418 | |||
385 | /* Magic register values loaded into r3 and r4 before the 'sc' assembly | 419 | /* Magic register values loaded into r3 and r4 before the 'sc' assembly |
386 | * instruction for the OSI hypercalls */ | 420 | * instruction for the OSI hypercalls */ |
387 | #define OSI_SC_MAGIC_R3 0x113724FA | 421 | #define OSI_SC_MAGIC_R3 0x113724FA |
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index ef7b3688c3b6..1f2f5b6156bd 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -75,6 +75,8 @@ struct kvmppc_host_state {
75 | ulong scratch0; | 75 | ulong scratch0; |
76 | ulong scratch1; | 76 | ulong scratch1; |
77 | u8 in_guest; | 77 | u8 in_guest; |
78 | u8 restore_hid5; | ||
79 | u8 napping; | ||
78 | 80 | ||
79 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 81 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
80 | struct kvm_vcpu *kvm_vcpu; | 82 | struct kvm_vcpu *kvm_vcpu; |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index cc22b282d755..bf8af5d5d5dc 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -198,21 +198,29 @@ struct kvm_arch {
198 | */ | 198 | */ |
199 | struct kvmppc_vcore { | 199 | struct kvmppc_vcore { |
200 | int n_runnable; | 200 | int n_runnable; |
201 | int n_blocked; | 201 | int n_busy; |
202 | int num_threads; | 202 | int num_threads; |
203 | int entry_exit_count; | 203 | int entry_exit_count; |
204 | int n_woken; | 204 | int n_woken; |
205 | int nap_count; | 205 | int nap_count; |
206 | int napping_threads; | ||
206 | u16 pcpu; | 207 | u16 pcpu; |
207 | u8 vcore_running; | 208 | u8 vcore_state; |
208 | u8 in_guest; | 209 | u8 in_guest; |
209 | struct list_head runnable_threads; | 210 | struct list_head runnable_threads; |
210 | spinlock_t lock; | 211 | spinlock_t lock; |
212 | wait_queue_head_t wq; | ||
211 | }; | 213 | }; |
212 | 214 | ||
213 | #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) | 215 | #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) |
214 | #define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8) | 216 | #define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8) |
215 | 217 | ||
218 | /* Values for vcore_state */ | ||
219 | #define VCORE_INACTIVE 0 | ||
220 | #define VCORE_RUNNING 1 | ||
221 | #define VCORE_EXITING 2 | ||
222 | #define VCORE_SLEEPING 3 | ||
223 | |||
216 | struct kvmppc_pte { | 224 | struct kvmppc_pte { |
217 | ulong eaddr; | 225 | ulong eaddr; |
218 | u64 vpage; | 226 | u64 vpage; |
@@ -258,14 +266,6 @@ struct kvm_vcpu_arch {
258 | ulong host_stack; | 266 | ulong host_stack; |
259 | u32 host_pid; | 267 | u32 host_pid; |
260 | #ifdef CONFIG_PPC_BOOK3S | 268 | #ifdef CONFIG_PPC_BOOK3S |
261 | ulong host_msr; | ||
262 | ulong host_r2; | ||
263 | void *host_retip; | ||
264 | ulong trampoline_lowmem; | ||
265 | ulong trampoline_enter; | ||
266 | ulong highmem_handler; | ||
267 | ulong rmcall; | ||
268 | ulong host_paca_phys; | ||
269 | struct kvmppc_slb slb[64]; | 269 | struct kvmppc_slb slb[64]; |
270 | int slb_max; /* 1 + index of last valid entry in slb[] */ | 270 | int slb_max; /* 1 + index of last valid entry in slb[] */ |
271 | int slb_nr; /* total number of entries in SLB */ | 271 | int slb_nr; /* total number of entries in SLB */ |
@@ -389,6 +389,9 @@ struct kvm_vcpu_arch {
389 | u8 dcr_is_write; | 389 | u8 dcr_is_write; |
390 | u8 osi_needed; | 390 | u8 osi_needed; |
391 | u8 osi_enabled; | 391 | u8 osi_enabled; |
392 | u8 papr_enabled; | ||
393 | u8 sane; | ||
394 | u8 cpu_type; | ||
392 | u8 hcall_needed; | 395 | u8 hcall_needed; |
393 | 396 | ||
394 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ | 397 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ |
@@ -408,11 +411,13 @@ struct kvm_vcpu_arch {
408 | struct dtl *dtl; | 411 | struct dtl *dtl; |
409 | struct dtl *dtl_end; | 412 | struct dtl *dtl_end; |
410 | 413 | ||
414 | wait_queue_head_t *wqp; | ||
411 | struct kvmppc_vcore *vcore; | 415 | struct kvmppc_vcore *vcore; |
412 | int ret; | 416 | int ret; |
413 | int trap; | 417 | int trap; |
414 | int state; | 418 | int state; |
415 | int ptid; | 419 | int ptid; |
420 | bool timer_running; | ||
416 | wait_queue_head_t cpu_run; | 421 | wait_queue_head_t cpu_run; |
417 | 422 | ||
418 | struct kvm_vcpu_arch_shared *shared; | 423 | struct kvm_vcpu_arch_shared *shared; |
@@ -428,8 +433,9 @@ struct kvm_vcpu_arch {
428 | #endif | 433 | #endif |
429 | }; | 434 | }; |
430 | 435 | ||
431 | #define KVMPPC_VCPU_BUSY_IN_HOST 0 | 436 | /* Values for vcpu->arch.state */ |
432 | #define KVMPPC_VCPU_BLOCKED 1 | 437 | #define KVMPPC_VCPU_STOPPED 0 |
438 | #define KVMPPC_VCPU_BUSY_IN_HOST 1 | ||
433 | #define KVMPPC_VCPU_RUNNABLE 2 | 439 | #define KVMPPC_VCPU_RUNNABLE 2 |
434 | 440 | ||
435 | #endif /* __POWERPC_KVM_HOST_H__ */ | 441 | #endif /* __POWERPC_KVM_HOST_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index d121f49d62b8..46efd1a265c9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
66 | extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); | 66 | extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); |
67 | extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); | 67 | extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); |
68 | extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); | 68 | extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); |
69 | extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); | ||
69 | 70 | ||
70 | /* Core-specific hooks */ | 71 | /* Core-specific hooks */ |
71 | 72 | ||
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 5f078bc2063e..69f7ffe7f674 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -44,6 +44,7 @@
44 | #include <asm/compat.h> | 44 | #include <asm/compat.h> |
45 | #include <asm/mmu.h> | 45 | #include <asm/mmu.h> |
46 | #include <asm/hvcall.h> | 46 | #include <asm/hvcall.h> |
47 | #include <asm/xics.h> | ||
47 | #endif | 48 | #endif |
48 | #ifdef CONFIG_PPC_ISERIES | 49 | #ifdef CONFIG_PPC_ISERIES |
49 | #include <asm/iseries/alpaca.h> | 50 | #include <asm/iseries/alpaca.h> |
@@ -449,8 +450,6 @@ int main(void)
449 | #ifdef CONFIG_PPC_BOOK3S | 450 | #ifdef CONFIG_PPC_BOOK3S |
450 | DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); | 451 | DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); |
451 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); | 452 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); |
452 | DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); | ||
453 | DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); | ||
454 | DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); | 453 | DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); |
455 | DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); | 454 | DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); |
456 | DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); | 455 | DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); |
@@ -458,14 +457,12 @@ int main(void)
458 | DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); | 457 | DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); |
459 | DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); | 458 | DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); |
460 | DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); | 459 | DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); |
461 | DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); | ||
462 | DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); | ||
463 | DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); | ||
464 | DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); | ||
465 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); | 460 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); |
466 | DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); | 461 | DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); |
467 | DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); | 462 | DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); |
468 | DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); | 463 | DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); |
464 | DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); | ||
465 | DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); | ||
469 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); | 466 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); |
470 | DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); | 467 | DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); |
471 | DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); | 468 | DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); |
@@ -481,6 +478,7 @@ int main(void)
481 | DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); | 478 | DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); |
482 | DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); | 479 | DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); |
483 | DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); | 480 | DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); |
481 | DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); | ||
484 | DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - | 482 | DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - |
485 | offsetof(struct kvmppc_vcpu_book3s, vcpu)); | 483 | offsetof(struct kvmppc_vcpu_book3s, vcpu)); |
486 | DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); | 484 | DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); |
@@ -537,6 +535,8 @@ int main(void)
537 | HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); | 535 | HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); |
538 | HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); | 536 | HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); |
539 | HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); | 537 | HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); |
538 | HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); | ||
539 | HSTATE_FIELD(HSTATE_NAPPING, napping); | ||
540 | 540 | ||
541 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 541 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
542 | HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); | 542 | HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); |
@@ -549,6 +549,7 @@ int main(void)
549 | HSTATE_FIELD(HSTATE_DSCR, host_dscr); | 549 | HSTATE_FIELD(HSTATE_DSCR, host_dscr); |
550 | HSTATE_FIELD(HSTATE_DABR, dabr); | 550 | HSTATE_FIELD(HSTATE_DABR, dabr); |
551 | HSTATE_FIELD(HSTATE_DECEXP, dec_expires); | 551 | HSTATE_FIELD(HSTATE_DECEXP, dec_expires); |
552 | DEFINE(IPI_PRIORITY, IPI_PRIORITY); | ||
552 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | 553 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ |
553 | 554 | ||
554 | #else /* CONFIG_PPC_BOOK3S */ | 555 | #else /* CONFIG_PPC_BOOK3S */ |
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 41b02c792aa3..29ddd8b1c274 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -427,16 +427,6 @@ slb_miss_user_pseries:
427 | b . /* prevent spec. execution */ | 427 | b . /* prevent spec. execution */ |
428 | #endif /* __DISABLED__ */ | 428 | #endif /* __DISABLED__ */ |
429 | 429 | ||
430 | /* KVM's trampoline code needs to be close to the interrupt handlers */ | ||
431 | |||
432 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | ||
433 | #ifdef CONFIG_KVM_BOOK3S_PR | ||
434 | #include "../kvm/book3s_rmhandlers.S" | ||
435 | #else | ||
436 | #include "../kvm/book3s_hv_rmhandlers.S" | ||
437 | #endif | ||
438 | #endif | ||
439 | |||
440 | .align 7 | 430 | .align 7 |
441 | .globl __end_interrupts | 431 | .globl __end_interrupts |
442 | __end_interrupts: | 432 | __end_interrupts: |
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index da3a1225c0ac..ca1f88b3dc59 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -78,6 +78,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
78 | for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) | 78 | for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) |
79 | vcpu_44x->shadow_refs[i].gtlb_index = -1; | 79 | vcpu_44x->shadow_refs[i].gtlb_index = -1; |
80 | 80 | ||
81 | vcpu->arch.cpu_type = KVM_CPU_440; | ||
82 | |||
81 | return 0; | 83 | return 0; |
82 | } | 84 | } |
83 | 85 | ||
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 08428e2c188d..3688aeecc4b2 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -43,18 +43,22 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
43 | fpu.o \ | 43 | fpu.o \ |
44 | book3s_paired_singles.o \ | 44 | book3s_paired_singles.o \ |
45 | book3s_pr.o \ | 45 | book3s_pr.o \ |
46 | book3s_pr_papr.o \ | ||
46 | book3s_emulate.o \ | 47 | book3s_emulate.o \ |
47 | book3s_interrupts.o \ | 48 | book3s_interrupts.o \ |
48 | book3s_mmu_hpte.o \ | 49 | book3s_mmu_hpte.o \ |
49 | book3s_64_mmu_host.o \ | 50 | book3s_64_mmu_host.o \ |
50 | book3s_64_mmu.o \ | 51 | book3s_64_mmu.o \ |
51 | book3s_32_mmu.o | 52 | book3s_32_mmu.o |
53 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ | ||
54 | book3s_rmhandlers.o | ||
52 | 55 | ||
53 | kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | 56 | kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ |
54 | book3s_hv.o \ | 57 | book3s_hv.o \ |
55 | book3s_hv_interrupts.o \ | 58 | book3s_hv_interrupts.o \ |
56 | book3s_64_mmu_hv.o | 59 | book3s_64_mmu_hv.o |
57 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | 60 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ |
61 | book3s_hv_rmhandlers.o \ | ||
58 | book3s_hv_rm_mmu.o \ | 62 | book3s_hv_rm_mmu.o \ |
59 | book3s_64_vio_hv.o \ | 63 | book3s_64_vio_hv.o \ |
60 | book3s_hv_builtin.o | 64 | book3s_hv_builtin.o |
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
index 3608471ad2d8..7e06a6fc8d07 100644
--- a/arch/powerpc/kvm/book3s_32_sr.S
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -31,7 +31,7 @@
31 | * R1 = host R1 | 31 | * R1 = host R1 |
32 | * R2 = host R2 | 32 | * R2 = host R2 |
33 | * R3 = shadow vcpu | 33 | * R3 = shadow vcpu |
34 | * all other volatile GPRS = free | 34 | * all other volatile GPRS = free except R4, R6 |
35 | * SVCPU[CR] = guest CR | 35 | * SVCPU[CR] = guest CR |
36 | * SVCPU[XER] = guest XER | 36 | * SVCPU[XER] = guest XER |
37 | * SVCPU[CTR] = guest CTR | 37 | * SVCPU[CTR] = guest CTR |
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index c6d3e194b6b4..b871721c0050 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -128,7 +128,13 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
128 | dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n", | 128 | dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n", |
129 | page, vcpu_book3s->sdr1, pteg, slbe->vsid); | 129 | page, vcpu_book3s->sdr1, pteg, slbe->vsid); |
130 | 130 | ||
131 | r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); | 131 | /* When running a PAPR guest, SDR1 contains a HVA address instead |
132 | of a GPA */ | ||
133 | if (vcpu_book3s->vcpu.arch.papr_enabled) | ||
134 | r = pteg; | ||
135 | else | ||
136 | r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); | ||
137 | |||
132 | if (kvm_is_error_hva(r)) | 138 | if (kvm_is_error_hva(r)) |
133 | return r; | 139 | return r; |
134 | return r | (pteg & ~PAGE_MASK); | 140 | return r | (pteg & ~PAGE_MASK); |
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 04e7d3bbfe8b..f2e6e48ea463 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -53,7 +53,7 @@ slb_exit_skip_ ## num:
53 | * R1 = host R1 | 53 | * R1 = host R1 |
54 | * R2 = host R2 | 54 | * R2 = host R2 |
55 | * R3 = shadow vcpu | 55 | * R3 = shadow vcpu |
56 | * all other volatile GPRS = free | 56 | * all other volatile GPRS = free except R4, R6 |
57 | * SVCPU[CR] = guest CR | 57 | * SVCPU[CR] = guest CR |
58 | * SVCPU[XER] = guest XER | 58 | * SVCPU[XER] = guest XER |
59 | * SVCPU[CTR] = guest CTR | 59 | * SVCPU[CTR] = guest CTR |
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 466846557089..0c9dc62532d0 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -63,6 +63,25 @@
63 | * function pointers, so let's just disable the define. */ | 63 | * function pointers, so let's just disable the define. */ |
64 | #undef mfsrin | 64 | #undef mfsrin |
65 | 65 | ||
66 | enum priv_level { | ||
67 | PRIV_PROBLEM = 0, | ||
68 | PRIV_SUPER = 1, | ||
69 | PRIV_HYPER = 2, | ||
70 | }; | ||
71 | |||
72 | static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level) | ||
73 | { | ||
74 | /* PAPR VMs only access supervisor SPRs */ | ||
75 | if (vcpu->arch.papr_enabled && (level > PRIV_SUPER)) | ||
76 | return false; | ||
77 | |||
78 | /* Limit user space to its own small SPR set */ | ||
79 | if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM) | ||
80 | return false; | ||
81 | |||
82 | return true; | ||
83 | } | ||
84 | |||
66 | int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 85 | int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
67 | unsigned int inst, int *advance) | 86 | unsigned int inst, int *advance) |
68 | { | 87 | { |
@@ -296,6 +315,8 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
296 | 315 | ||
297 | switch (sprn) { | 316 | switch (sprn) { |
298 | case SPRN_SDR1: | 317 | case SPRN_SDR1: |
318 | if (!spr_allowed(vcpu, PRIV_HYPER)) | ||
319 | goto unprivileged; | ||
299 | to_book3s(vcpu)->sdr1 = spr_val; | 320 | to_book3s(vcpu)->sdr1 = spr_val; |
300 | break; | 321 | break; |
301 | case SPRN_DSISR: | 322 | case SPRN_DSISR: |
@@ -390,6 +411,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
390 | case SPRN_PMC4_GEKKO: | 411 | case SPRN_PMC4_GEKKO: |
391 | case SPRN_WPAR_GEKKO: | 412 | case SPRN_WPAR_GEKKO: |
392 | break; | 413 | break; |
414 | unprivileged: | ||
393 | default: | 415 | default: |
394 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); | 416 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); |
395 | #ifndef DEBUG_SPR | 417 | #ifndef DEBUG_SPR |
@@ -421,6 +443,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
421 | break; | 443 | break; |
422 | } | 444 | } |
423 | case SPRN_SDR1: | 445 | case SPRN_SDR1: |
446 | if (!spr_allowed(vcpu, PRIV_HYPER)) | ||
447 | goto unprivileged; | ||
424 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); | 448 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); |
425 | break; | 449 | break; |
426 | case SPRN_DSISR: | 450 | case SPRN_DSISR: |
@@ -449,6 +473,10 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
449 | case SPRN_HID5: | 473 | case SPRN_HID5: |
450 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); | 474 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); |
451 | break; | 475 | break; |
476 | case SPRN_CFAR: | ||
477 | case SPRN_PURR: | ||
478 | kvmppc_set_gpr(vcpu, rt, 0); | ||
479 | break; | ||
452 | case SPRN_GQR0: | 480 | case SPRN_GQR0: |
453 | case SPRN_GQR1: | 481 | case SPRN_GQR1: |
454 | case SPRN_GQR2: | 482 | case SPRN_GQR2: |
@@ -476,6 +504,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
476 | kvmppc_set_gpr(vcpu, rt, 0); | 504 | kvmppc_set_gpr(vcpu, rt, 0); |
477 | break; | 505 | break; |
478 | default: | 506 | default: |
507 | unprivileged: | ||
479 | printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); | 508 | printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); |
480 | #ifndef DEBUG_SPR | 509 | #ifndef DEBUG_SPR |
481 | emulated = EMULATE_FAIL; | 510 | emulated = EMULATE_FAIL; |
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 88c8f26add02..f7f63a00ab1f 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -23,9 +23,7 @@
23 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 23 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
24 | EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); | 24 | EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); |
25 | #else | 25 | #else |
26 | EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter); | 26 | EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); |
27 | EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline); | ||
28 | EXPORT_SYMBOL_GPL(kvmppc_rmcall); | ||
29 | EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); | 27 | EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); |
30 | #ifdef CONFIG_ALTIVEC | 28 | #ifdef CONFIG_ALTIVEC |
31 | EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); | 29 | EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cc0d7f1b19ab..4644c7986d80 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -62,6 +62,8 @@
62 | /* #define EXIT_DEBUG_SIMPLE */ | 62 | /* #define EXIT_DEBUG_SIMPLE */ |
63 | /* #define EXIT_DEBUG_INT */ | 63 | /* #define EXIT_DEBUG_INT */ |
64 | 64 | ||
65 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); | ||
66 | |||
65 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 67 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
66 | { | 68 | { |
67 | local_paca->kvm_hstate.kvm_vcpu = vcpu; | 69 | local_paca->kvm_hstate.kvm_vcpu = vcpu; |
@@ -72,40 +74,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
72 | { | 74 | { |
73 | } | 75 | } |
74 | 76 | ||
75 | static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu); | ||
76 | static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu); | ||
77 | |||
78 | void kvmppc_vcpu_block(struct kvm_vcpu *vcpu) | ||
79 | { | ||
80 | u64 now; | ||
81 | unsigned long dec_nsec; | ||
82 | |||
83 | now = get_tb(); | ||
84 | if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu)) | ||
85 | kvmppc_core_queue_dec(vcpu); | ||
86 | if (vcpu->arch.pending_exceptions) | ||
87 | return; | ||
88 | if (vcpu->arch.dec_expires != ~(u64)0) { | ||
89 | dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC / | ||
90 | tb_ticks_per_sec; | ||
91 | hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), | ||
92 | HRTIMER_MODE_REL); | ||
93 | } | ||
94 | |||
95 | kvmppc_vcpu_blocked(vcpu); | ||
96 | |||
97 | kvm_vcpu_block(vcpu); | ||
98 | vcpu->stat.halt_wakeup++; | ||
99 | |||
100 | if (vcpu->arch.dec_expires != ~(u64)0) | ||
101 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | ||
102 | |||
103 | kvmppc_vcpu_unblocked(vcpu); | ||
104 | } | ||
105 | |||
106 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) | 77 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) |
107 | { | 78 | { |
108 | vcpu->arch.shregs.msr = msr; | 79 | vcpu->arch.shregs.msr = msr; |
80 | kvmppc_end_cede(vcpu); | ||
109 | } | 81 | } |
110 | 82 | ||
111 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | 83 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) |
@@ -257,15 +229,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
257 | 229 | ||
258 | switch (req) { | 230 | switch (req) { |
259 | case H_CEDE: | 231 | case H_CEDE: |
260 | vcpu->arch.shregs.msr |= MSR_EE; | ||
261 | vcpu->arch.ceded = 1; | ||
262 | smp_mb(); | ||
263 | if (!vcpu->arch.prodded) | ||
264 | kvmppc_vcpu_block(vcpu); | ||
265 | else | ||
266 | vcpu->arch.prodded = 0; | ||
267 | smp_mb(); | ||
268 | vcpu->arch.ceded = 0; | ||
269 | break; | 232 | break; |
270 | case H_PROD: | 233 | case H_PROD: |
271 | target = kvmppc_get_gpr(vcpu, 4); | 234 | target = kvmppc_get_gpr(vcpu, 4); |
@@ -388,20 +351,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
388 | break; | 351 | break; |
389 | } | 352 | } |
390 | 353 | ||
391 | |||
392 | if (!(r & RESUME_HOST)) { | ||
393 | /* To avoid clobbering exit_reason, only check for signals if | ||
394 | * we aren't already exiting to userspace for some other | ||
395 | * reason. */ | ||
396 | if (signal_pending(tsk)) { | ||
397 | vcpu->stat.signal_exits++; | ||
398 | run->exit_reason = KVM_EXIT_INTR; | ||
399 | r = -EINTR; | ||
400 | } else { | ||
401 | kvmppc_core_deliver_interrupts(vcpu); | ||
402 | } | ||
403 | } | ||
404 | |||
405 | return r; | 354 | return r; |
406 | } | 355 | } |
407 | 356 | ||
@@ -479,13 +428,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
479 | kvmppc_mmu_book3s_hv_init(vcpu); | 428 | kvmppc_mmu_book3s_hv_init(vcpu); |
480 | 429 | ||
481 | /* | 430 | /* |
482 | * Some vcpus may start out in stopped state. If we initialize | 431 | * We consider the vcpu stopped until we see the first run ioctl for it. |
483 | * them to busy-in-host state they will stop other vcpus in the | ||
484 | * vcore from running. Instead we initialize them to blocked | ||
485 | * state, effectively considering them to be stopped until we | ||
486 | * see the first run ioctl for them. | ||
487 | */ | 432 | */ |
488 | vcpu->arch.state = KVMPPC_VCPU_BLOCKED; | 433 | vcpu->arch.state = KVMPPC_VCPU_STOPPED; |
489 | 434 | ||
490 | init_waitqueue_head(&vcpu->arch.cpu_run); | 435 | init_waitqueue_head(&vcpu->arch.cpu_run); |
491 | 436 | ||
@@ -496,6 +441,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
496 | if (vcore) { | 441 | if (vcore) { |
497 | INIT_LIST_HEAD(&vcore->runnable_threads); | 442 | INIT_LIST_HEAD(&vcore->runnable_threads); |
498 | spin_lock_init(&vcore->lock); | 443 | spin_lock_init(&vcore->lock); |
444 | init_waitqueue_head(&vcore->wq); | ||
499 | } | 445 | } |
500 | kvm->arch.vcores[core] = vcore; | 446 | kvm->arch.vcores[core] = vcore; |
501 | } | 447 | } |
@@ -506,10 +452,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
506 | 452 | ||
507 | spin_lock(&vcore->lock); | 453 | spin_lock(&vcore->lock); |
508 | ++vcore->num_threads; | 454 | ++vcore->num_threads; |
509 | ++vcore->n_blocked; | ||
510 | spin_unlock(&vcore->lock); | 455 | spin_unlock(&vcore->lock); |
511 | vcpu->arch.vcore = vcore; | 456 | vcpu->arch.vcore = vcore; |
512 | 457 | ||
458 | vcpu->arch.cpu_type = KVM_CPU_3S_64; | ||
459 | kvmppc_sanity_check(vcpu); | ||
460 | |||
513 | return vcpu; | 461 | return vcpu; |
514 | 462 | ||
515 | free_vcpu: | 463 | free_vcpu: |
@@ -524,30 +472,31 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
524 | kfree(vcpu); | 472 | kfree(vcpu); |
525 | } | 473 | } |
526 | 474 | ||
527 | static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu) | 475 | static void kvmppc_set_timer(struct kvm_vcpu *vcpu) |
528 | { | 476 | { |
529 | struct kvmppc_vcore *vc = vcpu->arch.vcore; | 477 | unsigned long dec_nsec, now; |
530 | 478 | ||
531 | spin_lock(&vc->lock); | 479 | now = get_tb(); |
532 | vcpu->arch.state = KVMPPC_VCPU_BLOCKED; | 480 | if (now > vcpu->arch.dec_expires) { |
533 | ++vc->n_blocked; | 481 | /* decrementer has already gone negative */ |
534 | if (vc->n_runnable > 0 && | 482 | kvmppc_core_queue_dec(vcpu); |
535 | vc->n_runnable + vc->n_blocked == vc->num_threads) { | 483 | kvmppc_core_deliver_interrupts(vcpu); |
536 | vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, | 484 | return; |
537 | arch.run_list); | ||
538 | wake_up(&vcpu->arch.cpu_run); | ||
539 | } | 485 | } |
540 | spin_unlock(&vc->lock); | 486 | dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC |
487 | / tb_ticks_per_sec; | ||
488 | hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), | ||
489 | HRTIMER_MODE_REL); | ||
490 | vcpu->arch.timer_running = 1; | ||
541 | } | 491 | } |
542 | 492 | ||
543 | static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu) | 493 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu) |
544 | { | 494 | { |
545 | struct kvmppc_vcore *vc = vcpu->arch.vcore; | 495 | vcpu->arch.ceded = 0; |
546 | 496 | if (vcpu->arch.timer_running) { | |
547 | spin_lock(&vc->lock); | 497 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); |
548 | vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; | 498 | vcpu->arch.timer_running = 0; |
549 | --vc->n_blocked; | 499 | } |
550 | spin_unlock(&vc->lock); | ||
551 | } | 500 | } |
552 | 501 | ||
553 | extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 502 | extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
@@ -562,6 +511,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
562 | return; | 511 | return; |
563 | vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; | 512 | vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; |
564 | --vc->n_runnable; | 513 | --vc->n_runnable; |
514 | ++vc->n_busy; | ||
565 | /* decrement the physical thread id of each following vcpu */ | 515 | /* decrement the physical thread id of each following vcpu */ |
566 | v = vcpu; | 516 | v = vcpu; |
567 | list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) | 517 | list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) |
@@ -575,15 +525,20 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
575 | struct paca_struct *tpaca; | 525 | struct paca_struct *tpaca; |
576 | struct kvmppc_vcore *vc = vcpu->arch.vcore; | 526 | struct kvmppc_vcore *vc = vcpu->arch.vcore; |
577 | 527 | ||
528 | if (vcpu->arch.timer_running) { | ||
529 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | ||
530 | vcpu->arch.timer_running = 0; | ||
531 | } | ||
578 | cpu = vc->pcpu + vcpu->arch.ptid; | 532 | cpu = vc->pcpu + vcpu->arch.ptid; |
579 | tpaca = &paca[cpu]; | 533 | tpaca = &paca[cpu]; |
580 | tpaca->kvm_hstate.kvm_vcpu = vcpu; | 534 | tpaca->kvm_hstate.kvm_vcpu = vcpu; |
581 | tpaca->kvm_hstate.kvm_vcore = vc; | 535 | tpaca->kvm_hstate.kvm_vcore = vc; |
536 | tpaca->kvm_hstate.napping = 0; | ||
537 | vcpu->cpu = vc->pcpu; | ||
582 | smp_wmb(); | 538 | smp_wmb(); |
583 | #ifdef CONFIG_PPC_ICP_NATIVE | 539 | #ifdef CONFIG_PPC_ICP_NATIVE |
584 | if (vcpu->arch.ptid) { | 540 | if (vcpu->arch.ptid) { |
585 | tpaca->cpu_start = 0x80; | 541 | tpaca->cpu_start = 0x80; |
586 | tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST; | ||
587 | wmb(); | 542 | wmb(); |
588 | xics_wake_cpu(cpu); | 543 | xics_wake_cpu(cpu); |
589 | ++vc->n_woken; | 544 | ++vc->n_woken; |
@@ -631,9 +586,10 @@ static int on_primary_thread(void)
631 | */ | 586 | */ |
632 | static int kvmppc_run_core(struct kvmppc_vcore *vc) | 587 | static int kvmppc_run_core(struct kvmppc_vcore *vc) |
633 | { | 588 | { |
634 | struct kvm_vcpu *vcpu, *vnext; | 589 | struct kvm_vcpu *vcpu, *vcpu0, *vnext; |
635 | long ret; | 590 | long ret; |
636 | u64 now; | 591 | u64 now; |
592 | int ptid; | ||
637 | 593 | ||
638 | /* don't start if any threads have a signal pending */ | 594 | /* don't start if any threads have a signal pending */ |
639 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) | 595 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) |
@@ -652,29 +608,50 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
652 | goto out; | 608 | goto out; |
653 | } | 609 | } |
654 | 610 | ||
611 | /* | ||
612 | * Assign physical thread IDs, first to non-ceded vcpus | ||
613 | * and then to ceded ones. | ||
614 | */ | ||
615 | ptid = 0; | ||
616 | vcpu0 = NULL; | ||
617 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { | ||
618 | if (!vcpu->arch.ceded) { | ||
619 | if (!ptid) | ||
620 | vcpu0 = vcpu; | ||
621 | vcpu->arch.ptid = ptid++; | ||
622 | } | ||
623 | } | ||
624 | if (!vcpu0) | ||
625 | return 0; /* nothing to run */ | ||
626 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) | ||
627 | if (vcpu->arch.ceded) | ||
628 | vcpu->arch.ptid = ptid++; | ||
629 | |||
655 | vc->n_woken = 0; | 630 | vc->n_woken = 0; |
656 | vc->nap_count = 0; | 631 | vc->nap_count = 0; |
657 | vc->entry_exit_count = 0; | 632 | vc->entry_exit_count = 0; |
658 | vc->vcore_running = 1; | 633 | vc->vcore_state = VCORE_RUNNING; |
659 | vc->in_guest = 0; | 634 | vc->in_guest = 0; |
660 | vc->pcpu = smp_processor_id(); | 635 | vc->pcpu = smp_processor_id(); |
636 | vc->napping_threads = 0; | ||
661 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) | 637 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) |
662 | kvmppc_start_thread(vcpu); | 638 | kvmppc_start_thread(vcpu); |
663 | vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, | ||
664 | arch.run_list); | ||
665 | 639 | ||
640 | preempt_disable(); | ||
666 | spin_unlock(&vc->lock); | 641 | spin_unlock(&vc->lock); |
667 | 642 | ||
668 | preempt_disable(); | ||
669 | kvm_guest_enter(); | 643 | kvm_guest_enter(); |
670 | __kvmppc_vcore_entry(NULL, vcpu); | 644 | __kvmppc_vcore_entry(NULL, vcpu0); |
671 | 645 | ||
672 | /* wait for secondary threads to finish writing their state to memory */ | ||
673 | spin_lock(&vc->lock); | 646 | spin_lock(&vc->lock); |
647 | /* disable sending of IPIs on virtual external irqs */ | ||
648 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) | ||
649 | vcpu->cpu = -1; | ||
650 | /* wait for secondary threads to finish writing their state to memory */ | ||
674 | if (vc->nap_count < vc->n_woken) | 651 | if (vc->nap_count < vc->n_woken) |
675 | kvmppc_wait_for_nap(vc); | 652 | kvmppc_wait_for_nap(vc); |
676 | /* prevent other vcpu threads from doing kvmppc_start_thread() now */ | 653 | /* prevent other vcpu threads from doing kvmppc_start_thread() now */ |
677 | vc->vcore_running = 2; | 654 | vc->vcore_state = VCORE_EXITING; |
678 | spin_unlock(&vc->lock); | 655 | spin_unlock(&vc->lock); |
679 | 656 | ||
680 | /* make sure updates to secondary vcpu structs are visible now */ | 657 | /* make sure updates to secondary vcpu structs are visible now */ |
@@ -690,22 +667,26 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
690 | if (now < vcpu->arch.dec_expires && | 667 | if (now < vcpu->arch.dec_expires && |
691 | kvmppc_core_pending_dec(vcpu)) | 668 | kvmppc_core_pending_dec(vcpu)) |
692 | kvmppc_core_dequeue_dec(vcpu); | 669 | kvmppc_core_dequeue_dec(vcpu); |
693 | if (!vcpu->arch.trap) { | 670 | |
694 | if (signal_pending(vcpu->arch.run_task)) { | 671 | ret = RESUME_GUEST; |
695 | vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR; | 672 | if (vcpu->arch.trap) |
696 | vcpu->arch.ret = -EINTR; | 673 | ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, |
697 | } | 674 | vcpu->arch.run_task); |
698 | continue; /* didn't get to run */ | 675 | |
699 | } | ||
700 | ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, | ||
701 | vcpu->arch.run_task); | ||
702 | vcpu->arch.ret = ret; | 676 | vcpu->arch.ret = ret; |
703 | vcpu->arch.trap = 0; | 677 | vcpu->arch.trap = 0; |
678 | |||
679 | if (vcpu->arch.ceded) { | ||
680 | if (ret != RESUME_GUEST) | ||
681 | kvmppc_end_cede(vcpu); | ||
682 | else | ||
683 | kvmppc_set_timer(vcpu); | ||
684 | } | ||
704 | } | 685 | } |
705 | 686 | ||
706 | spin_lock(&vc->lock); | 687 | spin_lock(&vc->lock); |
707 | out: | 688 | out: |
708 | vc->vcore_running = 0; | 689 | vc->vcore_state = VCORE_INACTIVE; |
709 | list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, | 690 | list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, |
710 | arch.run_list) { | 691 | arch.run_list) { |
711 | if (vcpu->arch.ret != RESUME_GUEST) { | 692 | if (vcpu->arch.ret != RESUME_GUEST) { |
@@ -717,82 +698,130 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
717 | return 1; | 698 | return 1; |
718 | } | 699 | } |
719 | 700 | ||
720 | static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 701 | /* |
702 | * Wait for some other vcpu thread to execute us, and | ||
703 | * wake us up when we need to handle something in the host. | ||
704 | */ | ||
705 | static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) | ||
721 | { | 706 | { |
722 | int ptid; | ||
723 | int wait_state; | ||
724 | struct kvmppc_vcore *vc; | ||
725 | DEFINE_WAIT(wait); | 707 | DEFINE_WAIT(wait); |
726 | 708 | ||
727 | /* No need to go into the guest when all we do is going out */ | 709 | prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); |
728 | if (signal_pending(current)) { | 710 | if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) |
729 | kvm_run->exit_reason = KVM_EXIT_INTR; | 711 | schedule(); |
730 | return -EINTR; | 712 | finish_wait(&vcpu->arch.cpu_run, &wait); |
713 | } | ||
714 | |||
715 | /* | ||
716 | * All the vcpus in this vcore are idle, so wait for a decrementer | ||
717 | * or external interrupt to one of the vcpus. vc->lock is held. | ||
718 | */ | ||
719 | static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) | ||
720 | { | ||
721 | DEFINE_WAIT(wait); | ||
722 | struct kvm_vcpu *v; | ||
723 | int all_idle = 1; | ||
724 | |||
725 | prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); | ||
726 | vc->vcore_state = VCORE_SLEEPING; | ||
727 | spin_unlock(&vc->lock); | ||
728 | list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { | ||
729 | if (!v->arch.ceded || v->arch.pending_exceptions) { | ||
730 | all_idle = 0; | ||
731 | break; | ||
732 | } | ||
731 | } | 733 | } |
734 | if (all_idle) | ||
735 | schedule(); | ||
736 | finish_wait(&vc->wq, &wait); | ||
737 | spin_lock(&vc->lock); | ||
738 | vc->vcore_state = VCORE_INACTIVE; | ||
739 | } | ||
732 | 740 | ||
733 | /* On PPC970, check that we have an RMA region */ | 741 | static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
734 | if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) | 742 | { |
735 | return -EPERM; | 743 | int n_ceded; |
744 | int prev_state; | ||
745 | struct kvmppc_vcore *vc; | ||
746 | struct kvm_vcpu *v, *vn; | ||
736 | 747 | ||
737 | kvm_run->exit_reason = 0; | 748 | kvm_run->exit_reason = 0; |
738 | vcpu->arch.ret = RESUME_GUEST; | 749 | vcpu->arch.ret = RESUME_GUEST; |
739 | vcpu->arch.trap = 0; | 750 | vcpu->arch.trap = 0; |
740 | 751 | ||
741 | flush_fp_to_thread(current); | ||
742 | flush_altivec_to_thread(current); | ||
743 | flush_vsx_to_thread(current); | ||
744 | |||
745 | /* | 752 | /* |
746 | * Synchronize with other threads in this virtual core | 753 | * Synchronize with other threads in this virtual core |
747 | */ | 754 | */ |
748 | vc = vcpu->arch.vcore; | 755 | vc = vcpu->arch.vcore; |
749 | spin_lock(&vc->lock); | 756 | spin_lock(&vc->lock); |
750 | /* This happens the first time this is called for a vcpu */ | 757 | vcpu->arch.ceded = 0; |
751 | if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED) | ||
752 | --vc->n_blocked; | ||
753 | vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; | ||
754 | ptid = vc->n_runnable; | ||
755 | vcpu->arch.run_task = current; | 758 | vcpu->arch.run_task = current; |
756 | vcpu->arch.kvm_run = kvm_run; | 759 | vcpu->arch.kvm_run = kvm_run; |
757 | vcpu->arch.ptid = ptid; | 760 | prev_state = vcpu->arch.state; |
761 | vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; | ||
758 | list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); | 762 | list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); |
759 | ++vc->n_runnable; | 763 | ++vc->n_runnable; |
760 | 764 | ||
761 | wait_state = TASK_INTERRUPTIBLE; | 765 | /* |
762 | while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { | 766 | * This happens the first time this is called for a vcpu. |
763 | if (signal_pending(current)) { | 767 | * If the vcore is already running, we may be able to start |
764 | if (!vc->vcore_running) { | 768 | * this thread straight away and have it join in. |
765 | kvm_run->exit_reason = KVM_EXIT_INTR; | 769 | */ |
766 | vcpu->arch.ret = -EINTR; | 770 | if (prev_state == KVMPPC_VCPU_STOPPED) { |
767 | break; | 771 | if (vc->vcore_state == VCORE_RUNNING && |
768 | } | 772 | VCORE_EXIT_COUNT(vc) == 0) { |
769 | /* have to wait for vcore to stop executing guest */ | 773 | vcpu->arch.ptid = vc->n_runnable - 1; |
770 | wait_state = TASK_UNINTERRUPTIBLE; | 774 | kvmppc_start_thread(vcpu); |
771 | smp_send_reschedule(vc->pcpu); | ||
772 | } | 775 | } |
773 | 776 | ||
774 | if (!vc->vcore_running && | 777 | } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST) |
775 | vc->n_runnable + vc->n_blocked == vc->num_threads) { | 778 | --vc->n_busy; |
776 | /* we can run now */ | ||
777 | if (kvmppc_run_core(vc)) | ||
778 | continue; | ||
779 | } | ||
780 | 779 | ||
781 | if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0) | 780 | while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && |
782 | kvmppc_start_thread(vcpu); | 781 | !signal_pending(current)) { |
782 | if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) { | ||
783 | spin_unlock(&vc->lock); | ||
784 | kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE); | ||
785 | spin_lock(&vc->lock); | ||
786 | continue; | ||
787 | } | ||
788 | n_ceded = 0; | ||
789 | list_for_each_entry(v, &vc->runnable_threads, arch.run_list) | ||
790 | n_ceded += v->arch.ceded; | ||
791 | if (n_ceded == vc->n_runnable) | ||
792 | kvmppc_vcore_blocked(vc); | ||
793 | else | ||
794 | kvmppc_run_core(vc); | ||
795 | |||
796 | list_for_each_entry_safe(v, vn, &vc->runnable_threads, | ||
797 | arch.run_list) { | ||
798 | kvmppc_core_deliver_interrupts(v); | ||
799 | if (signal_pending(v->arch.run_task)) { | ||
800 | kvmppc_remove_runnable(vc, v); | ||
801 | v->stat.signal_exits++; | ||
802 | v->arch.kvm_run->exit_reason = KVM_EXIT_INTR; | ||
803 | v->arch.ret = -EINTR; | ||
804 | wake_up(&v->arch.cpu_run); | ||
805 | } | ||
806 | } | ||
807 | } | ||
783 | 808 | ||
784 | /* wait for other threads to come in, or wait for vcore */ | 809 | if (signal_pending(current)) { |
785 | prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); | 810 | if (vc->vcore_state == VCORE_RUNNING || |
786 | spin_unlock(&vc->lock); | 811 | vc->vcore_state == VCORE_EXITING) { |
787 | schedule(); | 812 | spin_unlock(&vc->lock); |
788 | finish_wait(&vcpu->arch.cpu_run, &wait); | 813 | kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); |
789 | spin_lock(&vc->lock); | 814 | spin_lock(&vc->lock); |
815 | } | ||
816 | if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { | ||
817 | kvmppc_remove_runnable(vc, vcpu); | ||
818 | vcpu->stat.signal_exits++; | ||
819 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
820 | vcpu->arch.ret = -EINTR; | ||
821 | } | ||
790 | } | 822 | } |
791 | 823 | ||
792 | if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) | ||
793 | kvmppc_remove_runnable(vc, vcpu); | ||
794 | spin_unlock(&vc->lock); | 824 | spin_unlock(&vc->lock); |
795 | |||
796 | return vcpu->arch.ret; | 825 | return vcpu->arch.ret; |
797 | } | 826 | } |
798 | 827 | ||
@@ -800,6 +829,26 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
800 | { | 829 | { |
801 | int r; | 830 | int r; |
802 | 831 | ||
832 | if (!vcpu->arch.sane) { | ||
833 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
834 | return -EINVAL; | ||
835 | } | ||
836 | |||
837 | /* No need to go into the guest when all we'll do is come back out */ | ||
838 | if (signal_pending(current)) { | ||
839 | run->exit_reason = KVM_EXIT_INTR; | ||
840 | return -EINTR; | ||
841 | } | ||
842 | |||
843 | /* On PPC970, check that we have an RMA region */ | ||
844 | if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) | ||
845 | return -EPERM; | ||
846 | |||
847 | flush_fp_to_thread(current); | ||
848 | flush_altivec_to_thread(current); | ||
849 | flush_vsx_to_thread(current); | ||
850 | vcpu->arch.wqp = &vcpu->arch.vcore->wq; | ||
851 | |||
803 | do { | 852 | do { |
804 | r = kvmppc_run_vcpu(run, vcpu); | 853 | r = kvmppc_run_vcpu(run, vcpu); |
805 | 854 | ||
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fcfe6b055558..bacb0cfa3602 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -110,39 +110,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
110 | return H_SUCCESS; | 110 | return H_SUCCESS; |
111 | } | 111 | } |
112 | 112 | ||
113 | static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, | ||
114 | unsigned long pte_index) | ||
115 | { | ||
116 | unsigned long rb, va_low; | ||
117 | |||
118 | rb = (v & ~0x7fUL) << 16; /* AVA field */ | ||
119 | va_low = pte_index >> 3; | ||
120 | if (v & HPTE_V_SECONDARY) | ||
121 | va_low = ~va_low; | ||
122 | /* xor vsid from AVA */ | ||
123 | if (!(v & HPTE_V_1TB_SEG)) | ||
124 | va_low ^= v >> 12; | ||
125 | else | ||
126 | va_low ^= v >> 24; | ||
127 | va_low &= 0x7ff; | ||
128 | if (v & HPTE_V_LARGE) { | ||
129 | rb |= 1; /* L field */ | ||
130 | if (cpu_has_feature(CPU_FTR_ARCH_206) && | ||
131 | (r & 0xff000)) { | ||
132 | /* non-16MB large page, must be 64k */ | ||
133 | /* (masks depend on page size) */ | ||
134 | rb |= 0x1000; /* page encoding in LP field */ | ||
135 | rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ | ||
136 | rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ | ||
137 | } | ||
138 | } else { | ||
139 | /* 4kB page */ | ||
140 | rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */ | ||
141 | } | ||
142 | rb |= (v >> 54) & 0x300; /* B field */ | ||
143 | return rb; | ||
144 | } | ||
145 | |||
146 | #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) | 113 | #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) |
147 | 114 | ||
148 | static inline int try_lock_tlbie(unsigned int *lock) | 115 | static inline int try_lock_tlbie(unsigned int *lock) |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index de2950135e6e..f422231d9235 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -20,7 +20,10 @@ | |||
20 | #include <asm/ppc_asm.h> | 20 | #include <asm/ppc_asm.h> |
21 | #include <asm/kvm_asm.h> | 21 | #include <asm/kvm_asm.h> |
22 | #include <asm/reg.h> | 22 | #include <asm/reg.h> |
23 | #include <asm/mmu.h> | ||
23 | #include <asm/page.h> | 24 | #include <asm/page.h> |
25 | #include <asm/ptrace.h> | ||
26 | #include <asm/hvcall.h> | ||
24 | #include <asm/asm-offsets.h> | 27 | #include <asm/asm-offsets.h> |
25 | #include <asm/exception-64s.h> | 28 | #include <asm/exception-64s.h> |
26 | 29 | ||
@@ -49,7 +52,7 @@ kvmppc_skip_Hinterrupt: | |||
49 | b . | 52 | b . |
50 | 53 | ||
51 | /* | 54 | /* |
52 | * Call kvmppc_handler_trampoline_enter in real mode. | 55 | * Call kvmppc_hv_entry in real mode. |
53 | * Must be called with interrupts hard-disabled. | 56 | * Must be called with interrupts hard-disabled. |
54 | * | 57 | * |
55 | * Input Registers: | 58 | * Input Registers: |
@@ -89,6 +92,12 @@ _GLOBAL(kvmppc_hv_entry_trampoline) | |||
89 | kvm_start_guest: | 92 | kvm_start_guest: |
90 | ld r1,PACAEMERGSP(r13) | 93 | ld r1,PACAEMERGSP(r13) |
91 | subi r1,r1,STACK_FRAME_OVERHEAD | 94 | subi r1,r1,STACK_FRAME_OVERHEAD |
95 | ld r2,PACATOC(r13) | ||
96 | |||
97 | /* were we napping due to cede? */ | ||
98 | lbz r0,HSTATE_NAPPING(r13) | ||
99 | cmpwi r0,0 | ||
100 | bne kvm_end_cede | ||
92 | 101 | ||
93 | /* get vcpu pointer */ | 102 | /* get vcpu pointer */ |
94 | ld r4, HSTATE_KVM_VCPU(r13) | 103 | ld r4, HSTATE_KVM_VCPU(r13) |
@@ -276,15 +285,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
276 | cmpwi r0,0 | 285 | cmpwi r0,0 |
277 | beq 20b | 286 | beq 20b |
278 | 287 | ||
279 | /* Set LPCR. Set the MER bit if there is a pending external irq. */ | 288 | /* Set LPCR and RMOR. */ |
280 | 10: ld r8,KVM_LPCR(r9) | 289 | 10: ld r8,KVM_LPCR(r9) |
281 | ld r0,VCPU_PENDING_EXC(r4) | 290 | mtspr SPRN_LPCR,r8 |
282 | li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL) | ||
283 | oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h | ||
284 | and. r0,r0,r7 | ||
285 | beq 11f | ||
286 | ori r8,r8,LPCR_MER | ||
287 | 11: mtspr SPRN_LPCR,r8 | ||
288 | ld r8,KVM_RMOR(r9) | 291 | ld r8,KVM_RMOR(r9) |
289 | mtspr SPRN_RMOR,r8 | 292 | mtspr SPRN_RMOR,r8 |
290 | isync | 293 | isync |
@@ -448,19 +451,50 @@ toc_tlbie_lock: | |||
448 | mtctr r6 | 451 | mtctr r6 |
449 | mtxer r7 | 452 | mtxer r7 |
450 | 453 | ||
451 | /* Move SRR0 and SRR1 into the respective regs */ | 454 | kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ |
452 | ld r6, VCPU_SRR0(r4) | 455 | ld r6, VCPU_SRR0(r4) |
453 | ld r7, VCPU_SRR1(r4) | 456 | ld r7, VCPU_SRR1(r4) |
454 | mtspr SPRN_SRR0, r6 | ||
455 | mtspr SPRN_SRR1, r7 | ||
456 | |||
457 | ld r10, VCPU_PC(r4) | 457 | ld r10, VCPU_PC(r4) |
458 | ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ | ||
458 | 459 | ||
459 | ld r11, VCPU_MSR(r4) /* r10 = vcpu->arch.msr & ~MSR_HV */ | ||
460 | rldicl r11, r11, 63 - MSR_HV_LG, 1 | 460 | rldicl r11, r11, 63 - MSR_HV_LG, 1 |
461 | rotldi r11, r11, 1 + MSR_HV_LG | 461 | rotldi r11, r11, 1 + MSR_HV_LG |
462 | ori r11, r11, MSR_ME | 462 | ori r11, r11, MSR_ME |
463 | 463 | ||
464 | /* Check if we can deliver an external or decrementer interrupt now */ | ||
465 | ld r0,VCPU_PENDING_EXC(r4) | ||
466 | li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) | ||
467 | oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h | ||
468 | and r0,r0,r8 | ||
469 | cmpdi cr1,r0,0 | ||
470 | andi. r0,r11,MSR_EE | ||
471 | beq cr1,11f | ||
472 | BEGIN_FTR_SECTION | ||
473 | mfspr r8,SPRN_LPCR | ||
474 | ori r8,r8,LPCR_MER | ||
475 | mtspr SPRN_LPCR,r8 | ||
476 | isync | ||
477 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | ||
478 | beq 5f | ||
479 | li r0,BOOK3S_INTERRUPT_EXTERNAL | ||
480 | 12: mr r6,r10 | ||
481 | mr r10,r0 | ||
482 | mr r7,r11 | ||
483 | li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ | ||
484 | rotldi r11,r11,63 | ||
485 | b 5f | ||
486 | 11: beq 5f | ||
487 | mfspr r0,SPRN_DEC | ||
488 | cmpwi r0,0 | ||
489 | li r0,BOOK3S_INTERRUPT_DECREMENTER | ||
490 | blt 12b | ||
491 | |||
492 | /* Move SRR0 and SRR1 into the respective regs */ | ||
493 | 5: mtspr SPRN_SRR0, r6 | ||
494 | mtspr SPRN_SRR1, r7 | ||
495 | li r0,0 | ||
496 | stb r0,VCPU_CEDED(r4) /* cancel cede */ | ||
497 | |||
464 | fast_guest_return: | 498 | fast_guest_return: |
465 | mtspr SPRN_HSRR0,r10 | 499 | mtspr SPRN_HSRR0,r10 |
466 | mtspr SPRN_HSRR1,r11 | 500 | mtspr SPRN_HSRR1,r11 |
@@ -574,21 +608,20 @@ kvmppc_interrupt: | |||
574 | /* See if this is something we can handle in real mode */ | 608 | /* See if this is something we can handle in real mode */ |
575 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL | 609 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL |
576 | beq hcall_try_real_mode | 610 | beq hcall_try_real_mode |
577 | hcall_real_cont: | ||
578 | 611 | ||
579 | /* Check for mediated interrupts (could be done earlier really ...) */ | 612 | /* Check for mediated interrupts (could be done earlier really ...) */ |
580 | BEGIN_FTR_SECTION | 613 | BEGIN_FTR_SECTION |
581 | cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL | 614 | cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL |
582 | bne+ 1f | 615 | bne+ 1f |
583 | ld r5,VCPU_KVM(r9) | ||
584 | ld r5,KVM_LPCR(r5) | ||
585 | andi. r0,r11,MSR_EE | 616 | andi. r0,r11,MSR_EE |
586 | beq 1f | 617 | beq 1f |
618 | mfspr r5,SPRN_LPCR | ||
587 | andi. r0,r5,LPCR_MER | 619 | andi. r0,r5,LPCR_MER |
588 | bne bounce_ext_interrupt | 620 | bne bounce_ext_interrupt |
589 | 1: | 621 | 1: |
590 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | 622 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) |
591 | 623 | ||
624 | hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ | ||
592 | /* Save DEC */ | 625 | /* Save DEC */ |
593 | mfspr r5,SPRN_DEC | 626 | mfspr r5,SPRN_DEC |
594 | mftb r6 | 627 | mftb r6 |
@@ -682,7 +715,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) | |||
682 | slbia | 715 | slbia |
683 | ptesync | 716 | ptesync |
684 | 717 | ||
685 | hdec_soon: | 718 | hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */ |
686 | BEGIN_FTR_SECTION | 719 | BEGIN_FTR_SECTION |
687 | b 32f | 720 | b 32f |
688 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | 721 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) |
@@ -700,6 +733,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
700 | addi r0,r3,0x100 | 733 | addi r0,r3,0x100 |
701 | stwcx. r0,0,r6 | 734 | stwcx. r0,0,r6 |
702 | bne 41b | 735 | bne 41b |
736 | lwsync | ||
703 | 737 | ||
704 | /* | 738 | /* |
705 | * At this point we have an interrupt that we have to pass | 739 | * At this point we have an interrupt that we have to pass |
@@ -713,18 +747,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
713 | * interrupt, since the other threads will already be on their | 747 | * interrupt, since the other threads will already be on their |
714 | * way here in that case. | 748 | * way here in that case. |
715 | */ | 749 | */ |
750 | cmpwi r3,0x100 /* Are we the first here? */ | ||
751 | bge 43f | ||
752 | cmpwi r3,1 /* Are any other threads in the guest? */ | ||
753 | ble 43f | ||
716 | cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER | 754 | cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER |
717 | beq 40f | 755 | beq 40f |
718 | cmpwi r3,0x100 /* Are we the first here? */ | ||
719 | bge 40f | ||
720 | cmpwi r3,1 | ||
721 | ble 40f | ||
722 | li r0,0 | 756 | li r0,0 |
723 | mtspr SPRN_HDEC,r0 | 757 | mtspr SPRN_HDEC,r0 |
724 | 40: | 758 | 40: |
759 | /* | ||
760 | * Send an IPI to any napping threads, since an HDEC interrupt | ||
761 | * doesn't wake CPUs up from nap. | ||
762 | */ | ||
763 | lwz r3,VCORE_NAPPING_THREADS(r5) | ||
764 | lwz r4,VCPU_PTID(r9) | ||
765 | li r0,1 | ||
766 | sldi r0,r0,r4 | ||
767 | andc. r3,r3,r0 /* no sense IPI'ing ourselves */ | ||
768 | beq 43f | ||
769 | mulli r4,r4,PACA_SIZE /* get paca for thread 0 */ | ||
770 | subf r6,r4,r13 | ||
771 | 42: andi. r0,r3,1 | ||
772 | beq 44f | ||
773 | ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ | ||
774 | li r0,IPI_PRIORITY | ||
775 | li r7,XICS_QIRR | ||
776 | stbcix r0,r7,r8 /* trigger the IPI */ | ||
777 | 44: srdi. r3,r3,1 | ||
778 | addi r6,r6,PACA_SIZE | ||
779 | bne 42b | ||
725 | 780 | ||
726 | /* Secondary threads wait for primary to do partition switch */ | 781 | /* Secondary threads wait for primary to do partition switch */ |
727 | ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ | 782 | 43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ |
728 | ld r5,HSTATE_KVM_VCORE(r13) | 783 | ld r5,HSTATE_KVM_VCORE(r13) |
729 | lwz r3,VCPU_PTID(r9) | 784 | lwz r3,VCPU_PTID(r9) |
730 | cmpwi r3,0 | 785 | cmpwi r3,0 |
@@ -1077,7 +1132,6 @@ hcall_try_real_mode: | |||
1077 | hcall_real_fallback: | 1132 | hcall_real_fallback: |
1078 | li r12,BOOK3S_INTERRUPT_SYSCALL | 1133 | li r12,BOOK3S_INTERRUPT_SYSCALL |
1079 | ld r9, HSTATE_KVM_VCPU(r13) | 1134 | ld r9, HSTATE_KVM_VCPU(r13) |
1080 | ld r11, VCPU_MSR(r9) | ||
1081 | 1135 | ||
1082 | b hcall_real_cont | 1136 | b hcall_real_cont |
1083 | 1137 | ||
@@ -1139,7 +1193,7 @@ hcall_real_table: | |||
1139 | .long 0 /* 0xd4 */ | 1193 | .long 0 /* 0xd4 */ |
1140 | .long 0 /* 0xd8 */ | 1194 | .long 0 /* 0xd8 */ |
1141 | .long 0 /* 0xdc */ | 1195 | .long 0 /* 0xdc */ |
1142 | .long 0 /* 0xe0 */ | 1196 | .long .kvmppc_h_cede - hcall_real_table |
1143 | .long 0 /* 0xe4 */ | 1197 | .long 0 /* 0xe4 */ |
1144 | .long 0 /* 0xe8 */ | 1198 | .long 0 /* 0xe8 */ |
1145 | .long 0 /* 0xec */ | 1199 | .long 0 /* 0xec */ |
@@ -1168,7 +1222,8 @@ bounce_ext_interrupt: | |||
1168 | mtspr SPRN_SRR0,r10 | 1222 | mtspr SPRN_SRR0,r10 |
1169 | mtspr SPRN_SRR1,r11 | 1223 | mtspr SPRN_SRR1,r11 |
1170 | li r10,BOOK3S_INTERRUPT_EXTERNAL | 1224 | li r10,BOOK3S_INTERRUPT_EXTERNAL |
1171 | LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME); | 1225 | li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ |
1226 | rotldi r11,r11,63 | ||
1172 | b fast_guest_return | 1227 | b fast_guest_return |
1173 | 1228 | ||
1174 | _GLOBAL(kvmppc_h_set_dabr) | 1229 | _GLOBAL(kvmppc_h_set_dabr) |
@@ -1177,6 +1232,178 @@ _GLOBAL(kvmppc_h_set_dabr) | |||
1177 | li r3,0 | 1232 | li r3,0 |
1178 | blr | 1233 | blr |
1179 | 1234 | ||
1235 | _GLOBAL(kvmppc_h_cede) | ||
1236 | ori r11,r11,MSR_EE | ||
1237 | std r11,VCPU_MSR(r3) | ||
1238 | li r0,1 | ||
1239 | stb r0,VCPU_CEDED(r3) | ||
1240 | sync /* order setting ceded vs. testing prodded */ | ||
1241 | lbz r5,VCPU_PRODDED(r3) | ||
1242 | cmpwi r5,0 | ||
1243 | bne 1f | ||
1244 | li r0,0 /* set trap to 0 to say hcall is handled */ | ||
1245 | stw r0,VCPU_TRAP(r3) | ||
1246 | li r0,H_SUCCESS | ||
1247 | std r0,VCPU_GPR(r3)(r3) | ||
1248 | BEGIN_FTR_SECTION | ||
1249 | b 2f /* just send it up to host on 970 */ | ||
1250 | END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) | ||
1251 | |||
1252 | /* | ||
1253 | * Set our bit in the bitmask of napping threads unless all the | ||
1254 | * other threads are already napping, in which case we send this | ||
1255 | * up to the host. | ||
1256 | */ | ||
1257 | ld r5,HSTATE_KVM_VCORE(r13) | ||
1258 | lwz r6,VCPU_PTID(r3) | ||
1259 | lwz r8,VCORE_ENTRY_EXIT(r5) | ||
1260 | clrldi r8,r8,56 | ||
1261 | li r0,1 | ||
1262 | sld r0,r0,r6 | ||
1263 | addi r6,r5,VCORE_NAPPING_THREADS | ||
1264 | 31: lwarx r4,0,r6 | ||
1265 | or r4,r4,r0 | ||
1266 | popcntw r7,r4 | ||
1267 | cmpw r7,r8 | ||
1268 | bge 2f | ||
1269 | stwcx. r4,0,r6 | ||
1270 | bne 31b | ||
1271 | li r0,1 | ||
1272 | stb r0,HSTATE_NAPPING(r13) | ||
1273 | /* order napping_threads update vs testing entry_exit_count */ | ||
1274 | lwsync | ||
1275 | mr r4,r3 | ||
1276 | lwz r7,VCORE_ENTRY_EXIT(r5) | ||
1277 | cmpwi r7,0x100 | ||
1278 | bge 33f /* another thread already exiting */ | ||
1279 | |||
1280 | /* | ||
1281 | * Although not specifically required by the architecture, POWER7 | ||
1282 | * preserves the following registers in nap mode, even if an SMT mode | ||
1283 | * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3, | ||
1284 | * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR. | ||
1285 | */ | ||
1286 | /* Save non-volatile GPRs */ | ||
1287 | std r14, VCPU_GPR(r14)(r3) | ||
1288 | std r15, VCPU_GPR(r15)(r3) | ||
1289 | std r16, VCPU_GPR(r16)(r3) | ||
1290 | std r17, VCPU_GPR(r17)(r3) | ||
1291 | std r18, VCPU_GPR(r18)(r3) | ||
1292 | std r19, VCPU_GPR(r19)(r3) | ||
1293 | std r20, VCPU_GPR(r20)(r3) | ||
1294 | std r21, VCPU_GPR(r21)(r3) | ||
1295 | std r22, VCPU_GPR(r22)(r3) | ||
1296 | std r23, VCPU_GPR(r23)(r3) | ||
1297 | std r24, VCPU_GPR(r24)(r3) | ||
1298 | std r25, VCPU_GPR(r25)(r3) | ||
1299 | std r26, VCPU_GPR(r26)(r3) | ||
1300 | std r27, VCPU_GPR(r27)(r3) | ||
1301 | std r28, VCPU_GPR(r28)(r3) | ||
1302 | std r29, VCPU_GPR(r29)(r3) | ||
1303 | std r30, VCPU_GPR(r30)(r3) | ||
1304 | std r31, VCPU_GPR(r31)(r3) | ||
1305 | |||
1306 | /* save FP state */ | ||
1307 | bl .kvmppc_save_fp | ||
1308 | |||
1309 | /* | ||
1310 | * Take a nap until a decrementer or external interrupt occurs, | ||
1311 | * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR | ||
1312 | */ | ||
1313 | li r0,0x80 | ||
1314 | stb r0,PACAPROCSTART(r13) | ||
1315 | mfspr r5,SPRN_LPCR | ||
1316 | ori r5,r5,LPCR_PECE0 | LPCR_PECE1 | ||
1317 | mtspr SPRN_LPCR,r5 | ||
1318 | isync | ||
1319 | li r0, 0 | ||
1320 | std r0, HSTATE_SCRATCH0(r13) | ||
1321 | ptesync | ||
1322 | ld r0, HSTATE_SCRATCH0(r13) | ||
1323 | 1: cmpd r0, r0 | ||
1324 | bne 1b | ||
1325 | nap | ||
1326 | b . | ||
1327 | |||
1328 | kvm_end_cede: | ||
1329 | /* Woken by external or decrementer interrupt */ | ||
1330 | ld r1, HSTATE_HOST_R1(r13) | ||
1331 | ld r2, PACATOC(r13) | ||
1332 | |||
1333 | /* If we're a secondary thread and we got here by an IPI, ack it */ | ||
1334 | ld r4,HSTATE_KVM_VCPU(r13) | ||
1335 | lwz r3,VCPU_PTID(r4) | ||
1336 | cmpwi r3,0 | ||
1337 | beq 27f | ||
1338 | mfspr r3,SPRN_SRR1 | ||
1339 | rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ | ||
1340 | cmpwi r3,4 /* was it an external interrupt? */ | ||
1341 | bne 27f | ||
1342 | ld r5, HSTATE_XICS_PHYS(r13) | ||
1343 | li r0,0xff | ||
1344 | li r6,XICS_QIRR | ||
1345 | li r7,XICS_XIRR | ||
1346 | lwzcix r8,r5,r7 /* ack the interrupt */ | ||
1347 | sync | ||
1348 | stbcix r0,r5,r6 /* clear it */ | ||
1349 | stwcix r8,r5,r7 /* EOI it */ | ||
1350 | 27: | ||
1351 | /* load up FP state */ | ||
1352 | bl kvmppc_load_fp | ||
1353 | |||
1354 | /* Load NV GPRS */ | ||
1355 | ld r14, VCPU_GPR(r14)(r4) | ||
1356 | ld r15, VCPU_GPR(r15)(r4) | ||
1357 | ld r16, VCPU_GPR(r16)(r4) | ||
1358 | ld r17, VCPU_GPR(r17)(r4) | ||
1359 | ld r18, VCPU_GPR(r18)(r4) | ||
1360 | ld r19, VCPU_GPR(r19)(r4) | ||
1361 | ld r20, VCPU_GPR(r20)(r4) | ||
1362 | ld r21, VCPU_GPR(r21)(r4) | ||
1363 | ld r22, VCPU_GPR(r22)(r4) | ||
1364 | ld r23, VCPU_GPR(r23)(r4) | ||
1365 | ld r24, VCPU_GPR(r24)(r4) | ||
1366 | ld r25, VCPU_GPR(r25)(r4) | ||
1367 | ld r26, VCPU_GPR(r26)(r4) | ||
1368 | ld r27, VCPU_GPR(r27)(r4) | ||
1369 | ld r28, VCPU_GPR(r28)(r4) | ||
1370 | ld r29, VCPU_GPR(r29)(r4) | ||
1371 | ld r30, VCPU_GPR(r30)(r4) | ||
1372 | ld r31, VCPU_GPR(r31)(r4) | ||
1373 | |||
1374 | /* clear our bit in vcore->napping_threads */ | ||
1375 | 33: ld r5,HSTATE_KVM_VCORE(r13) | ||
1376 | lwz r3,VCPU_PTID(r4) | ||
1377 | li r0,1 | ||
1378 | sld r0,r0,r3 | ||
1379 | addi r6,r5,VCORE_NAPPING_THREADS | ||
1380 | 32: lwarx r7,0,r6 | ||
1381 | andc r7,r7,r0 | ||
1382 | stwcx. r7,0,r6 | ||
1383 | bne 32b | ||
1384 | li r0,0 | ||
1385 | stb r0,HSTATE_NAPPING(r13) | ||
1386 | |||
1387 | /* see if any other thread is already exiting */ | ||
1388 | lwz r0,VCORE_ENTRY_EXIT(r5) | ||
1389 | cmpwi r0,0x100 | ||
1390 | blt kvmppc_cede_reentry /* if not go back to guest */ | ||
1391 | |||
1392 | /* some threads are exiting, so go to the guest exit path */ | ||
1393 | b hcall_real_fallback | ||
1394 | |||
1395 | /* cede when already previously prodded case */ | ||
1396 | 1: li r0,0 | ||
1397 | stb r0,VCPU_PRODDED(r3) | ||
1398 | sync /* order testing prodded vs. clearing ceded */ | ||
1399 | stb r0,VCPU_CEDED(r3) | ||
1400 | li r3,H_SUCCESS | ||
1401 | blr | ||
1402 | |||
1403 | /* we've ceded but we want to give control to the host */ | ||
1404 | 2: li r3,H_TOO_HARD | ||
1405 | blr | ||
1406 | |||
1180 | secondary_too_late: | 1407 | secondary_too_late: |
1181 | ld r5,HSTATE_KVM_VCORE(r13) | 1408 | ld r5,HSTATE_KVM_VCORE(r13) |
1182 | HMT_LOW | 1409 | HMT_LOW |
@@ -1194,14 +1421,20 @@ secondary_too_late: | |||
1194 | slbmte r6,r5 | 1421 | slbmte r6,r5 |
1195 | 1: addi r11,r11,16 | 1422 | 1: addi r11,r11,16 |
1196 | .endr | 1423 | .endr |
1197 | b 50f | ||
1198 | 1424 | ||
1199 | secondary_nap: | 1425 | secondary_nap: |
1200 | /* Clear any pending IPI */ | 1426 | /* Clear any pending IPI - assume we're a secondary thread */ |
1201 | 50: ld r5, HSTATE_XICS_PHYS(r13) | 1427 | ld r5, HSTATE_XICS_PHYS(r13) |
1428 | li r7, XICS_XIRR | ||
1429 | lwzcix r3, r5, r7 /* ack any pending interrupt */ | ||
1430 | rlwinm. r0, r3, 0, 0xffffff /* any pending? */ | ||
1431 | beq 37f | ||
1432 | sync | ||
1202 | li r0, 0xff | 1433 | li r0, 0xff |
1203 | li r6, XICS_QIRR | 1434 | li r6, XICS_QIRR |
1204 | stbcix r0, r5, r6 | 1435 | stbcix r0, r5, r6 /* clear the IPI */ |
1436 | stwcix r3, r5, r7 /* EOI it */ | ||
1437 | 37: sync | ||
1205 | 1438 | ||
1206 | /* increment the nap count and then go to nap mode */ | 1439 | /* increment the nap count and then go to nap mode */ |
1207 | ld r4, HSTATE_KVM_VCORE(r13) | 1440 | ld r4, HSTATE_KVM_VCORE(r13) |
@@ -1211,13 +1444,12 @@ secondary_nap: | |||
1211 | addi r3, r3, 1 | 1444 | addi r3, r3, 1 |
1212 | stwcx. r3, 0, r4 | 1445 | stwcx. r3, 0, r4 |
1213 | bne 51b | 1446 | bne 51b |
1214 | isync | ||
1215 | 1447 | ||
1448 | li r3, LPCR_PECE0 | ||
1216 | mfspr r4, SPRN_LPCR | 1449 | mfspr r4, SPRN_LPCR |
1217 | li r0, LPCR_PECE | 1450 | rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 |
1218 | andc r4, r4, r0 | ||
1219 | ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */ | ||
1220 | mtspr SPRN_LPCR, r4 | 1451 | mtspr SPRN_LPCR, r4 |
1452 | isync | ||
1221 | li r0, 0 | 1453 | li r0, 0 |
1222 | std r0, HSTATE_SCRATCH0(r13) | 1454 | std r0, HSTATE_SCRATCH0(r13) |
1223 | ptesync | 1455 | ptesync |
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index c54b0e30cf3f..0a8515a5c042 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S | |||
@@ -29,27 +29,11 @@ | |||
29 | #define ULONG_SIZE 8 | 29 | #define ULONG_SIZE 8 |
30 | #define FUNC(name) GLUE(.,name) | 30 | #define FUNC(name) GLUE(.,name) |
31 | 31 | ||
32 | #define GET_SHADOW_VCPU_R13 | ||
33 | |||
34 | #define DISABLE_INTERRUPTS \ | ||
35 | mfmsr r0; \ | ||
36 | rldicl r0,r0,48,1; \ | ||
37 | rotldi r0,r0,16; \ | ||
38 | mtmsrd r0,1; \ | ||
39 | |||
40 | #elif defined(CONFIG_PPC_BOOK3S_32) | 32 | #elif defined(CONFIG_PPC_BOOK3S_32) |
41 | 33 | ||
42 | #define ULONG_SIZE 4 | 34 | #define ULONG_SIZE 4 |
43 | #define FUNC(name) name | 35 | #define FUNC(name) name |
44 | 36 | ||
45 | #define GET_SHADOW_VCPU_R13 \ | ||
46 | lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2) | ||
47 | |||
48 | #define DISABLE_INTERRUPTS \ | ||
49 | mfmsr r0; \ | ||
50 | rlwinm r0,r0,0,17,15; \ | ||
51 | mtmsr r0; \ | ||
52 | |||
53 | #endif /* CONFIG_PPC_BOOK3S_XX */ | 37 | #endif /* CONFIG_PPC_BOOK3S_XX */ |
54 | 38 | ||
55 | 39 | ||
@@ -108,44 +92,17 @@ kvm_start_entry: | |||
108 | 92 | ||
109 | kvm_start_lightweight: | 93 | kvm_start_lightweight: |
110 | 94 | ||
111 | GET_SHADOW_VCPU_R13 | ||
112 | PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4) | ||
113 | PPC_STL r3, HSTATE_VMHANDLER(r13) | ||
114 | |||
115 | PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ | ||
116 | |||
117 | DISABLE_INTERRUPTS | ||
118 | |||
119 | #ifdef CONFIG_PPC_BOOK3S_64 | 95 | #ifdef CONFIG_PPC_BOOK3S_64 |
120 | /* Some guests may need to have dcbz set to 32 byte length. | ||
121 | * | ||
122 | * Usually we ensure that by patching the guest's instructions | ||
123 | * to trap on dcbz and emulate it in the hypervisor. | ||
124 | * | ||
125 | * If we can, we should tell the CPU to use 32 byte dcbz though, | ||
126 | * because that's a lot faster. | ||
127 | */ | ||
128 | |||
129 | PPC_LL r3, VCPU_HFLAGS(r4) | 96 | PPC_LL r3, VCPU_HFLAGS(r4) |
130 | rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */ | 97 | rldicl r3, r3, 0, 63 /* r3 &= 1 */ |
131 | beq no_dcbz32_on | 98 | stb r3, HSTATE_RESTORE_HID5(r13) |
132 | |||
133 | mfspr r3,SPRN_HID5 | ||
134 | ori r3, r3, 0x80 /* XXX HID5_dcbz32 = 0x80 */ | ||
135 | mtspr SPRN_HID5,r3 | ||
136 | |||
137 | no_dcbz32_on: | ||
138 | |||
139 | #endif /* CONFIG_PPC_BOOK3S_64 */ | 99 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
140 | 100 | ||
141 | PPC_LL r6, VCPU_RMCALL(r4) | 101 | PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ |
142 | mtctr r6 | ||
143 | |||
144 | PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4) | ||
145 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | ||
146 | 102 | ||
147 | /* Jump to segment patching handler and into our guest */ | 103 | /* Jump to segment patching handler and into our guest */ |
148 | bctr | 104 | bl FUNC(kvmppc_entry_trampoline) |
105 | nop | ||
149 | 106 | ||
150 | /* | 107 | /* |
151 | * This is the handler in module memory. It gets jumped at from the | 108 | * This is the handler in module memory. It gets jumped at from the |
@@ -170,21 +127,6 @@ kvmppc_handler_highmem: | |||
170 | /* R7 = vcpu */ | 127 | /* R7 = vcpu */ |
171 | PPC_LL r7, GPR4(r1) | 128 | PPC_LL r7, GPR4(r1) |
172 | 129 | ||
173 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
174 | |||
175 | PPC_LL r5, VCPU_HFLAGS(r7) | ||
176 | rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ | ||
177 | beq no_dcbz32_off | ||
178 | |||
179 | li r4, 0 | ||
180 | mfspr r5,SPRN_HID5 | ||
181 | rldimi r5,r4,6,56 | ||
182 | mtspr SPRN_HID5,r5 | ||
183 | |||
184 | no_dcbz32_off: | ||
185 | |||
186 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
187 | |||
188 | PPC_STL r14, VCPU_GPR(r14)(r7) | 130 | PPC_STL r14, VCPU_GPR(r14)(r7) |
189 | PPC_STL r15, VCPU_GPR(r15)(r7) | 131 | PPC_STL r15, VCPU_GPR(r15)(r7) |
190 | PPC_STL r16, VCPU_GPR(r16)(r7) | 132 | PPC_STL r16, VCPU_GPR(r16)(r7) |
@@ -204,67 +146,6 @@ no_dcbz32_off: | |||
204 | PPC_STL r30, VCPU_GPR(r30)(r7) | 146 | PPC_STL r30, VCPU_GPR(r30)(r7) |
205 | PPC_STL r31, VCPU_GPR(r31)(r7) | 147 | PPC_STL r31, VCPU_GPR(r31)(r7) |
206 | 148 | ||
207 | /* Restore host msr -> SRR1 */ | ||
208 | PPC_LL r6, VCPU_HOST_MSR(r7) | ||
209 | |||
210 | /* | ||
211 | * For some interrupts, we need to call the real Linux | ||
212 | * handler, so it can do work for us. This has to happen | ||
213 | * as if the interrupt arrived from the kernel though, | ||
214 | * so let's fake it here where most state is restored. | ||
215 | * | ||
216 | * Call Linux for hardware interrupts/decrementer | ||
217 | * r3 = address of interrupt handler (exit reason) | ||
218 | */ | ||
219 | |||
220 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | ||
221 | beq call_linux_handler | ||
222 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER | ||
223 | beq call_linux_handler | ||
224 | cmpwi r12, BOOK3S_INTERRUPT_PERFMON | ||
225 | beq call_linux_handler | ||
226 | |||
227 | /* Back to EE=1 */ | ||
228 | mtmsr r6 | ||
229 | sync | ||
230 | b kvm_return_point | ||
231 | |||
232 | call_linux_handler: | ||
233 | |||
234 | /* | ||
235 | * If we land here we need to jump back to the handler we | ||
236 | * came from. | ||
237 | * | ||
238 | * We have a page that we can access from real mode, so let's | ||
239 | * jump back to that and use it as a trampoline to get back into the | ||
240 | * interrupt handler! | ||
241 | * | ||
242 | * R3 still contains the exit code, | ||
243 | * R5 VCPU_HOST_RETIP and | ||
244 | * R6 VCPU_HOST_MSR | ||
245 | */ | ||
246 | |||
247 | /* Restore host IP -> SRR0 */ | ||
248 | PPC_LL r5, VCPU_HOST_RETIP(r7) | ||
249 | |||
250 | /* XXX Better move to a safe function? | ||
251 | * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ | ||
252 | |||
253 | mtlr r12 | ||
254 | |||
255 | PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7) | ||
256 | mtsrr0 r4 | ||
257 | LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | ||
258 | mtsrr1 r3 | ||
259 | |||
260 | RFI | ||
261 | |||
262 | .global kvm_return_point | ||
263 | kvm_return_point: | ||
264 | |||
265 | /* Jump back to lightweight entry if we're supposed to */ | ||
266 | /* go back into the guest */ | ||
267 | |||
268 | /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ | 149 | /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ |
269 | mr r5, r12 | 150 | mr r5, r12 |
270 | 151 | ||
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 0c0d3f274437..d417511abfb1 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
@@ -150,16 +150,22 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | |||
150 | #ifdef CONFIG_PPC_BOOK3S_64 | 150 | #ifdef CONFIG_PPC_BOOK3S_64 |
151 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { | 151 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { |
152 | kvmppc_mmu_book3s_64_init(vcpu); | 152 | kvmppc_mmu_book3s_64_init(vcpu); |
153 | to_book3s(vcpu)->hior = 0xfff00000; | 153 | if (!to_book3s(vcpu)->hior_sregs) |
154 | to_book3s(vcpu)->hior = 0xfff00000; | ||
154 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; | 155 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; |
156 | vcpu->arch.cpu_type = KVM_CPU_3S_64; | ||
155 | } else | 157 | } else |
156 | #endif | 158 | #endif |
157 | { | 159 | { |
158 | kvmppc_mmu_book3s_32_init(vcpu); | 160 | kvmppc_mmu_book3s_32_init(vcpu); |
159 | to_book3s(vcpu)->hior = 0; | 161 | if (!to_book3s(vcpu)->hior_sregs) |
162 | to_book3s(vcpu)->hior = 0; | ||
160 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; | 163 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; |
164 | vcpu->arch.cpu_type = KVM_CPU_3S_32; | ||
161 | } | 165 | } |
162 | 166 | ||
167 | kvmppc_sanity_check(vcpu); | ||
168 | |||
163 | /* If we are in hypervisor level on 970, we can tell the CPU to | 169 | /* If we are in hypervisor level on 970, we can tell the CPU to |
164 | * treat DCBZ as 32 bytes store */ | 170 | * treat DCBZ as 32 bytes store */ |
165 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; | 171 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; |
@@ -646,7 +652,27 @@ program_interrupt: | |||
646 | break; | 652 | break; |
647 | } | 653 | } |
648 | case BOOK3S_INTERRUPT_SYSCALL: | 654 | case BOOK3S_INTERRUPT_SYSCALL: |
649 | if (vcpu->arch.osi_enabled && | 655 | if (vcpu->arch.papr_enabled && |
656 | (kvmppc_get_last_inst(vcpu) == 0x44000022) && | ||
657 | !(vcpu->arch.shared->msr & MSR_PR)) { | ||
658 | /* SC 1 papr hypercalls */ | ||
659 | ulong cmd = kvmppc_get_gpr(vcpu, 3); | ||
660 | int i; | ||
661 | |||
662 | if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { | ||
663 | r = RESUME_GUEST; | ||
664 | break; | ||
665 | } | ||
666 | |||
667 | run->papr_hcall.nr = cmd; | ||
668 | for (i = 0; i < 9; ++i) { | ||
669 | ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); | ||
670 | run->papr_hcall.args[i] = gpr; | ||
671 | } | ||
672 | run->exit_reason = KVM_EXIT_PAPR_HCALL; | ||
673 | vcpu->arch.hcall_needed = 1; | ||
674 | r = RESUME_HOST; | ||
675 | } else if (vcpu->arch.osi_enabled && | ||
650 | (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && | 676 | (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && |
651 | (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { | 677 | (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { |
652 | /* MOL hypercalls */ | 678 | /* MOL hypercalls */ |
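The KVM_EXIT_PAPR_HCALL path added above forwards any "sc 1" hypercall that kvmppc_h_pr() does not complete (EMULATE_FAIL) up to userspace. The fragment below is a hedged sketch of the consuming side only: it assumes the papr_hcall layout of struct kvm_run introduced by this series, a hypothetical emulate_hcall() helper in the VMM, and that KVM copies papr_hcall.ret back into guest r3 on the next KVM_RUN (hcall_needed is set above for that purpose).

#include <err.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: emulate_hcall() is a hypothetical VMM helper.
 * run points at the kvm_run area mmap()ed from vcpu_fd. */
static void run_vcpu_once(int vcpu_fd, struct kvm_run *run)
{
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		err(1, "KVM_RUN");

	if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
		/* papr_hcall.nr is the hcall number (guest r3);
		 * papr_hcall.args[0..8] mirror guest r4-r12. */
		run->papr_hcall.ret =
			emulate_hcall(run->papr_hcall.nr,
				      run->papr_hcall.args);
	}
}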
@@ -770,6 +796,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
770 | } | 796 | } |
771 | } | 797 | } |
772 | 798 | ||
799 | if (sregs->u.s.flags & KVM_SREGS_S_HIOR) | ||
800 | sregs->u.s.hior = to_book3s(vcpu)->hior; | ||
801 | |||
773 | return 0; | 802 | return 0; |
774 | } | 803 | } |
775 | 804 | ||
@@ -806,6 +835,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
806 | /* Flush the MMU after messing with the segments */ | 835 | /* Flush the MMU after messing with the segments */ |
807 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 836 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
808 | 837 | ||
838 | if (sregs->u.s.flags & KVM_SREGS_S_HIOR) { | ||
839 | to_book3s(vcpu)->hior_sregs = true; | ||
840 | to_book3s(vcpu)->hior = sregs->u.s.hior; | ||
841 | } | ||
842 | |||
809 | return 0; | 843 | return 0; |
810 | } | 844 | } |
811 | 845 | ||
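The KVM_SREGS_S_HIOR flag handled above lets userspace pin the guest's interrupt vector base instead of taking the PVR-derived default. A minimal sketch, assuming only the standard KVM_GET_SREGS/KVM_SET_SREGS ioctls and the u.s.hior field shown in this hunk; userspace should first check KVM_CAP_PPC_HIOR, also added by this series.

	struct kvm_sregs sregs;

	if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
		err(1, "KVM_GET_SREGS");
	sregs.u.s.flags |= KVM_SREGS_S_HIOR;
	sregs.u.s.hior = 0;	/* e.g. vectors at 0 for a PAPR-style guest */
	if (ioctl(vcpu_fd, KVM_SET_SREGS, &sregs) < 0)
		err(1, "KVM_SET_SREGS");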
@@ -841,8 +875,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
841 | if (!p) | 875 | if (!p) |
842 | goto uninit_vcpu; | 876 | goto uninit_vcpu; |
843 | 877 | ||
844 | vcpu->arch.host_retip = kvm_return_point; | ||
845 | vcpu->arch.host_msr = mfmsr(); | ||
846 | #ifdef CONFIG_PPC_BOOK3S_64 | 878 | #ifdef CONFIG_PPC_BOOK3S_64 |
847 | /* default to book3s_64 (970fx) */ | 879 | /* default to book3s_64 (970fx) */ |
848 | vcpu->arch.pvr = 0x3C0301; | 880 | vcpu->arch.pvr = 0x3C0301; |
@@ -853,16 +885,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
853 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); | 885 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); |
854 | vcpu->arch.slb_nr = 64; | 886 | vcpu->arch.slb_nr = 64; |
855 | 887 | ||
856 | /* remember where some real-mode handlers are */ | ||
857 | vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline); | ||
858 | vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter); | ||
859 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; | ||
860 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
861 | vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; | ||
862 | #else | ||
863 | vcpu->arch.rmcall = (ulong)kvmppc_rmcall; | ||
864 | #endif | ||
865 | |||
866 | vcpu->arch.shadow_msr = MSR_USER64; | 888 | vcpu->arch.shadow_msr = MSR_USER64; |
867 | 889 | ||
868 | err = kvmppc_mmu_init(vcpu); | 890 | err = kvmppc_mmu_init(vcpu); |
@@ -908,6 +930,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
908 | #endif | 930 | #endif |
909 | ulong ext_msr; | 931 | ulong ext_msr; |
910 | 932 | ||
933 | /* Check if we can run the vcpu at all */ | ||
934 | if (!vcpu->arch.sane) { | ||
935 | kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
936 | return -EINVAL; | ||
937 | } | ||
938 | |||
911 | /* No need to go into the guest when all we do is going out */ | 939 | /* No need to go into the guest when all we do is going out */ |
912 | if (signal_pending(current)) { | 940 | if (signal_pending(current)) { |
913 | kvm_run->exit_reason = KVM_EXIT_INTR; | 941 | kvm_run->exit_reason = KVM_EXIT_INTR; |
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c new file mode 100644 index 000000000000..b9589324797b --- /dev/null +++ b/arch/powerpc/kvm/book3s_pr_papr.c | |||
@@ -0,0 +1,158 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011. Freescale Inc. All rights reserved. | ||
3 | * | ||
4 | * Authors: | ||
5 | * Alexander Graf <agraf@suse.de> | ||
6 | * Paul Mackerras <paulus@samba.org> | ||
7 | * | ||
8 | * Description: | ||
9 | * | ||
10 | * Hypercall handling for running PAPR guests in PR KVM on Book 3S | ||
11 | * processors. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or modify | ||
14 | * it under the terms of the GNU General Public License, version 2, as | ||
15 | * published by the Free Software Foundation. | ||
16 | */ | ||
17 | |||
18 | #include <asm/uaccess.h> | ||
19 | #include <asm/kvm_ppc.h> | ||
20 | #include <asm/kvm_book3s.h> | ||
21 | |||
22 | static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index) | ||
23 | { | ||
24 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | ||
25 | unsigned long pteg_addr; | ||
26 | |||
27 | pte_index <<= 4; | ||
28 | pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70; | ||
29 | pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL; | ||
30 | pteg_addr |= pte_index; | ||
31 | |||
32 | return pteg_addr; | ||
33 | } | ||
34 | |||
35 | static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) | ||
36 | { | ||
37 | long flags = kvmppc_get_gpr(vcpu, 4); | ||
38 | long pte_index = kvmppc_get_gpr(vcpu, 5); | ||
39 | unsigned long pteg[2 * 8]; | ||
40 | unsigned long pteg_addr, i, *hpte; | ||
41 | |||
42 | pte_index &= ~7UL; | ||
43 | pteg_addr = get_pteg_addr(vcpu, pte_index); | ||
44 | |||
45 | copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); | ||
46 | hpte = pteg; | ||
47 | |||
48 | if (likely((flags & H_EXACT) == 0)) { | ||
49 | pte_index &= ~7UL; | ||
50 | for (i = 0; ; ++i) { | ||
51 | if (i == 8) | ||
52 | return H_PTEG_FULL; | ||
53 | if ((*hpte & HPTE_V_VALID) == 0) | ||
54 | break; | ||
55 | hpte += 2; | ||
56 | } | ||
57 | } else { | ||
58 | i = kvmppc_get_gpr(vcpu, 5) & 7UL; | ||
59 | hpte += i * 2; | ||
60 | } | ||
61 | |||
62 | hpte[0] = kvmppc_get_gpr(vcpu, 6); | ||
63 | hpte[1] = kvmppc_get_gpr(vcpu, 7); | ||
64 | copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)); | ||
65 | kvmppc_set_gpr(vcpu, 3, H_SUCCESS); | ||
66 | kvmppc_set_gpr(vcpu, 4, pte_index | i); | ||
67 | |||
68 | return EMULATE_DONE; | ||
69 | } | ||
70 | |||
71 | static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) | ||
72 | { | ||
73 | unsigned long flags = kvmppc_get_gpr(vcpu, 4); | ||
74 | unsigned long pte_index = kvmppc_get_gpr(vcpu, 5); | ||
75 | unsigned long avpn = kvmppc_get_gpr(vcpu, 6); | ||
76 | unsigned long v = 0, pteg, rb; | ||
77 | unsigned long pte[2]; | ||
78 | |||
79 | pteg = get_pteg_addr(vcpu, pte_index); | ||
80 | copy_from_user(pte, (void __user *)pteg, sizeof(pte)); | ||
81 | |||
82 | if ((pte[0] & HPTE_V_VALID) == 0 || | ||
83 | ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) || | ||
84 | ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) { | ||
85 | kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); | ||
86 | return EMULATE_DONE; | ||
87 | } | ||
88 | |||
89 | copy_to_user((void __user *)pteg, &v, sizeof(v)); | ||
90 | |||
91 | rb = compute_tlbie_rb(pte[0], pte[1], pte_index); | ||
92 | vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); | ||
93 | |||
94 | kvmppc_set_gpr(vcpu, 3, H_SUCCESS); | ||
95 | kvmppc_set_gpr(vcpu, 4, pte[0]); | ||
96 | kvmppc_set_gpr(vcpu, 5, pte[1]); | ||
97 | |||
98 | return EMULATE_DONE; | ||
99 | } | ||
100 | |||
101 | static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) | ||
102 | { | ||
103 | unsigned long flags = kvmppc_get_gpr(vcpu, 4); | ||
104 | unsigned long pte_index = kvmppc_get_gpr(vcpu, 5); | ||
105 | unsigned long avpn = kvmppc_get_gpr(vcpu, 6); | ||
106 | unsigned long rb, pteg, r, v; | ||
107 | unsigned long pte[2]; | ||
108 | |||
109 | pteg = get_pteg_addr(vcpu, pte_index); | ||
110 | copy_from_user(pte, (void __user *)pteg, sizeof(pte)); | ||
111 | |||
112 | if ((pte[0] & HPTE_V_VALID) == 0 || | ||
113 | ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) { | ||
114 | kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); | ||
115 | return EMULATE_DONE; | ||
116 | } | ||
117 | |||
118 | v = pte[0]; | ||
119 | r = pte[1]; | ||
120 | r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI | | ||
121 | HPTE_R_KEY_LO); | ||
122 | r |= (flags << 55) & HPTE_R_PP0; | ||
123 | r |= (flags << 48) & HPTE_R_KEY_HI; | ||
124 | r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); | ||
125 | |||
126 | pte[1] = r; | ||
127 | |||
128 | rb = compute_tlbie_rb(v, r, pte_index); | ||
129 | vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); | ||
130 | copy_to_user((void __user *)pteg, pte, sizeof(pte)); | ||
131 | |||
132 | kvmppc_set_gpr(vcpu, 3, H_SUCCESS); | ||
133 | |||
134 | return EMULATE_DONE; | ||
135 | } | ||
136 | |||
137 | int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) | ||
138 | { | ||
139 | switch (cmd) { | ||
140 | case H_ENTER: | ||
141 | return kvmppc_h_pr_enter(vcpu); | ||
142 | case H_REMOVE: | ||
143 | return kvmppc_h_pr_remove(vcpu); | ||
144 | case H_PROTECT: | ||
145 | return kvmppc_h_pr_protect(vcpu); | ||
146 | case H_BULK_REMOVE: | ||
147 | /* We just flush all PTEs, so user space can | ||
148 | handle the HPT modifications */ | ||
149 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
150 | break; | ||
151 | case H_CEDE: | ||
152 | kvm_vcpu_block(vcpu); | ||
153 | vcpu->stat.halt_wakeup++; | ||
154 | return EMULATE_DONE; | ||
155 | } | ||
156 | |||
157 | return EMULATE_FAIL; | ||
158 | } | ||
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index c1f877c4a884..34187585c507 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/ppc_asm.h> | 20 | #include <asm/ppc_asm.h> |
21 | #include <asm/kvm_asm.h> | 21 | #include <asm/kvm_asm.h> |
22 | #include <asm/reg.h> | 22 | #include <asm/reg.h> |
23 | #include <asm/mmu.h> | ||
23 | #include <asm/page.h> | 24 | #include <asm/page.h> |
24 | #include <asm/asm-offsets.h> | 25 | #include <asm/asm-offsets.h> |
25 | 26 | ||
@@ -35,10 +36,10 @@ | |||
35 | 36 | ||
36 | #if defined(CONFIG_PPC_BOOK3S_64) | 37 | #if defined(CONFIG_PPC_BOOK3S_64) |
37 | 38 | ||
38 | #define LOAD_SHADOW_VCPU(reg) GET_PACA(reg) | ||
39 | #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) | ||
40 | #define FUNC(name) GLUE(.,name) | 39 | #define FUNC(name) GLUE(.,name) |
40 | #define MTMSR_EERI(reg) mtmsrd (reg),1 | ||
41 | 41 | ||
42 | .globl kvmppc_skip_interrupt | ||
42 | kvmppc_skip_interrupt: | 43 | kvmppc_skip_interrupt: |
43 | /* | 44 | /* |
44 | * Here all GPRs are unchanged from when the interrupt happened | 45 | * Here all GPRs are unchanged from when the interrupt happened |
@@ -51,6 +52,7 @@ kvmppc_skip_interrupt: | |||
51 | rfid | 52 | rfid |
52 | b . | 53 | b . |
53 | 54 | ||
55 | .globl kvmppc_skip_Hinterrupt | ||
54 | kvmppc_skip_Hinterrupt: | 56 | kvmppc_skip_Hinterrupt: |
55 | /* | 57 | /* |
56 | * Here all GPRs are unchanged from when the interrupt happened | 58 | * Here all GPRs are unchanged from when the interrupt happened |
@@ -65,8 +67,8 @@ kvmppc_skip_Hinterrupt: | |||
65 | 67 | ||
66 | #elif defined(CONFIG_PPC_BOOK3S_32) | 68 | #elif defined(CONFIG_PPC_BOOK3S_32) |
67 | 69 | ||
68 | #define MSR_NOIRQ MSR_KERNEL | ||
69 | #define FUNC(name) name | 70 | #define FUNC(name) name |
71 | #define MTMSR_EERI(reg) mtmsr (reg) | ||
70 | 72 | ||
71 | .macro INTERRUPT_TRAMPOLINE intno | 73 | .macro INTERRUPT_TRAMPOLINE intno |
72 | 74 | ||
@@ -167,40 +169,24 @@ kvmppc_handler_skip_ins: | |||
167 | #endif | 169 | #endif |
168 | 170 | ||
169 | /* | 171 | /* |
170 | * This trampoline brings us back to a real mode handler | 172 | * Call kvmppc_handler_trampoline_enter in real mode |
171 | * | ||
172 | * Input Registers: | ||
173 | * | ||
174 | * R5 = SRR0 | ||
175 | * R6 = SRR1 | ||
176 | * LR = real-mode IP | ||
177 | * | 173 | * |
174 | * On entry, r4 contains the guest shadow MSR | ||
178 | */ | 175 | */ |
179 | .global kvmppc_handler_lowmem_trampoline | 176 | _GLOBAL(kvmppc_entry_trampoline) |
180 | kvmppc_handler_lowmem_trampoline: | 177 | mfmsr r5 |
181 | 178 | LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) | |
182 | mtsrr0 r5 | 179 | toreal(r7) |
180 | |||
181 | li r9, MSR_RI | ||
182 | ori r9, r9, MSR_EE | ||
183 | andc r9, r5, r9 /* Clear EE and RI in MSR value */ | ||
184 | li r6, MSR_IR | MSR_DR | ||
185 | ori r6, r6, MSR_EE | ||
186 | andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */ | ||
187 | MTMSR_EERI(r9) /* Clear EE and RI in MSR */ | ||
188 | mtsrr0 r7 /* before we set srr0/1 */ | ||
183 | mtsrr1 r6 | 189 | mtsrr1 r6 |
184 | blr | ||
185 | kvmppc_handler_lowmem_trampoline_end: | ||
186 | |||
187 | /* | ||
188 | * Call a function in real mode | ||
189 | * | ||
190 | * Input Registers: | ||
191 | * | ||
192 | * R3 = function | ||
193 | * R4 = MSR | ||
194 | * R5 = scratch register | ||
195 | * | ||
196 | */ | ||
197 | _GLOBAL(kvmppc_rmcall) | ||
198 | LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ) | ||
199 | mtmsr r5 /* Disable relocation and interrupts, so mtsrr | ||
200 | doesn't get interrupted */ | ||
201 | sync | ||
202 | mtsrr0 r3 | ||
203 | mtsrr1 r4 | ||
204 | RFI | 190 | RFI |
205 | 191 | ||
206 | #if defined(CONFIG_PPC_BOOK3S_32) | 192 | #if defined(CONFIG_PPC_BOOK3S_32) |
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index aed32e517212..0676ae249b9f 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S | |||
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #define GET_SHADOW_VCPU(reg) \ | 24 | #define GET_SHADOW_VCPU(reg) \ |
25 | mr reg, r13 | 25 | mr reg, r13 |
26 | #define MTMSR_EERI(reg) mtmsrd (reg),1 | ||
26 | 27 | ||
27 | #elif defined(CONFIG_PPC_BOOK3S_32) | 28 | #elif defined(CONFIG_PPC_BOOK3S_32) |
28 | 29 | ||
@@ -30,6 +31,7 @@ | |||
30 | tophys(reg, r2); \ | 31 | tophys(reg, r2); \ |
31 | lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \ | 32 | lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \ |
32 | tophys(reg, reg) | 33 | tophys(reg, reg) |
34 | #define MTMSR_EERI(reg) mtmsr (reg) | ||
33 | 35 | ||
34 | #endif | 36 | #endif |
35 | 37 | ||
@@ -57,10 +59,12 @@ kvmppc_handler_trampoline_enter: | |||
57 | /* Required state: | 59 | /* Required state: |
58 | * | 60 | * |
59 | * MSR = ~IR|DR | 61 | * MSR = ~IR|DR |
60 | * R13 = PACA | ||
61 | * R1 = host R1 | 62 | * R1 = host R1 |
62 | * R2 = host R2 | 63 | * R2 = host R2 |
63 | * R10 = guest MSR | 64 | * R4 = guest shadow MSR |
65 | * R5 = normal host MSR | ||
66 | * R6 = current host MSR (EE, IR, DR off) | ||
67 | * LR = highmem guest exit code | ||
64 | * all other volatile GPRS = free | 68 | * all other volatile GPRS = free |
65 | * SVCPU[CR] = guest CR | 69 | * SVCPU[CR] = guest CR |
66 | * SVCPU[XER] = guest XER | 70 | * SVCPU[XER] = guest XER |
@@ -71,15 +75,15 @@ kvmppc_handler_trampoline_enter: | |||
71 | /* r3 = shadow vcpu */ | 75 | /* r3 = shadow vcpu */ |
72 | GET_SHADOW_VCPU(r3) | 76 | GET_SHADOW_VCPU(r3) |
73 | 77 | ||
78 | /* Save guest exit handler address and MSR */ | ||
79 | mflr r0 | ||
80 | PPC_STL r0, HSTATE_VMHANDLER(r3) | ||
81 | PPC_STL r5, HSTATE_HOST_MSR(r3) | ||
82 | |||
74 | /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */ | 83 | /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */ |
75 | PPC_STL r1, HSTATE_HOST_R1(r3) | 84 | PPC_STL r1, HSTATE_HOST_R1(r3) |
76 | PPC_STL r2, HSTATE_HOST_R2(r3) | 85 | PPC_STL r2, HSTATE_HOST_R2(r3) |
77 | 86 | ||
78 | /* Move SRR0 and SRR1 into the respective regs */ | ||
79 | PPC_LL r9, SVCPU_PC(r3) | ||
80 | mtsrr0 r9 | ||
81 | mtsrr1 r10 | ||
82 | |||
83 | /* Activate guest mode, so faults get handled by KVM */ | 87 | /* Activate guest mode, so faults get handled by KVM */ |
84 | li r11, KVM_GUEST_MODE_GUEST | 88 | li r11, KVM_GUEST_MODE_GUEST |
85 | stb r11, HSTATE_IN_GUEST(r3) | 89 | stb r11, HSTATE_IN_GUEST(r3) |
@@ -87,17 +91,46 @@ kvmppc_handler_trampoline_enter: | |||
87 | /* Switch to guest segment. This is subarch specific. */ | 91 | /* Switch to guest segment. This is subarch specific. */ |
88 | LOAD_GUEST_SEGMENTS | 92 | LOAD_GUEST_SEGMENTS |
89 | 93 | ||
94 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
95 | /* Some guests may need to have dcbz set to 32 byte length. | ||
96 | * | ||
97 | * Usually we ensure that by patching the guest's instructions | ||
98 | * to trap on dcbz and emulate it in the hypervisor. | ||
99 | * | ||
100 | * If we can, we should tell the CPU to use 32 byte dcbz though, | ||
101 | * because that's a lot faster. | ||
102 | */ | ||
103 | lbz r0, HSTATE_RESTORE_HID5(r3) | ||
104 | cmpwi r0, 0 | ||
105 | beq no_dcbz32_on | ||
106 | |||
107 | mfspr r0,SPRN_HID5 | ||
108 | ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */ | ||
109 | mtspr SPRN_HID5,r0 | ||
110 | no_dcbz32_on: | ||
111 | |||
112 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
113 | |||
90 | /* Enter guest */ | 114 | /* Enter guest */ |
91 | 115 | ||
92 | PPC_LL r4, SVCPU_CTR(r3) | 116 | PPC_LL r8, SVCPU_CTR(r3) |
93 | PPC_LL r5, SVCPU_LR(r3) | 117 | PPC_LL r9, SVCPU_LR(r3) |
94 | lwz r6, SVCPU_CR(r3) | 118 | lwz r10, SVCPU_CR(r3) |
95 | lwz r7, SVCPU_XER(r3) | 119 | lwz r11, SVCPU_XER(r3) |
120 | |||
121 | mtctr r8 | ||
122 | mtlr r9 | ||
123 | mtcr r10 | ||
124 | mtxer r11 | ||
96 | 125 | ||
97 | mtctr r4 | 126 | /* Move SRR0 and SRR1 into the respective regs */ |
98 | mtlr r5 | 127 | PPC_LL r9, SVCPU_PC(r3) |
99 | mtcr r6 | 128 | /* First clear RI in our current MSR value */ |
100 | mtxer r7 | 129 | li r0, MSR_RI |
130 | andc r6, r6, r0 | ||
131 | MTMSR_EERI(r6) | ||
132 | mtsrr0 r9 | ||
133 | mtsrr1 r4 | ||
101 | 134 | ||
102 | PPC_LL r0, SVCPU_R0(r3) | 135 | PPC_LL r0, SVCPU_R0(r3) |
103 | PPC_LL r1, SVCPU_R1(r3) | 136 | PPC_LL r1, SVCPU_R1(r3) |
@@ -213,11 +246,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) | |||
213 | beq ld_last_inst | 246 | beq ld_last_inst |
214 | cmpwi r12, BOOK3S_INTERRUPT_PROGRAM | 247 | cmpwi r12, BOOK3S_INTERRUPT_PROGRAM |
215 | beq ld_last_inst | 248 | beq ld_last_inst |
249 | cmpwi r12, BOOK3S_INTERRUPT_SYSCALL | ||
250 | beq ld_last_prev_inst | ||
216 | cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT | 251 | cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT |
217 | beq- ld_last_inst | 252 | beq- ld_last_inst |
218 | 253 | ||
219 | b no_ld_last_inst | 254 | b no_ld_last_inst |
220 | 255 | ||
256 | ld_last_prev_inst: | ||
257 | addi r3, r3, -4 | ||
258 | |||
221 | ld_last_inst: | 259 | ld_last_inst: |
222 | /* Save off the guest instruction we're at */ | 260 | /* Save off the guest instruction we're at */ |
223 | 261 | ||
@@ -254,6 +292,43 @@ no_ld_last_inst: | |||
254 | /* Switch back to host MMU */ | 292 | /* Switch back to host MMU */ |
255 | LOAD_HOST_SEGMENTS | 293 | LOAD_HOST_SEGMENTS |
256 | 294 | ||
295 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
296 | |||
297 | lbz r5, HSTATE_RESTORE_HID5(r13) | ||
298 | cmpwi r5, 0 | ||
299 | beq no_dcbz32_off | ||
300 | |||
301 | li r4, 0 | ||
302 | mfspr r5,SPRN_HID5 | ||
303 | rldimi r5,r4,6,56 | ||
304 | mtspr SPRN_HID5,r5 | ||
305 | |||
306 | no_dcbz32_off: | ||
307 | |||
308 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
309 | |||
310 | /* | ||
311 | * For some interrupts, we need to call the real Linux | ||
312 | * handler, so it can do work for us. This has to happen | ||
313 | * as if the interrupt arrived from the kernel though, | ||
314 | * so let's fake it here where most state is restored. | ||
315 | * | ||
316 | * Having set up SRR0/1 with the address where we want | ||
317 | * to continue with relocation on (potentially in module | ||
318 | * space), we either just go straight there with rfi[d], | ||
319 | * or we jump to an interrupt handler with bctr if there | ||
320 | * is an interrupt to be handled first. In the latter | ||
321 | * case, the rfi[d] at the end of the interrupt handler | ||
322 | * will get us back to where we want to continue. | ||
323 | */ | ||
324 | |||
325 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | ||
326 | beq 1f | ||
327 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER | ||
328 | beq 1f | ||
329 | cmpwi r12, BOOK3S_INTERRUPT_PERFMON | ||
330 | 1: mtctr r12 | ||
331 | |||
257 | /* Register usage at this point: | 332 | /* Register usage at this point: |
258 | * | 333 | * |
259 | * R1 = host R1 | 334 | * R1 = host R1 |
@@ -264,13 +339,15 @@ no_ld_last_inst: | |||
264 | * | 339 | * |
265 | */ | 340 | */ |
266 | 341 | ||
267 | /* RFI into the highmem handler */ | 342 | PPC_LL r6, HSTATE_HOST_MSR(r13) |
268 | mfmsr r7 | ||
269 | ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */ | ||
270 | mtsrr1 r7 | ||
271 | /* Load highmem handler address */ | ||
272 | PPC_LL r8, HSTATE_VMHANDLER(r13) | 343 | PPC_LL r8, HSTATE_VMHANDLER(r13) |
344 | |||
345 | /* Restore host msr -> SRR1 */ | ||
346 | mtsrr1 r6 | ||
347 | /* Load highmem handler address */ | ||
273 | mtsrr0 r8 | 348 | mtsrr0 r8 |
274 | 349 | ||
350 | /* RFI into the highmem handler, or jump to interrupt handler */ | ||
351 | beqctr | ||
275 | RFI | 352 | RFI |
276 | kvmppc_handler_trampoline_exit_end: | 353 | kvmppc_handler_trampoline_exit_end: |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ee45fa01220e..bb6c988f010a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -316,6 +316,11 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
316 | { | 316 | { |
317 | int ret; | 317 | int ret; |
318 | 318 | ||
319 | if (!vcpu->arch.sane) { | ||
320 | kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
321 | return -EINVAL; | ||
322 | } | ||
323 | |||
319 | local_irq_disable(); | 324 | local_irq_disable(); |
320 | kvm_guest_enter(); | 325 | kvm_guest_enter(); |
321 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); | 326 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); |
@@ -618,6 +623,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
618 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | 623 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) |
619 | { | 624 | { |
620 | int i; | 625 | int i; |
626 | int r; | ||
621 | 627 | ||
622 | vcpu->arch.pc = 0; | 628 | vcpu->arch.pc = 0; |
623 | vcpu->arch.shared->msr = 0; | 629 | vcpu->arch.shared->msr = 0; |
@@ -634,7 +640,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
634 | 640 | ||
635 | kvmppc_init_timing_stats(vcpu); | 641 | kvmppc_init_timing_stats(vcpu); |
636 | 642 | ||
637 | return kvmppc_core_vcpu_setup(vcpu); | 643 | r = kvmppc_core_vcpu_setup(vcpu); |
644 | kvmppc_sanity_check(vcpu); | ||
645 | return r; | ||
638 | } | 646 | } |
639 | 647 | ||
640 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 648 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 797a7447c268..26d20903f2bc 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
@@ -73,6 +73,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) | |||
73 | /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ | 73 | /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ |
74 | vcpu->vcpu_id = 0; | 74 | vcpu->vcpu_id = 0; |
75 | 75 | ||
76 | vcpu->arch.cpu_type = KVM_CPU_E500V2; | ||
77 | |||
76 | return 0; | 78 | return 0; |
77 | } | 79 | } |
78 | 80 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a107c9be0fb1..0d843c6ba315 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -39,12 +39,8 @@ | |||
39 | 39 | ||
40 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 40 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
41 | { | 41 | { |
42 | #ifndef CONFIG_KVM_BOOK3S_64_HV | ||
43 | return !(v->arch.shared->msr & MSR_WE) || | 42 | return !(v->arch.shared->msr & MSR_WE) || |
44 | !!(v->arch.pending_exceptions); | 43 | !!(v->arch.pending_exceptions); |
45 | #else | ||
46 | return !(v->arch.ceded) || !!(v->arch.pending_exceptions); | ||
47 | #endif | ||
48 | } | 44 | } |
49 | 45 | ||
50 | int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) | 46 | int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) |
@@ -95,6 +91,31 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) | |||
95 | return r; | 91 | return r; |
96 | } | 92 | } |
97 | 93 | ||
94 | int kvmppc_sanity_check(struct kvm_vcpu *vcpu) | ||
95 | { | ||
96 | int r = false; | ||
97 | |||
98 | /* We have to know what CPU to virtualize */ | ||
99 | if (!vcpu->arch.pvr) | ||
100 | goto out; | ||
101 | |||
102 | /* PAPR only works with book3s_64 */ | ||
103 | if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled) | ||
104 | goto out; | ||
105 | |||
106 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
107 | /* HV KVM can only do PAPR mode for now */ | ||
108 | if (!vcpu->arch.papr_enabled) | ||
109 | goto out; | ||
110 | #endif | ||
111 | |||
112 | r = true; | ||
113 | |||
114 | out: | ||
115 | vcpu->arch.sane = r; | ||
116 | return r ? 0 : -EINVAL; | ||
117 | } | ||
118 | |||
98 | int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | 119 | int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) |
99 | { | 120 | { |
100 | enum emulation_result er; | 121 | enum emulation_result er; |
@@ -188,6 +209,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
188 | case KVM_CAP_PPC_BOOKE_SREGS: | 209 | case KVM_CAP_PPC_BOOKE_SREGS: |
189 | #else | 210 | #else |
190 | case KVM_CAP_PPC_SEGSTATE: | 211 | case KVM_CAP_PPC_SEGSTATE: |
212 | case KVM_CAP_PPC_HIOR: | ||
213 | case KVM_CAP_PPC_PAPR: | ||
191 | #endif | 214 | #endif |
192 | case KVM_CAP_PPC_UNSET_IRQ: | 215 | case KVM_CAP_PPC_UNSET_IRQ: |
193 | case KVM_CAP_PPC_IRQ_LEVEL: | 216 | case KVM_CAP_PPC_IRQ_LEVEL: |
@@ -258,6 +281,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
258 | { | 281 | { |
259 | struct kvm_vcpu *vcpu; | 282 | struct kvm_vcpu *vcpu; |
260 | vcpu = kvmppc_core_vcpu_create(kvm, id); | 283 | vcpu = kvmppc_core_vcpu_create(kvm, id); |
284 | vcpu->arch.wqp = &vcpu->wq; | ||
261 | if (!IS_ERR(vcpu)) | 285 | if (!IS_ERR(vcpu)) |
262 | kvmppc_create_vcpu_debugfs(vcpu, id); | 286 | kvmppc_create_vcpu_debugfs(vcpu, id); |
263 | return vcpu; | 287 | return vcpu; |
@@ -289,8 +313,8 @@ static void kvmppc_decrementer_func(unsigned long data) | |||
289 | 313 | ||
290 | kvmppc_core_queue_dec(vcpu); | 314 | kvmppc_core_queue_dec(vcpu); |
291 | 315 | ||
292 | if (waitqueue_active(&vcpu->wq)) { | 316 | if (waitqueue_active(vcpu->arch.wqp)) { |
293 | wake_up_interruptible(&vcpu->wq); | 317 | wake_up_interruptible(vcpu->arch.wqp); |
294 | vcpu->stat.halt_wakeup++; | 318 | vcpu->stat.halt_wakeup++; |
295 | } | 319 | } |
296 | } | 320 | } |
@@ -543,13 +567,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
543 | 567 | ||
544 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | 568 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) |
545 | { | 569 | { |
546 | if (irq->irq == KVM_INTERRUPT_UNSET) | 570 | if (irq->irq == KVM_INTERRUPT_UNSET) { |
547 | kvmppc_core_dequeue_external(vcpu, irq); | 571 | kvmppc_core_dequeue_external(vcpu, irq); |
548 | else | 572 | return 0; |
549 | kvmppc_core_queue_external(vcpu, irq); | 573 | } |
574 | |||
575 | kvmppc_core_queue_external(vcpu, irq); | ||
550 | 576 | ||
551 | if (waitqueue_active(&vcpu->wq)) { | 577 | if (waitqueue_active(vcpu->arch.wqp)) { |
552 | wake_up_interruptible(&vcpu->wq); | 578 | wake_up_interruptible(vcpu->arch.wqp); |
553 | vcpu->stat.halt_wakeup++; | 579 | vcpu->stat.halt_wakeup++; |
554 | } else if (vcpu->cpu != -1) { | 580 | } else if (vcpu->cpu != -1) { |
555 | smp_send_reschedule(vcpu->cpu); | 581 | smp_send_reschedule(vcpu->cpu); |
@@ -571,11 +597,18 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
571 | r = 0; | 597 | r = 0; |
572 | vcpu->arch.osi_enabled = true; | 598 | vcpu->arch.osi_enabled = true; |
573 | break; | 599 | break; |
600 | case KVM_CAP_PPC_PAPR: | ||
601 | r = 0; | ||
602 | vcpu->arch.papr_enabled = true; | ||
603 | break; | ||
574 | default: | 604 | default: |
575 | r = -EINVAL; | 605 | r = -EINVAL; |
576 | break; | 606 | break; |
577 | } | 607 | } |
578 | 608 | ||
609 | if (!r) | ||
610 | r = kvmppc_sanity_check(vcpu); | ||
611 | |||
579 | return r; | 612 | return r; |
580 | } | 613 | } |
581 | 614 | ||
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 00ff00dfb24c..1ca5de07ac36 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -119,6 +119,7 @@ struct kvm_vcpu_stat { | |||
119 | u32 instruction_lctlg; | 119 | u32 instruction_lctlg; |
120 | u32 exit_program_interruption; | 120 | u32 exit_program_interruption; |
121 | u32 exit_instr_and_program; | 121 | u32 exit_instr_and_program; |
122 | u32 deliver_external_call; | ||
122 | u32 deliver_emergency_signal; | 123 | u32 deliver_emergency_signal; |
123 | u32 deliver_service_signal; | 124 | u32 deliver_service_signal; |
124 | u32 deliver_virtio_interrupt; | 125 | u32 deliver_virtio_interrupt; |
@@ -138,6 +139,7 @@ struct kvm_vcpu_stat { | |||
138 | u32 instruction_stfl; | 139 | u32 instruction_stfl; |
139 | u32 instruction_tprot; | 140 | u32 instruction_tprot; |
140 | u32 instruction_sigp_sense; | 141 | u32 instruction_sigp_sense; |
142 | u32 instruction_sigp_external_call; | ||
141 | u32 instruction_sigp_emergency; | 143 | u32 instruction_sigp_emergency; |
142 | u32 instruction_sigp_stop; | 144 | u32 instruction_sigp_stop; |
143 | u32 instruction_sigp_arch; | 145 | u32 instruction_sigp_arch; |
@@ -174,6 +176,10 @@ struct kvm_s390_prefix_info { | |||
174 | __u32 address; | 176 | __u32 address; |
175 | }; | 177 | }; |
176 | 178 | ||
179 | struct kvm_s390_extcall_info { | ||
180 | __u16 code; | ||
181 | }; | ||
182 | |||
177 | struct kvm_s390_emerg_info { | 183 | struct kvm_s390_emerg_info { |
178 | __u16 code; | 184 | __u16 code; |
179 | }; | 185 | }; |
@@ -186,6 +192,7 @@ struct kvm_s390_interrupt_info { | |||
186 | struct kvm_s390_ext_info ext; | 192 | struct kvm_s390_ext_info ext; |
187 | struct kvm_s390_pgm_info pgm; | 193 | struct kvm_s390_pgm_info pgm; |
188 | struct kvm_s390_emerg_info emerg; | 194 | struct kvm_s390_emerg_info emerg; |
195 | struct kvm_s390_extcall_info extcall; | ||
189 | struct kvm_s390_prefix_info prefix; | 196 | struct kvm_s390_prefix_info prefix; |
190 | }; | 197 | }; |
191 | }; | 198 | }; |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c9aeb4b4d0b8..87c16705b381 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -38,6 +38,11 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, | |||
38 | struct kvm_s390_interrupt_info *inti) | 38 | struct kvm_s390_interrupt_info *inti) |
39 | { | 39 | { |
40 | switch (inti->type) { | 40 | switch (inti->type) { |
41 | case KVM_S390_INT_EXTERNAL_CALL: | ||
42 | if (psw_extint_disabled(vcpu)) | ||
43 | return 0; | ||
44 | if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) | ||
45 | return 1; | ||
41 | case KVM_S390_INT_EMERGENCY: | 46 | case KVM_S390_INT_EMERGENCY: |
42 | if (psw_extint_disabled(vcpu)) | 47 | if (psw_extint_disabled(vcpu)) |
43 | return 0; | 48 | return 0; |
@@ -98,6 +103,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, | |||
98 | struct kvm_s390_interrupt_info *inti) | 103 | struct kvm_s390_interrupt_info *inti) |
99 | { | 104 | { |
100 | switch (inti->type) { | 105 | switch (inti->type) { |
106 | case KVM_S390_INT_EXTERNAL_CALL: | ||
101 | case KVM_S390_INT_EMERGENCY: | 107 | case KVM_S390_INT_EMERGENCY: |
102 | case KVM_S390_INT_SERVICE: | 108 | case KVM_S390_INT_SERVICE: |
103 | case KVM_S390_INT_VIRTIO: | 109 | case KVM_S390_INT_VIRTIO: |
@@ -143,6 +149,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
143 | exception = 1; | 149 | exception = 1; |
144 | break; | 150 | break; |
145 | 151 | ||
152 | case KVM_S390_INT_EXTERNAL_CALL: | ||
153 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); | ||
154 | vcpu->stat.deliver_external_call++; | ||
155 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); | ||
156 | if (rc == -EFAULT) | ||
157 | exception = 1; | ||
158 | |||
159 | rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code); | ||
160 | if (rc == -EFAULT) | ||
161 | exception = 1; | ||
162 | |||
163 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
164 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
165 | if (rc == -EFAULT) | ||
166 | exception = 1; | ||
167 | |||
168 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
169 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
170 | if (rc == -EFAULT) | ||
171 | exception = 1; | ||
172 | break; | ||
173 | |||
146 | case KVM_S390_INT_SERVICE: | 174 | case KVM_S390_INT_SERVICE: |
147 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", | 175 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", |
148 | inti->ext.ext_params); | 176 | inti->ext.ext_params); |
@@ -522,6 +550,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
522 | break; | 550 | break; |
523 | case KVM_S390_PROGRAM_INT: | 551 | case KVM_S390_PROGRAM_INT: |
524 | case KVM_S390_SIGP_STOP: | 552 | case KVM_S390_SIGP_STOP: |
553 | case KVM_S390_INT_EXTERNAL_CALL: | ||
525 | case KVM_S390_INT_EMERGENCY: | 554 | case KVM_S390_INT_EMERGENCY: |
526 | default: | 555 | default: |
527 | kfree(inti); | 556 | kfree(inti); |
@@ -581,6 +610,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | |||
581 | break; | 610 | break; |
582 | case KVM_S390_SIGP_STOP: | 611 | case KVM_S390_SIGP_STOP: |
583 | case KVM_S390_RESTART: | 612 | case KVM_S390_RESTART: |
613 | case KVM_S390_INT_EXTERNAL_CALL: | ||
584 | case KVM_S390_INT_EMERGENCY: | 614 | case KVM_S390_INT_EMERGENCY: |
585 | VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); | 615 | VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); |
586 | inti->type = s390int->type; | 616 | inti->type = s390int->type; |
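
The interrupt.c changes teach the s390 injection path about SIGP external call: the interrupt is deliverable only while external interrupts are enabled in the guest PSW and the external-call submask bit in CR0 is set, and delivery stores external-interrupt code 0x1202 plus the calling CPU's address into the lowcore before swapping PSWs. A stand-alone sketch of the deliverability test, mirroring the new case in __interrupt_is_deliverable() (the helper name is illustrative, not kernel API):

    #include <stdbool.h>
    #include <stdint.h>

    #define CR0_EXTERNAL_CALL_SUBMASK 0x2000ul   /* the bit tested by the new case */

    /*
     * An external call is deliverable when PSW external interrupts are not
     * disabled and CR0 has the external-call submask bit set.
     */
    bool extcall_deliverable(bool psw_ext_disabled, uint64_t gcr0)
    {
            if (psw_ext_disabled)
                    return false;
            return (gcr0 & CR0_EXTERNAL_CALL_SUBMASK) != 0;
    }
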
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dc2b580e27bc..9610ba41b974 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -46,6 +46,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
46 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, | 46 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, |
47 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, | 47 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, |
48 | { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, | 48 | { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, |
49 | { "deliver_external_call", VCPU_STAT(deliver_external_call) }, | ||
49 | { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, | 50 | { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, |
50 | { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, | 51 | { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, |
51 | { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, | 52 | { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, |
@@ -64,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
64 | { "instruction_stfl", VCPU_STAT(instruction_stfl) }, | 65 | { "instruction_stfl", VCPU_STAT(instruction_stfl) }, |
65 | { "instruction_tprot", VCPU_STAT(instruction_tprot) }, | 66 | { "instruction_tprot", VCPU_STAT(instruction_tprot) }, |
66 | { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, | 67 | { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, |
68 | { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, | ||
67 | { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, | 69 | { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, |
68 | { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, | 70 | { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, |
69 | { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, | 71 | { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, |
@@ -175,6 +177,8 @@ int kvm_arch_init_vm(struct kvm *kvm) | |||
175 | if (rc) | 177 | if (rc) |
176 | goto out_err; | 178 | goto out_err; |
177 | 179 | ||
180 | rc = -ENOMEM; | ||
181 | |||
178 | kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); | 182 | kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); |
179 | if (!kvm->arch.sca) | 183 | if (!kvm->arch.sca) |
180 | goto out_err; | 184 | goto out_err; |
@@ -312,11 +316,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
312 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | 316 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, |
313 | unsigned int id) | 317 | unsigned int id) |
314 | { | 318 | { |
315 | struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); | 319 | struct kvm_vcpu *vcpu; |
316 | int rc = -ENOMEM; | 320 | int rc = -EINVAL; |
321 | |||
322 | if (id >= KVM_MAX_VCPUS) | ||
323 | goto out; | ||
317 | 324 | ||
325 | rc = -ENOMEM; | ||
326 | |||
327 | vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); | ||
318 | if (!vcpu) | 328 | if (!vcpu) |
319 | goto out_nomem; | 329 | goto out; |
320 | 330 | ||
321 | vcpu->arch.sie_block = (struct kvm_s390_sie_block *) | 331 | vcpu->arch.sie_block = (struct kvm_s390_sie_block *) |
322 | get_zeroed_page(GFP_KERNEL); | 332 | get_zeroed_page(GFP_KERNEL); |
@@ -352,7 +362,7 @@ out_free_sie_block: | |||
352 | free_page((unsigned long)(vcpu->arch.sie_block)); | 362 | free_page((unsigned long)(vcpu->arch.sie_block)); |
353 | out_free_cpu: | 363 | out_free_cpu: |
354 | kfree(vcpu); | 364 | kfree(vcpu); |
355 | out_nomem: | 365 | out: |
356 | return ERR_PTR(rc); | 366 | return ERR_PTR(rc); |
357 | } | 367 | } |
358 | 368 | ||
@@ -386,6 +396,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
386 | { | 396 | { |
387 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); | 397 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); |
388 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); | 398 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); |
399 | restore_access_regs(vcpu->arch.guest_acrs); | ||
389 | return 0; | 400 | return 0; |
390 | } | 401 | } |
391 | 402 | ||
@@ -401,6 +412,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
401 | { | 412 | { |
402 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); | 413 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); |
403 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; | 414 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; |
415 | restore_fp_regs(&vcpu->arch.guest_fpregs); | ||
404 | return 0; | 416 | return 0; |
405 | } | 417 | } |
406 | 418 | ||
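
kvm_arch_vcpu_create() on s390 now rejects out-of-range vcpu ids with -EINVAL before allocating anything, and the set_sregs/set_fpu ioctls immediately load the new access and floating-point registers so they take effect without a full vcpu reload. From userspace the id check simply shows up as a failed KVM_CREATE_VCPU; a hedged sketch, assuming vm_fd is an open VM file descriptor:

    #include <errno.h>
    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Returns a vcpu fd, or -1; ids >= KVM_MAX_VCPUS now fail with EINVAL. */
    int create_vcpu(int vm_fd, unsigned int id)
    {
            int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, id);

            if (vcpu_fd < 0 && errno == EINVAL)
                    return -1;      /* id out of range on this kernel */
            return vcpu_fd;
    }
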
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index d6a50c1fb2e6..f815118835f3 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
@@ -87,6 +87,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) | |||
87 | return -ENOMEM; | 87 | return -ENOMEM; |
88 | 88 | ||
89 | inti->type = KVM_S390_INT_EMERGENCY; | 89 | inti->type = KVM_S390_INT_EMERGENCY; |
90 | inti->emerg.code = vcpu->vcpu_id; | ||
90 | 91 | ||
91 | spin_lock(&fi->lock); | 92 | spin_lock(&fi->lock); |
92 | li = fi->local_int[cpu_addr]; | 93 | li = fi->local_int[cpu_addr]; |
@@ -103,9 +104,47 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) | |||
103 | wake_up_interruptible(&li->wq); | 104 | wake_up_interruptible(&li->wq); |
104 | spin_unlock_bh(&li->lock); | 105 | spin_unlock_bh(&li->lock); |
105 | rc = 0; /* order accepted */ | 106 | rc = 0; /* order accepted */ |
107 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); | ||
108 | unlock: | ||
109 | spin_unlock(&fi->lock); | ||
110 | return rc; | ||
111 | } | ||
112 | |||
113 | static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) | ||
114 | { | ||
115 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; | ||
116 | struct kvm_s390_local_interrupt *li; | ||
117 | struct kvm_s390_interrupt_info *inti; | ||
118 | int rc; | ||
119 | |||
120 | if (cpu_addr >= KVM_MAX_VCPUS) | ||
121 | return 3; /* not operational */ | ||
122 | |||
123 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | ||
124 | if (!inti) | ||
125 | return -ENOMEM; | ||
126 | |||
127 | inti->type = KVM_S390_INT_EXTERNAL_CALL; | ||
128 | inti->extcall.code = vcpu->vcpu_id; | ||
129 | |||
130 | spin_lock(&fi->lock); | ||
131 | li = fi->local_int[cpu_addr]; | ||
132 | if (li == NULL) { | ||
133 | rc = 3; /* not operational */ | ||
134 | kfree(inti); | ||
135 | goto unlock; | ||
136 | } | ||
137 | spin_lock_bh(&li->lock); | ||
138 | list_add_tail(&inti->list, &li->list); | ||
139 | atomic_set(&li->active, 1); | ||
140 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | ||
141 | if (waitqueue_active(&li->wq)) | ||
142 | wake_up_interruptible(&li->wq); | ||
143 | spin_unlock_bh(&li->lock); | ||
144 | rc = 0; /* order accepted */ | ||
145 | VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); | ||
106 | unlock: | 146 | unlock: |
107 | spin_unlock(&fi->lock); | 147 | spin_unlock(&fi->lock); |
108 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); | ||
109 | return rc; | 148 | return rc; |
110 | } | 149 | } |
111 | 150 | ||
@@ -267,6 +306,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
267 | rc = __sigp_sense(vcpu, cpu_addr, | 306 | rc = __sigp_sense(vcpu, cpu_addr, |
268 | &vcpu->arch.guest_gprs[r1]); | 307 | &vcpu->arch.guest_gprs[r1]); |
269 | break; | 308 | break; |
309 | case SIGP_EXTERNAL_CALL: | ||
310 | vcpu->stat.instruction_sigp_external_call++; | ||
311 | rc = __sigp_external_call(vcpu, cpu_addr); | ||
312 | break; | ||
270 | case SIGP_EMERGENCY: | 313 | case SIGP_EMERGENCY: |
271 | vcpu->stat.instruction_sigp_emergency++; | 314 | vcpu->stat.instruction_sigp_emergency++; |
272 | rc = __sigp_emergency(vcpu, cpu_addr); | 315 | rc = __sigp_emergency(vcpu, cpu_addr); |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 34595d5e1038..3925d8007864 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -100,7 +100,9 @@ | |||
100 | #define APIC_TIMER_BASE_CLKIN 0x0 | 100 | #define APIC_TIMER_BASE_CLKIN 0x0 |
101 | #define APIC_TIMER_BASE_TMBASE 0x1 | 101 | #define APIC_TIMER_BASE_TMBASE 0x1 |
102 | #define APIC_TIMER_BASE_DIV 0x2 | 102 | #define APIC_TIMER_BASE_DIV 0x2 |
103 | #define APIC_LVT_TIMER_ONESHOT (0 << 17) | ||
103 | #define APIC_LVT_TIMER_PERIODIC (1 << 17) | 104 | #define APIC_LVT_TIMER_PERIODIC (1 << 17) |
105 | #define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) | ||
104 | #define APIC_LVT_MASKED (1 << 16) | 106 | #define APIC_LVT_MASKED (1 << 16) |
105 | #define APIC_LVT_LEVEL_TRIGGER (1 << 15) | 107 | #define APIC_LVT_LEVEL_TRIGGER (1 << 15) |
106 | #define APIC_LVT_REMOTE_IRR (1 << 14) | 108 | #define APIC_LVT_REMOTE_IRR (1 << 14) |
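
apicdef.h now names all three local APIC timer modes: bits 18:17 of the LVT timer register select one-shot (0), periodic (1) or TSC-deadline (2). A small sketch of encoding and decoding the mode with these values (pure computation; the MODE_MASK helper is mine, not part of the header):

    #include <stdint.h>
    #include <stdio.h>

    #define APIC_LVT_TIMER_ONESHOT     (0 << 17)
    #define APIC_LVT_TIMER_PERIODIC    (1 << 17)
    #define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
    #define APIC_LVT_TIMER_MODE_MASK   (3 << 17)   /* helper mask, not in the header */

    static uint32_t lvt_timer_entry(uint8_t vector, uint32_t mode)
    {
            return vector | mode;   /* mode is one of the three values above */
    }

    int main(void)
    {
            uint32_t lvt = lvt_timer_entry(0xef, APIC_LVT_TIMER_TSCDEADLINE);

            printf("timer mode = %u\n", (lvt & APIC_LVT_TIMER_MODE_MASK) >> 17); /* 2 */
            return 0;
    }
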
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index aa6a488cd075..2f84a433b6a0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -121,6 +121,7 @@ | |||
121 | #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ | 121 | #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ |
122 | #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ | 122 | #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ |
123 | #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ | 123 | #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ |
124 | #define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ | ||
124 | #define X86_FEATURE_AES (4*32+25) /* AES instructions */ | 125 | #define X86_FEATURE_AES (4*32+25) /* AES instructions */ |
125 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | 126 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ |
126 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ | 127 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ |
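
X86_FEATURE_TSC_DEADLINE_TIMER is word 4, bit 24, i.e. CPUID leaf 1, ECX bit 24. A minimal userspace check using the GCC/Clang cpuid helper (a sketch; a guest only sees the bit if its hypervisor chooses to expose it):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return 1;
            printf("TSC-deadline timer: %s\n", (ecx & (1u << 24)) ? "yes" : "no");
            return 0;
    }
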
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 6040d115ef51..a026507893e9 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -262,7 +262,7 @@ struct x86_emulate_ctxt { | |||
262 | struct operand dst; | 262 | struct operand dst; |
263 | bool has_seg_override; | 263 | bool has_seg_override; |
264 | u8 seg_override; | 264 | u8 seg_override; |
265 | unsigned int d; | 265 | u64 d; |
266 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 266 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
267 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | 267 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); |
268 | /* modrm */ | 268 | /* modrm */ |
@@ -275,6 +275,8 @@ struct x86_emulate_ctxt { | |||
275 | unsigned long _eip; | 275 | unsigned long _eip; |
276 | /* Fields above regs are cleared together. */ | 276 | /* Fields above regs are cleared together. */ |
277 | unsigned long regs[NR_VCPU_REGS]; | 277 | unsigned long regs[NR_VCPU_REGS]; |
278 | struct operand memop; | ||
279 | struct operand *memopp; | ||
278 | struct fetch_cache fetch; | 280 | struct fetch_cache fetch; |
279 | struct read_cache io_read; | 281 | struct read_cache io_read; |
280 | struct read_cache mem_read; | 282 | struct read_cache mem_read; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd51c83aa5de..b4973f4dab98 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -26,7 +26,8 @@ | |||
26 | #include <asm/mtrr.h> | 26 | #include <asm/mtrr.h> |
27 | #include <asm/msr-index.h> | 27 | #include <asm/msr-index.h> |
28 | 28 | ||
29 | #define KVM_MAX_VCPUS 64 | 29 | #define KVM_MAX_VCPUS 254 |
30 | #define KVM_SOFT_MAX_VCPUS 64 | ||
30 | #define KVM_MEMORY_SLOTS 32 | 31 | #define KVM_MEMORY_SLOTS 32 |
31 | /* memory slots that does not exposed to userspace */ | 32 | /* memory slots that does not exposed to userspace */ |
32 | #define KVM_PRIVATE_MEM_SLOTS 4 | 33 | #define KVM_PRIVATE_MEM_SLOTS 4 |
@@ -264,6 +265,7 @@ struct kvm_mmu { | |||
264 | void (*new_cr3)(struct kvm_vcpu *vcpu); | 265 | void (*new_cr3)(struct kvm_vcpu *vcpu); |
265 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); | 266 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); |
266 | unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); | 267 | unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); |
268 | u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); | ||
267 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, | 269 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, |
268 | bool prefault); | 270 | bool prefault); |
269 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, | 271 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, |
@@ -411,8 +413,9 @@ struct kvm_vcpu_arch { | |||
411 | u32 tsc_catchup_mult; | 413 | u32 tsc_catchup_mult; |
412 | s8 tsc_catchup_shift; | 414 | s8 tsc_catchup_shift; |
413 | 415 | ||
414 | bool nmi_pending; | 416 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
415 | bool nmi_injected; | 417 | unsigned nmi_pending; /* NMI queued after currently running handler */ |
418 | bool nmi_injected; /* Trying to inject an NMI this entry */ | ||
416 | 419 | ||
417 | struct mtrr_state_type mtrr_state; | 420 | struct mtrr_state_type mtrr_state; |
418 | u32 pat; | 421 | u32 pat; |
@@ -628,14 +631,13 @@ struct kvm_x86_ops { | |||
628 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); | 631 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
629 | 632 | ||
630 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); | 633 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); |
634 | u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu); | ||
631 | 635 | ||
632 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); | 636 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); |
633 | 637 | ||
634 | int (*check_intercept)(struct kvm_vcpu *vcpu, | 638 | int (*check_intercept)(struct kvm_vcpu *vcpu, |
635 | struct x86_instruction_info *info, | 639 | struct x86_instruction_info *info, |
636 | enum x86_intercept_stage stage); | 640 | enum x86_intercept_stage stage); |
637 | |||
638 | const struct trace_print_flags *exit_reasons_str; | ||
639 | }; | 641 | }; |
640 | 642 | ||
641 | struct kvm_arch_async_pf { | 643 | struct kvm_arch_async_pf { |
@@ -672,6 +674,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | |||
672 | 674 | ||
673 | extern bool tdp_enabled; | 675 | extern bool tdp_enabled; |
674 | 676 | ||
677 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); | ||
678 | |||
675 | /* control of guest tsc rate supported? */ | 679 | /* control of guest tsc rate supported? */ |
676 | extern bool kvm_has_tsc_control; | 680 | extern bool kvm_has_tsc_control; |
677 | /* minimum supported tsc_khz for guests */ | 681 | /* minimum supported tsc_khz for guests */ |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d52609aeeab8..a6962d9161a0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -229,6 +229,8 @@ | |||
229 | #define MSR_IA32_APICBASE_ENABLE (1<<11) | 229 | #define MSR_IA32_APICBASE_ENABLE (1<<11) |
230 | #define MSR_IA32_APICBASE_BASE (0xfffff<<12) | 230 | #define MSR_IA32_APICBASE_BASE (0xfffff<<12) |
231 | 231 | ||
232 | #define MSR_IA32_TSCDEADLINE 0x000006e0 | ||
233 | |||
232 | #define MSR_IA32_UCODE_WRITE 0x00000079 | 234 | #define MSR_IA32_UCODE_WRITE 0x00000079 |
233 | #define MSR_IA32_UCODE_REV 0x0000008b | 235 | #define MSR_IA32_UCODE_REV 0x0000008b |
234 | 236 | ||
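
MSR_IA32_TSCDEADLINE (0x6e0) holds an absolute TSC value: with the LVT timer in TSC-deadline mode the local APIC fires once the TSC reaches it, and writing 0 disarms the timer. Programming it takes a privileged WRMSR, so the sketch below only computes the value a guest kernel would write, assuming the guest TSC rate in kHz is known:

    #include <stdint.h>

    #define MSR_IA32_TSCDEADLINE 0x000006e0

    /* Absolute deadline for "delay_ns from now", given the guest TSC rate in kHz. */
    uint64_t tsc_deadline(uint64_t now_tsc, uint64_t delay_ns, uint64_t tsc_khz)
    {
            return now_tsc + delay_ns * tsc_khz / 1000000ULL;
    }
    /* A guest would then execute: wrmsr(MSR_IA32_TSCDEADLINE, deadline); */
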
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 2caf290e9895..31f180c21ce9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -350,6 +350,18 @@ enum vmcs_field { | |||
350 | #define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */ | 350 | #define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */ |
351 | 351 | ||
352 | 352 | ||
353 | /* | ||
354 | * Exit Qualifications for APIC-Access | ||
355 | */ | ||
356 | #define APIC_ACCESS_OFFSET 0xfff /* 11:0, offset within the APIC page */ | ||
357 | #define APIC_ACCESS_TYPE 0xf000 /* 15:12, access type */ | ||
358 | #define TYPE_LINEAR_APIC_INST_READ (0 << 12) | ||
359 | #define TYPE_LINEAR_APIC_INST_WRITE (1 << 12) | ||
360 | #define TYPE_LINEAR_APIC_INST_FETCH (2 << 12) | ||
361 | #define TYPE_LINEAR_APIC_EVENT (3 << 12) | ||
362 | #define TYPE_PHYSICAL_APIC_EVENT (10 << 12) | ||
363 | #define TYPE_PHYSICAL_APIC_INST (15 << 12) | ||
364 | |||
353 | /* segment AR */ | 365 | /* segment AR */ |
354 | #define SEGMENT_AR_L_MASK (1 << 13) | 366 | #define SEGMENT_AR_L_MASK (1 << 13) |
355 | 367 | ||
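
The new exit-qualification macros split an APIC-access VM exit into the offset within the 4K APIC page (bits 11:0) and the access type (bits 15:12). A small decoder using exactly those masks (the helper names are illustrative; the constants match the header above):

    #include <stdint.h>

    #define APIC_ACCESS_OFFSET 0xfff
    #define APIC_ACCESS_TYPE   0xf000
    #define TYPE_LINEAR_APIC_INST_WRITE (1 << 12)

    unsigned int apic_access_offset(uint64_t exit_qualification)
    {
            return exit_qualification & APIC_ACCESS_OFFSET;          /* bits 11:0 */
    }

    int is_linear_apic_write(uint64_t exit_qualification)
    {
            return (exit_qualification & APIC_ACCESS_TYPE) == TYPE_LINEAR_APIC_INST_WRITE;
    }
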
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8b4cc5f067de..f1e3be18a08f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -29,6 +29,39 @@ | |||
29 | #include "tss.h" | 29 | #include "tss.h" |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Operand types | ||
33 | */ | ||
34 | #define OpNone 0ull | ||
35 | #define OpImplicit 1ull /* No generic decode */ | ||
36 | #define OpReg 2ull /* Register */ | ||
37 | #define OpMem 3ull /* Memory */ | ||
38 | #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */ | ||
39 | #define OpDI 5ull /* ES:DI/EDI/RDI */ | ||
40 | #define OpMem64 6ull /* Memory, 64-bit */ | ||
41 | #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */ | ||
42 | #define OpDX 8ull /* DX register */ | ||
43 | #define OpCL 9ull /* CL register (for shifts) */ | ||
44 | #define OpImmByte 10ull /* 8-bit sign extended immediate */ | ||
45 | #define OpOne 11ull /* Implied 1 */ | ||
46 | #define OpImm 12ull /* Sign extended immediate */ | ||
47 | #define OpMem16 13ull /* Memory operand (16-bit). */ | ||
48 | #define OpMem32 14ull /* Memory operand (32-bit). */ | ||
49 | #define OpImmU 15ull /* Immediate operand, zero extended */ | ||
50 | #define OpSI 16ull /* SI/ESI/RSI */ | ||
51 | #define OpImmFAddr 17ull /* Immediate far address */ | ||
52 | #define OpMemFAddr 18ull /* Far address in memory */ | ||
53 | #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */ | ||
54 | #define OpES 20ull /* ES */ | ||
55 | #define OpCS 21ull /* CS */ | ||
56 | #define OpSS 22ull /* SS */ | ||
57 | #define OpDS 23ull /* DS */ | ||
58 | #define OpFS 24ull /* FS */ | ||
59 | #define OpGS 25ull /* GS */ | ||
60 | |||
61 | #define OpBits 5 /* Width of operand field */ | ||
62 | #define OpMask ((1ull << OpBits) - 1) | ||
63 | |||
64 | /* | ||
32 | * Opcode effective-address decode tables. | 65 | * Opcode effective-address decode tables. |
33 | * Note that we only emulate instructions that have at least one memory | 66 | * Note that we only emulate instructions that have at least one memory |
34 | * operand (excluding implicit stack references). We assume that stack | 67 | * operand (excluding implicit stack references). We assume that stack |
@@ -40,37 +73,35 @@ | |||
40 | /* Operand sizes: 8-bit operands or specified/overridden size. */ | 73 | /* Operand sizes: 8-bit operands or specified/overridden size. */ |
41 | #define ByteOp (1<<0) /* 8-bit operands. */ | 74 | #define ByteOp (1<<0) /* 8-bit operands. */ |
42 | /* Destination operand type. */ | 75 | /* Destination operand type. */ |
43 | #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ | 76 | #define DstShift 1 |
44 | #define DstReg (2<<1) /* Register operand. */ | 77 | #define ImplicitOps (OpImplicit << DstShift) |
45 | #define DstMem (3<<1) /* Memory operand. */ | 78 | #define DstReg (OpReg << DstShift) |
46 | #define DstAcc (4<<1) /* Destination Accumulator */ | 79 | #define DstMem (OpMem << DstShift) |
47 | #define DstDI (5<<1) /* Destination is in ES:(E)DI */ | 80 | #define DstAcc (OpAcc << DstShift) |
48 | #define DstMem64 (6<<1) /* 64bit memory operand */ | 81 | #define DstDI (OpDI << DstShift) |
49 | #define DstImmUByte (7<<1) /* 8-bit unsigned immediate operand */ | 82 | #define DstMem64 (OpMem64 << DstShift) |
50 | #define DstDX (8<<1) /* Destination is in DX register */ | 83 | #define DstImmUByte (OpImmUByte << DstShift) |
51 | #define DstMask (0xf<<1) | 84 | #define DstDX (OpDX << DstShift) |
85 | #define DstMask (OpMask << DstShift) | ||
52 | /* Source operand type. */ | 86 | /* Source operand type. */ |
53 | #define SrcNone (0<<5) /* No source operand. */ | 87 | #define SrcShift 6 |
54 | #define SrcReg (1<<5) /* Register operand. */ | 88 | #define SrcNone (OpNone << SrcShift) |
55 | #define SrcMem (2<<5) /* Memory operand. */ | 89 | #define SrcReg (OpReg << SrcShift) |
56 | #define SrcMem16 (3<<5) /* Memory operand (16-bit). */ | 90 | #define SrcMem (OpMem << SrcShift) |
57 | #define SrcMem32 (4<<5) /* Memory operand (32-bit). */ | 91 | #define SrcMem16 (OpMem16 << SrcShift) |
58 | #define SrcImm (5<<5) /* Immediate operand. */ | 92 | #define SrcMem32 (OpMem32 << SrcShift) |
59 | #define SrcImmByte (6<<5) /* 8-bit sign-extended immediate operand. */ | 93 | #define SrcImm (OpImm << SrcShift) |
60 | #define SrcOne (7<<5) /* Implied '1' */ | 94 | #define SrcImmByte (OpImmByte << SrcShift) |
61 | #define SrcImmUByte (8<<5) /* 8-bit unsigned immediate operand. */ | 95 | #define SrcOne (OpOne << SrcShift) |
62 | #define SrcImmU (9<<5) /* Immediate operand, unsigned */ | 96 | #define SrcImmUByte (OpImmUByte << SrcShift) |
63 | #define SrcSI (0xa<<5) /* Source is in the DS:RSI */ | 97 | #define SrcImmU (OpImmU << SrcShift) |
64 | #define SrcImmFAddr (0xb<<5) /* Source is immediate far address */ | 98 | #define SrcSI (OpSI << SrcShift) |
65 | #define SrcMemFAddr (0xc<<5) /* Source is far address in memory */ | 99 | #define SrcImmFAddr (OpImmFAddr << SrcShift) |
66 | #define SrcAcc (0xd<<5) /* Source Accumulator */ | 100 | #define SrcMemFAddr (OpMemFAddr << SrcShift) |
67 | #define SrcImmU16 (0xe<<5) /* Immediate operand, unsigned, 16 bits */ | 101 | #define SrcAcc (OpAcc << SrcShift) |
68 | #define SrcDX (0xf<<5) /* Source is in DX register */ | 102 | #define SrcImmU16 (OpImmU16 << SrcShift) |
69 | #define SrcMask (0xf<<5) | 103 | #define SrcDX (OpDX << SrcShift) |
70 | /* Generic ModRM decode. */ | 104 | #define SrcMask (OpMask << SrcShift) |
71 | #define ModRM (1<<9) | ||
72 | /* Destination is only written; never read. */ | ||
73 | #define Mov (1<<10) | ||
74 | #define BitOp (1<<11) | 105 | #define BitOp (1<<11) |
75 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ | 106 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ |
76 | #define String (1<<13) /* String instruction (rep capable) */ | 107 | #define String (1<<13) /* String instruction (rep capable) */ |
@@ -81,6 +112,10 @@ | |||
81 | #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ | 112 | #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ |
82 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ | 113 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ |
83 | #define Sse (1<<18) /* SSE Vector instruction */ | 114 | #define Sse (1<<18) /* SSE Vector instruction */ |
115 | /* Generic ModRM decode. */ | ||
116 | #define ModRM (1<<19) | ||
117 | /* Destination is only written; never read. */ | ||
118 | #define Mov (1<<20) | ||
84 | /* Misc flags */ | 119 | /* Misc flags */ |
85 | #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ | 120 | #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ |
86 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ | 121 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ |
@@ -91,12 +126,19 @@ | |||
91 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | 126 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ |
92 | #define No64 (1<<28) | 127 | #define No64 (1<<28) |
93 | /* Source 2 operand type */ | 128 | /* Source 2 operand type */ |
94 | #define Src2None (0<<29) | 129 | #define Src2Shift (29) |
95 | #define Src2CL (1<<29) | 130 | #define Src2None (OpNone << Src2Shift) |
96 | #define Src2ImmByte (2<<29) | 131 | #define Src2CL (OpCL << Src2Shift) |
97 | #define Src2One (3<<29) | 132 | #define Src2ImmByte (OpImmByte << Src2Shift) |
98 | #define Src2Imm (4<<29) | 133 | #define Src2One (OpOne << Src2Shift) |
99 | #define Src2Mask (7<<29) | 134 | #define Src2Imm (OpImm << Src2Shift) |
135 | #define Src2ES (OpES << Src2Shift) | ||
136 | #define Src2CS (OpCS << Src2Shift) | ||
137 | #define Src2SS (OpSS << Src2Shift) | ||
138 | #define Src2DS (OpDS << Src2Shift) | ||
139 | #define Src2FS (OpFS << Src2Shift) | ||
140 | #define Src2GS (OpGS << Src2Shift) | ||
141 | #define Src2Mask (OpMask << Src2Shift) | ||
100 | 142 | ||
101 | #define X2(x...) x, x | 143 | #define X2(x...) x, x |
102 | #define X3(x...) X2(x), x | 144 | #define X3(x...) X2(x), x |
@@ -108,8 +150,8 @@ | |||
108 | #define X16(x...) X8(x), X8(x) | 150 | #define X16(x...) X8(x), X8(x) |
109 | 151 | ||
110 | struct opcode { | 152 | struct opcode { |
111 | u32 flags; | 153 | u64 flags : 56; |
112 | u8 intercept; | 154 | u64 intercept : 8; |
113 | union { | 155 | union { |
114 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 156 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
115 | struct opcode *group; | 157 | struct opcode *group; |
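
The decode-table rework replaces the ad-hoc Dst/Src/Src2 bit patterns with one operand-type enumeration (OpNone..OpGS) stored in three 5-bit fields of a now 64-bit flags word: destination at DstShift (1), source at SrcShift (6), second source at Src2Shift (29), each extracted with OpMask; struct opcode packs the 56-bit flags together with an 8-bit intercept code. A stand-alone sketch of the packing and extraction arithmetic (the pack() helper is mine, for illustration only):

    #include <stdint.h>
    #include <stdio.h>

    #define OpBits    5
    #define OpMask    ((1ull << OpBits) - 1)
    #define DstShift  1
    #define SrcShift  6
    #define Src2Shift 29

    static uint64_t pack(uint64_t dst, uint64_t src, uint64_t src2)
    {
            return (dst << DstShift) | (src << SrcShift) | (src2 << Src2Shift);
    }

    int main(void)
    {
            uint64_t flags = pack(3 /* OpMem */, 12 /* OpImm */, 9 /* OpCL */);

            printf("dst=%llu src=%llu src2=%llu\n",
                   (unsigned long long)((flags >> DstShift) & OpMask),
                   (unsigned long long)((flags >> SrcShift) & OpMask),
                   (unsigned long long)((flags >> Src2Shift) & OpMask));
            return 0;
    }
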
@@ -205,105 +247,100 @@ struct gprefix { | |||
205 | #define ON64(x) | 247 | #define ON64(x) |
206 | #endif | 248 | #endif |
207 | 249 | ||
208 | #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix, _dsttype) \ | 250 | #define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype) \ |
209 | do { \ | 251 | do { \ |
210 | __asm__ __volatile__ ( \ | 252 | __asm__ __volatile__ ( \ |
211 | _PRE_EFLAGS("0", "4", "2") \ | 253 | _PRE_EFLAGS("0", "4", "2") \ |
212 | _op _suffix " %"_x"3,%1; " \ | 254 | _op _suffix " %"_x"3,%1; " \ |
213 | _POST_EFLAGS("0", "4", "2") \ | 255 | _POST_EFLAGS("0", "4", "2") \ |
214 | : "=m" (_eflags), "+q" (*(_dsttype*)&(_dst).val),\ | 256 | : "=m" ((ctxt)->eflags), \ |
257 | "+q" (*(_dsttype*)&(ctxt)->dst.val), \ | ||
215 | "=&r" (_tmp) \ | 258 | "=&r" (_tmp) \ |
216 | : _y ((_src).val), "i" (EFLAGS_MASK)); \ | 259 | : _y ((ctxt)->src.val), "i" (EFLAGS_MASK)); \ |
217 | } while (0) | 260 | } while (0) |
218 | 261 | ||
219 | 262 | ||
220 | /* Raw emulation: instruction has two explicit operands. */ | 263 | /* Raw emulation: instruction has two explicit operands. */ |
221 | #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ | 264 | #define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy) \ |
222 | do { \ | 265 | do { \ |
223 | unsigned long _tmp; \ | 266 | unsigned long _tmp; \ |
224 | \ | 267 | \ |
225 | switch ((_dst).bytes) { \ | 268 | switch ((ctxt)->dst.bytes) { \ |
226 | case 2: \ | 269 | case 2: \ |
227 | ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w",u16);\ | 270 | ____emulate_2op(ctxt,_op,_wx,_wy,"w",u16); \ |
228 | break; \ | 271 | break; \ |
229 | case 4: \ | 272 | case 4: \ |
230 | ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l",u32);\ | 273 | ____emulate_2op(ctxt,_op,_lx,_ly,"l",u32); \ |
231 | break; \ | 274 | break; \ |
232 | case 8: \ | 275 | case 8: \ |
233 | ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q",u64)); \ | 276 | ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \ |
234 | break; \ | 277 | break; \ |
235 | } \ | 278 | } \ |
236 | } while (0) | 279 | } while (0) |
237 | 280 | ||
238 | #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ | 281 | #define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ |
239 | do { \ | 282 | do { \ |
240 | unsigned long _tmp; \ | 283 | unsigned long _tmp; \ |
241 | switch ((_dst).bytes) { \ | 284 | switch ((ctxt)->dst.bytes) { \ |
242 | case 1: \ | 285 | case 1: \ |
243 | ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b",u8); \ | 286 | ____emulate_2op(ctxt,_op,_bx,_by,"b",u8); \ |
244 | break; \ | 287 | break; \ |
245 | default: \ | 288 | default: \ |
246 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ | 289 | __emulate_2op_nobyte(ctxt, _op, \ |
247 | _wx, _wy, _lx, _ly, _qx, _qy); \ | 290 | _wx, _wy, _lx, _ly, _qx, _qy); \ |
248 | break; \ | 291 | break; \ |
249 | } \ | 292 | } \ |
250 | } while (0) | 293 | } while (0) |
251 | 294 | ||
252 | /* Source operand is byte-sized and may be restricted to just %cl. */ | 295 | /* Source operand is byte-sized and may be restricted to just %cl. */ |
253 | #define emulate_2op_SrcB(_op, _src, _dst, _eflags) \ | 296 | #define emulate_2op_SrcB(ctxt, _op) \ |
254 | __emulate_2op(_op, _src, _dst, _eflags, \ | 297 | __emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c") |
255 | "b", "c", "b", "c", "b", "c", "b", "c") | ||
256 | 298 | ||
257 | /* Source operand is byte, word, long or quad sized. */ | 299 | /* Source operand is byte, word, long or quad sized. */ |
258 | #define emulate_2op_SrcV(_op, _src, _dst, _eflags) \ | 300 | #define emulate_2op_SrcV(ctxt, _op) \ |
259 | __emulate_2op(_op, _src, _dst, _eflags, \ | 301 | __emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r") |
260 | "b", "q", "w", "r", _LO32, "r", "", "r") | ||
261 | 302 | ||
262 | /* Source operand is word, long or quad sized. */ | 303 | /* Source operand is word, long or quad sized. */ |
263 | #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \ | 304 | #define emulate_2op_SrcV_nobyte(ctxt, _op) \ |
264 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ | 305 | __emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r") |
265 | "w", "r", _LO32, "r", "", "r") | ||
266 | 306 | ||
267 | /* Instruction has three operands and one operand is stored in ECX register */ | 307 | /* Instruction has three operands and one operand is stored in ECX register */ |
268 | #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ | 308 | #define __emulate_2op_cl(ctxt, _op, _suffix, _type) \ |
269 | do { \ | 309 | do { \ |
270 | unsigned long _tmp; \ | 310 | unsigned long _tmp; \ |
271 | _type _clv = (_cl).val; \ | 311 | _type _clv = (ctxt)->src2.val; \ |
272 | _type _srcv = (_src).val; \ | 312 | _type _srcv = (ctxt)->src.val; \ |
273 | _type _dstv = (_dst).val; \ | 313 | _type _dstv = (ctxt)->dst.val; \ |
274 | \ | 314 | \ |
275 | __asm__ __volatile__ ( \ | 315 | __asm__ __volatile__ ( \ |
276 | _PRE_EFLAGS("0", "5", "2") \ | 316 | _PRE_EFLAGS("0", "5", "2") \ |
277 | _op _suffix " %4,%1 \n" \ | 317 | _op _suffix " %4,%1 \n" \ |
278 | _POST_EFLAGS("0", "5", "2") \ | 318 | _POST_EFLAGS("0", "5", "2") \ |
279 | : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ | 319 | : "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \ |
280 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ | 320 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ |
281 | ); \ | 321 | ); \ |
282 | \ | 322 | \ |
283 | (_cl).val = (unsigned long) _clv; \ | 323 | (ctxt)->src2.val = (unsigned long) _clv; \ |
284 | (_src).val = (unsigned long) _srcv; \ | 324 | (ctxt)->src2.val = (unsigned long) _srcv; \ |
285 | (_dst).val = (unsigned long) _dstv; \ | 325 | (ctxt)->dst.val = (unsigned long) _dstv; \ |
286 | } while (0) | 326 | } while (0) |
287 | 327 | ||
288 | #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ | 328 | #define emulate_2op_cl(ctxt, _op) \ |
289 | do { \ | 329 | do { \ |
290 | switch ((_dst).bytes) { \ | 330 | switch ((ctxt)->dst.bytes) { \ |
291 | case 2: \ | 331 | case 2: \ |
292 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 332 | __emulate_2op_cl(ctxt, _op, "w", u16); \ |
293 | "w", unsigned short); \ | ||
294 | break; \ | 333 | break; \ |
295 | case 4: \ | 334 | case 4: \ |
296 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 335 | __emulate_2op_cl(ctxt, _op, "l", u32); \ |
297 | "l", unsigned int); \ | ||
298 | break; \ | 336 | break; \ |
299 | case 8: \ | 337 | case 8: \ |
300 | ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 338 | ON64(__emulate_2op_cl(ctxt, _op, "q", ulong)); \ |
301 | "q", unsigned long)); \ | ||
302 | break; \ | 339 | break; \ |
303 | } \ | 340 | } \ |
304 | } while (0) | 341 | } while (0) |
305 | 342 | ||
306 | #define __emulate_1op(_op, _dst, _eflags, _suffix) \ | 343 | #define __emulate_1op(ctxt, _op, _suffix) \ |
307 | do { \ | 344 | do { \ |
308 | unsigned long _tmp; \ | 345 | unsigned long _tmp; \ |
309 | \ | 346 | \ |
@@ -311,39 +348,27 @@ struct gprefix { | |||
311 | _PRE_EFLAGS("0", "3", "2") \ | 348 | _PRE_EFLAGS("0", "3", "2") \ |
312 | _op _suffix " %1; " \ | 349 | _op _suffix " %1; " \ |
313 | _POST_EFLAGS("0", "3", "2") \ | 350 | _POST_EFLAGS("0", "3", "2") \ |
314 | : "=m" (_eflags), "+m" ((_dst).val), \ | 351 | : "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \ |
315 | "=&r" (_tmp) \ | 352 | "=&r" (_tmp) \ |
316 | : "i" (EFLAGS_MASK)); \ | 353 | : "i" (EFLAGS_MASK)); \ |
317 | } while (0) | 354 | } while (0) |
318 | 355 | ||
319 | /* Instruction has only one explicit operand (no source operand). */ | 356 | /* Instruction has only one explicit operand (no source operand). */ |
320 | #define emulate_1op(_op, _dst, _eflags) \ | 357 | #define emulate_1op(ctxt, _op) \ |
321 | do { \ | 358 | do { \ |
322 | switch ((_dst).bytes) { \ | 359 | switch ((ctxt)->dst.bytes) { \ |
323 | case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ | 360 | case 1: __emulate_1op(ctxt, _op, "b"); break; \ |
324 | case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ | 361 | case 2: __emulate_1op(ctxt, _op, "w"); break; \ |
325 | case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ | 362 | case 4: __emulate_1op(ctxt, _op, "l"); break; \ |
326 | case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ | 363 | case 8: ON64(__emulate_1op(ctxt, _op, "q")); break; \ |
327 | } \ | 364 | } \ |
328 | } while (0) | 365 | } while (0) |
329 | 366 | ||
330 | #define __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, _suffix) \ | 367 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ |
331 | do { \ | ||
332 | unsigned long _tmp; \ | ||
333 | \ | ||
334 | __asm__ __volatile__ ( \ | ||
335 | _PRE_EFLAGS("0", "4", "1") \ | ||
336 | _op _suffix " %5; " \ | ||
337 | _POST_EFLAGS("0", "4", "1") \ | ||
338 | : "=m" (_eflags), "=&r" (_tmp), \ | ||
339 | "+a" (_rax), "+d" (_rdx) \ | ||
340 | : "i" (EFLAGS_MASK), "m" ((_src).val), \ | ||
341 | "a" (_rax), "d" (_rdx)); \ | ||
342 | } while (0) | ||
343 | |||
344 | #define __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _suffix, _ex) \ | ||
345 | do { \ | 368 | do { \ |
346 | unsigned long _tmp; \ | 369 | unsigned long _tmp; \ |
370 | ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX]; \ | ||
371 | ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX]; \ | ||
347 | \ | 372 | \ |
348 | __asm__ __volatile__ ( \ | 373 | __asm__ __volatile__ ( \ |
349 | _PRE_EFLAGS("0", "5", "1") \ | 374 | _PRE_EFLAGS("0", "5", "1") \ |
@@ -356,53 +381,27 @@ struct gprefix { | |||
356 | "jmp 2b \n\t" \ | 381 | "jmp 2b \n\t" \ |
357 | ".popsection \n\t" \ | 382 | ".popsection \n\t" \ |
358 | _ASM_EXTABLE(1b, 3b) \ | 383 | _ASM_EXTABLE(1b, 3b) \ |
359 | : "=m" (_eflags), "=&r" (_tmp), \ | 384 | : "=m" ((ctxt)->eflags), "=&r" (_tmp), \ |
360 | "+a" (_rax), "+d" (_rdx), "+qm"(_ex) \ | 385 | "+a" (*rax), "+d" (*rdx), "+qm"(_ex) \ |
361 | : "i" (EFLAGS_MASK), "m" ((_src).val), \ | 386 | : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val), \ |
362 | "a" (_rax), "d" (_rdx)); \ | 387 | "a" (*rax), "d" (*rdx)); \ |
363 | } while (0) | 388 | } while (0) |
364 | 389 | ||
365 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ | 390 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ |
366 | #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ | 391 | #define emulate_1op_rax_rdx(ctxt, _op, _ex) \ |
367 | do { \ | 392 | do { \ |
368 | switch((_src).bytes) { \ | 393 | switch((ctxt)->src.bytes) { \ |
369 | case 1: \ | 394 | case 1: \ |
370 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | 395 | __emulate_1op_rax_rdx(ctxt, _op, "b", _ex); \ |
371 | _eflags, "b"); \ | ||
372 | break; \ | 396 | break; \ |
373 | case 2: \ | 397 | case 2: \ |
374 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | 398 | __emulate_1op_rax_rdx(ctxt, _op, "w", _ex); \ |
375 | _eflags, "w"); \ | ||
376 | break; \ | 399 | break; \ |
377 | case 4: \ | 400 | case 4: \ |
378 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | 401 | __emulate_1op_rax_rdx(ctxt, _op, "l", _ex); \ |
379 | _eflags, "l"); \ | ||
380 | break; \ | ||
381 | case 8: \ | ||
382 | ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | ||
383 | _eflags, "q")); \ | ||
384 | break; \ | ||
385 | } \ | ||
386 | } while (0) | ||
387 | |||
388 | #define emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _ex) \ | ||
389 | do { \ | ||
390 | switch((_src).bytes) { \ | ||
391 | case 1: \ | ||
392 | __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \ | ||
393 | _eflags, "b", _ex); \ | ||
394 | break; \ | ||
395 | case 2: \ | ||
396 | __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \ | ||
397 | _eflags, "w", _ex); \ | ||
398 | break; \ | ||
399 | case 4: \ | ||
400 | __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \ | ||
401 | _eflags, "l", _ex); \ | ||
402 | break; \ | 402 | break; \ |
403 | case 8: ON64( \ | 403 | case 8: ON64( \ |
404 | __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \ | 404 | __emulate_1op_rax_rdx(ctxt, _op, "q", _ex)); \ |
405 | _eflags, "q", _ex)); \ | ||
406 | break; \ | 405 | break; \ |
407 | } \ | 406 | } \ |
408 | } while (0) | 407 | } while (0) |
@@ -651,41 +650,50 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, | |||
651 | return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); | 650 | return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); |
652 | } | 651 | } |
653 | 652 | ||
654 | static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, | 653 | /* |
655 | unsigned long eip, u8 *dest) | 654 | * Fetch the next byte of the instruction being emulated which is pointed to |
655 | * by ctxt->_eip, then increment ctxt->_eip. | ||
656 | * | ||
657 | * Also prefetch the remaining bytes of the instruction without crossing page | ||
658 | * boundary if they are not in fetch_cache yet. | ||
659 | */ | ||
660 | static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest) | ||
656 | { | 661 | { |
657 | struct fetch_cache *fc = &ctxt->fetch; | 662 | struct fetch_cache *fc = &ctxt->fetch; |
658 | int rc; | 663 | int rc; |
659 | int size, cur_size; | 664 | int size, cur_size; |
660 | 665 | ||
661 | if (eip == fc->end) { | 666 | if (ctxt->_eip == fc->end) { |
662 | unsigned long linear; | 667 | unsigned long linear; |
663 | struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; | 668 | struct segmented_address addr = { .seg = VCPU_SREG_CS, |
669 | .ea = ctxt->_eip }; | ||
664 | cur_size = fc->end - fc->start; | 670 | cur_size = fc->end - fc->start; |
665 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); | 671 | size = min(15UL - cur_size, |
672 | PAGE_SIZE - offset_in_page(ctxt->_eip)); | ||
666 | rc = __linearize(ctxt, addr, size, false, true, &linear); | 673 | rc = __linearize(ctxt, addr, size, false, true, &linear); |
667 | if (rc != X86EMUL_CONTINUE) | 674 | if (unlikely(rc != X86EMUL_CONTINUE)) |
668 | return rc; | 675 | return rc; |
669 | rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, | 676 | rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, |
670 | size, &ctxt->exception); | 677 | size, &ctxt->exception); |
671 | if (rc != X86EMUL_CONTINUE) | 678 | if (unlikely(rc != X86EMUL_CONTINUE)) |
672 | return rc; | 679 | return rc; |
673 | fc->end += size; | 680 | fc->end += size; |
674 | } | 681 | } |
675 | *dest = fc->data[eip - fc->start]; | 682 | *dest = fc->data[ctxt->_eip - fc->start]; |
683 | ctxt->_eip++; | ||
676 | return X86EMUL_CONTINUE; | 684 | return X86EMUL_CONTINUE; |
677 | } | 685 | } |
678 | 686 | ||
679 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | 687 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, |
680 | unsigned long eip, void *dest, unsigned size) | 688 | void *dest, unsigned size) |
681 | { | 689 | { |
682 | int rc; | 690 | int rc; |
683 | 691 | ||
684 | /* x86 instructions are limited to 15 bytes. */ | 692 | /* x86 instructions are limited to 15 bytes. */ |
685 | if (eip + size - ctxt->eip > 15) | 693 | if (unlikely(ctxt->_eip + size - ctxt->eip > 15)) |
686 | return X86EMUL_UNHANDLEABLE; | 694 | return X86EMUL_UNHANDLEABLE; |
687 | while (size--) { | 695 | while (size--) { |
688 | rc = do_insn_fetch_byte(ctxt, eip++, dest++); | 696 | rc = do_insn_fetch_byte(ctxt, dest++); |
689 | if (rc != X86EMUL_CONTINUE) | 697 | if (rc != X86EMUL_CONTINUE) |
690 | return rc; | 698 | return rc; |
691 | } | 699 | } |
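
do_insn_fetch_byte() now advances ctxt->_eip itself and keeps the existing prefetch behaviour: when the fetch cache is empty at the current position it reads as many bytes as possible without exceeding the 15-byte x86 instruction limit or crossing a page boundary. The size computation is worth seeing in isolation; a pure restatement of the min() in the hunk, assuming a 4K page size:

    #include <stdint.h>

    #define PAGE_SIZE 4096UL

    /* Bytes to prefetch: stay within 15 total and within the current page. */
    unsigned long fetch_chunk(unsigned long cached, uint64_t eip)
    {
            unsigned long left_in_insn = 15UL - cached;
            unsigned long left_in_page = PAGE_SIZE - (eip & (PAGE_SIZE - 1));

            return left_in_insn < left_in_page ? left_in_insn : left_in_page;
    }
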
@@ -693,20 +701,18 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | |||
693 | } | 701 | } |
694 | 702 | ||
695 | /* Fetch next part of the instruction being emulated. */ | 703 | /* Fetch next part of the instruction being emulated. */ |
696 | #define insn_fetch(_type, _size, _eip) \ | 704 | #define insn_fetch(_type, _ctxt) \ |
697 | ({ unsigned long _x; \ | 705 | ({ unsigned long _x; \ |
698 | rc = do_insn_fetch(ctxt, (_eip), &_x, (_size)); \ | 706 | rc = do_insn_fetch(_ctxt, &_x, sizeof(_type)); \ |
699 | if (rc != X86EMUL_CONTINUE) \ | 707 | if (rc != X86EMUL_CONTINUE) \ |
700 | goto done; \ | 708 | goto done; \ |
701 | (_eip) += (_size); \ | ||
702 | (_type)_x; \ | 709 | (_type)_x; \ |
703 | }) | 710 | }) |
704 | 711 | ||
705 | #define insn_fetch_arr(_arr, _size, _eip) \ | 712 | #define insn_fetch_arr(_arr, _size, _ctxt) \ |
706 | ({ rc = do_insn_fetch(ctxt, (_eip), _arr, (_size)); \ | 713 | ({ rc = do_insn_fetch(_ctxt, _arr, (_size)); \ |
707 | if (rc != X86EMUL_CONTINUE) \ | 714 | if (rc != X86EMUL_CONTINUE) \ |
708 | goto done; \ | 715 | goto done; \ |
709 | (_eip) += (_size); \ | ||
710 | }) | 716 | }) |
711 | 717 | ||
712 | /* | 718 | /* |
@@ -894,7 +900,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
894 | ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ | 900 | ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ |
895 | } | 901 | } |
896 | 902 | ||
897 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); | 903 | ctxt->modrm = insn_fetch(u8, ctxt); |
898 | ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; | 904 | ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; |
899 | ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; | 905 | ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; |
900 | ctxt->modrm_rm |= (ctxt->modrm & 0x07); | 906 | ctxt->modrm_rm |= (ctxt->modrm & 0x07); |
@@ -928,13 +934,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
928 | switch (ctxt->modrm_mod) { | 934 | switch (ctxt->modrm_mod) { |
929 | case 0: | 935 | case 0: |
930 | if (ctxt->modrm_rm == 6) | 936 | if (ctxt->modrm_rm == 6) |
931 | modrm_ea += insn_fetch(u16, 2, ctxt->_eip); | 937 | modrm_ea += insn_fetch(u16, ctxt); |
932 | break; | 938 | break; |
933 | case 1: | 939 | case 1: |
934 | modrm_ea += insn_fetch(s8, 1, ctxt->_eip); | 940 | modrm_ea += insn_fetch(s8, ctxt); |
935 | break; | 941 | break; |
936 | case 2: | 942 | case 2: |
937 | modrm_ea += insn_fetch(u16, 2, ctxt->_eip); | 943 | modrm_ea += insn_fetch(u16, ctxt); |
938 | break; | 944 | break; |
939 | } | 945 | } |
940 | switch (ctxt->modrm_rm) { | 946 | switch (ctxt->modrm_rm) { |
@@ -971,13 +977,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
971 | } else { | 977 | } else { |
972 | /* 32/64-bit ModR/M decode. */ | 978 | /* 32/64-bit ModR/M decode. */ |
973 | if ((ctxt->modrm_rm & 7) == 4) { | 979 | if ((ctxt->modrm_rm & 7) == 4) { |
974 | sib = insn_fetch(u8, 1, ctxt->_eip); | 980 | sib = insn_fetch(u8, ctxt); |
975 | index_reg |= (sib >> 3) & 7; | 981 | index_reg |= (sib >> 3) & 7; |
976 | base_reg |= sib & 7; | 982 | base_reg |= sib & 7; |
977 | scale = sib >> 6; | 983 | scale = sib >> 6; |
978 | 984 | ||
979 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) | 985 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) |
980 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); | 986 | modrm_ea += insn_fetch(s32, ctxt); |
981 | else | 987 | else |
982 | modrm_ea += ctxt->regs[base_reg]; | 988 | modrm_ea += ctxt->regs[base_reg]; |
983 | if (index_reg != 4) | 989 | if (index_reg != 4) |
@@ -990,13 +996,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
990 | switch (ctxt->modrm_mod) { | 996 | switch (ctxt->modrm_mod) { |
991 | case 0: | 997 | case 0: |
992 | if (ctxt->modrm_rm == 5) | 998 | if (ctxt->modrm_rm == 5) |
993 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); | 999 | modrm_ea += insn_fetch(s32, ctxt); |
994 | break; | 1000 | break; |
995 | case 1: | 1001 | case 1: |
996 | modrm_ea += insn_fetch(s8, 1, ctxt->_eip); | 1002 | modrm_ea += insn_fetch(s8, ctxt); |
997 | break; | 1003 | break; |
998 | case 2: | 1004 | case 2: |
999 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); | 1005 | modrm_ea += insn_fetch(s32, ctxt); |
1000 | break; | 1006 | break; |
1001 | } | 1007 | } |
1002 | } | 1008 | } |
@@ -1013,13 +1019,13 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
1013 | op->type = OP_MEM; | 1019 | op->type = OP_MEM; |
1014 | switch (ctxt->ad_bytes) { | 1020 | switch (ctxt->ad_bytes) { |
1015 | case 2: | 1021 | case 2: |
1016 | op->addr.mem.ea = insn_fetch(u16, 2, ctxt->_eip); | 1022 | op->addr.mem.ea = insn_fetch(u16, ctxt); |
1017 | break; | 1023 | break; |
1018 | case 4: | 1024 | case 4: |
1019 | op->addr.mem.ea = insn_fetch(u32, 4, ctxt->_eip); | 1025 | op->addr.mem.ea = insn_fetch(u32, ctxt); |
1020 | break; | 1026 | break; |
1021 | case 8: | 1027 | case 8: |
1022 | op->addr.mem.ea = insn_fetch(u64, 8, ctxt->_eip); | 1028 | op->addr.mem.ea = insn_fetch(u64, ctxt); |
1023 | break; | 1029 | break; |
1024 | } | 1030 | } |
1025 | done: | 1031 | done: |
@@ -1452,15 +1458,18 @@ static int em_popf(struct x86_emulate_ctxt *ctxt) | |||
1452 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); | 1458 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); |
1453 | } | 1459 | } |
1454 | 1460 | ||
1455 | static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1461 | static int em_push_sreg(struct x86_emulate_ctxt *ctxt) |
1456 | { | 1462 | { |
1463 | int seg = ctxt->src2.val; | ||
1464 | |||
1457 | ctxt->src.val = get_segment_selector(ctxt, seg); | 1465 | ctxt->src.val = get_segment_selector(ctxt, seg); |
1458 | 1466 | ||
1459 | return em_push(ctxt); | 1467 | return em_push(ctxt); |
1460 | } | 1468 | } |
1461 | 1469 | ||
1462 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1470 | static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) |
1463 | { | 1471 | { |
1472 | int seg = ctxt->src2.val; | ||
1464 | unsigned long selector; | 1473 | unsigned long selector; |
1465 | int rc; | 1474 | int rc; |
1466 | 1475 | ||
@@ -1674,64 +1683,74 @@ static int em_grp2(struct x86_emulate_ctxt *ctxt) | |||
1674 | { | 1683 | { |
1675 | switch (ctxt->modrm_reg) { | 1684 | switch (ctxt->modrm_reg) { |
1676 | case 0: /* rol */ | 1685 | case 0: /* rol */ |
1677 | emulate_2op_SrcB("rol", ctxt->src, ctxt->dst, ctxt->eflags); | 1686 | emulate_2op_SrcB(ctxt, "rol"); |
1678 | break; | 1687 | break; |
1679 | case 1: /* ror */ | 1688 | case 1: /* ror */ |
1680 | emulate_2op_SrcB("ror", ctxt->src, ctxt->dst, ctxt->eflags); | 1689 | emulate_2op_SrcB(ctxt, "ror"); |
1681 | break; | 1690 | break; |
1682 | case 2: /* rcl */ | 1691 | case 2: /* rcl */ |
1683 | emulate_2op_SrcB("rcl", ctxt->src, ctxt->dst, ctxt->eflags); | 1692 | emulate_2op_SrcB(ctxt, "rcl"); |
1684 | break; | 1693 | break; |
1685 | case 3: /* rcr */ | 1694 | case 3: /* rcr */ |
1686 | emulate_2op_SrcB("rcr", ctxt->src, ctxt->dst, ctxt->eflags); | 1695 | emulate_2op_SrcB(ctxt, "rcr"); |
1687 | break; | 1696 | break; |
1688 | case 4: /* sal/shl */ | 1697 | case 4: /* sal/shl */ |
1689 | case 6: /* sal/shl */ | 1698 | case 6: /* sal/shl */ |
1690 | emulate_2op_SrcB("sal", ctxt->src, ctxt->dst, ctxt->eflags); | 1699 | emulate_2op_SrcB(ctxt, "sal"); |
1691 | break; | 1700 | break; |
1692 | case 5: /* shr */ | 1701 | case 5: /* shr */ |
1693 | emulate_2op_SrcB("shr", ctxt->src, ctxt->dst, ctxt->eflags); | 1702 | emulate_2op_SrcB(ctxt, "shr"); |
1694 | break; | 1703 | break; |
1695 | case 7: /* sar */ | 1704 | case 7: /* sar */ |
1696 | emulate_2op_SrcB("sar", ctxt->src, ctxt->dst, ctxt->eflags); | 1705 | emulate_2op_SrcB(ctxt, "sar"); |
1697 | break; | 1706 | break; |
1698 | } | 1707 | } |
1699 | return X86EMUL_CONTINUE; | 1708 | return X86EMUL_CONTINUE; |
1700 | } | 1709 | } |
1701 | 1710 | ||
1702 | static int em_grp3(struct x86_emulate_ctxt *ctxt) | 1711 | static int em_not(struct x86_emulate_ctxt *ctxt) |
1712 | { | ||
1713 | ctxt->dst.val = ~ctxt->dst.val; | ||
1714 | return X86EMUL_CONTINUE; | ||
1715 | } | ||
1716 | |||
1717 | static int em_neg(struct x86_emulate_ctxt *ctxt) | ||
1718 | { | ||
1719 | emulate_1op(ctxt, "neg"); | ||
1720 | return X86EMUL_CONTINUE; | ||
1721 | } | ||
1722 | |||
1723 | static int em_mul_ex(struct x86_emulate_ctxt *ctxt) | ||
1724 | { | ||
1725 | u8 ex = 0; | ||
1726 | |||
1727 | emulate_1op_rax_rdx(ctxt, "mul", ex); | ||
1728 | return X86EMUL_CONTINUE; | ||
1729 | } | ||
1730 | |||
1731 | static int em_imul_ex(struct x86_emulate_ctxt *ctxt) | ||
1732 | { | ||
1733 | u8 ex = 0; | ||
1734 | |||
1735 | emulate_1op_rax_rdx(ctxt, "imul", ex); | ||
1736 | return X86EMUL_CONTINUE; | ||
1737 | } | ||
1738 | |||
1739 | static int em_div_ex(struct x86_emulate_ctxt *ctxt) | ||
1703 | { | 1740 | { |
1704 | unsigned long *rax = &ctxt->regs[VCPU_REGS_RAX]; | ||
1705 | unsigned long *rdx = &ctxt->regs[VCPU_REGS_RDX]; | ||
1706 | u8 de = 0; | 1741 | u8 de = 0; |
1707 | 1742 | ||
1708 | switch (ctxt->modrm_reg) { | 1743 | emulate_1op_rax_rdx(ctxt, "div", de); |
1709 | case 0 ... 1: /* test */ | 1744 | if (de) |
1710 | emulate_2op_SrcV("test", ctxt->src, ctxt->dst, ctxt->eflags); | 1745 | return emulate_de(ctxt); |
1711 | break; | 1746 | return X86EMUL_CONTINUE; |
1712 | case 2: /* not */ | 1747 | } |
1713 | ctxt->dst.val = ~ctxt->dst.val; | 1748 | |
1714 | break; | 1749 | static int em_idiv_ex(struct x86_emulate_ctxt *ctxt) |
1715 | case 3: /* neg */ | 1750 | { |
1716 | emulate_1op("neg", ctxt->dst, ctxt->eflags); | 1751 | u8 de = 0; |
1717 | break; | 1752 | |
1718 | case 4: /* mul */ | 1753 | emulate_1op_rax_rdx(ctxt, "idiv", de); |
1719 | emulate_1op_rax_rdx("mul", ctxt->src, *rax, *rdx, ctxt->eflags); | ||
1720 | break; | ||
1721 | case 5: /* imul */ | ||
1722 | emulate_1op_rax_rdx("imul", ctxt->src, *rax, *rdx, ctxt->eflags); | ||
1723 | break; | ||
1724 | case 6: /* div */ | ||
1725 | emulate_1op_rax_rdx_ex("div", ctxt->src, *rax, *rdx, | ||
1726 | ctxt->eflags, de); | ||
1727 | break; | ||
1728 | case 7: /* idiv */ | ||
1729 | emulate_1op_rax_rdx_ex("idiv", ctxt->src, *rax, *rdx, | ||
1730 | ctxt->eflags, de); | ||
1731 | break; | ||
1732 | default: | ||
1733 | return X86EMUL_UNHANDLEABLE; | ||
1734 | } | ||
1735 | if (de) | 1754 | if (de) |
1736 | return emulate_de(ctxt); | 1755 | return emulate_de(ctxt); |
1737 | return X86EMUL_CONTINUE; | 1756 | return X86EMUL_CONTINUE; |
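
The old em_grp3() switch over ctxt->modrm_reg is split into dedicated callbacks (em_not, em_neg, em_mul_ex, em_imul_ex, em_div_ex, em_idiv_ex), so group 3 can be dispatched through the ordinary execute pointer in the opcode tables rather than a hand-written switch. A hedged sketch of that dispatch pattern; the table below is illustrative only, the real group table lives in the opcode arrays, which are not shown in this hunk:

    #include <stddef.h>

    struct ctxt;                                 /* stand-in for x86_emulate_ctxt */
    typedef int (*exec_fn)(struct ctxt *);

    static int em_not(struct ctxt *c)  { (void)c; return 0; }
    static int em_neg(struct ctxt *c)  { (void)c; return 0; }

    /* Indexed by the ModRM reg field (0..7); NULL means "not handled here". */
    static const exec_fn group3[8] = {
            NULL, NULL, em_not, em_neg, NULL, NULL, NULL, NULL,
    };

    int dispatch_group3(struct ctxt *c, unsigned int modrm_reg)
    {
            exec_fn fn = group3[modrm_reg & 7];

            return fn ? fn(c) : -1;              /* -1: not handled by this table */
    }
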
@@ -1743,10 +1762,10 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) | |||
1743 | 1762 | ||
1744 | switch (ctxt->modrm_reg) { | 1763 | switch (ctxt->modrm_reg) { |
1745 | case 0: /* inc */ | 1764 | case 0: /* inc */ |
1746 | emulate_1op("inc", ctxt->dst, ctxt->eflags); | 1765 | emulate_1op(ctxt, "inc"); |
1747 | break; | 1766 | break; |
1748 | case 1: /* dec */ | 1767 | case 1: /* dec */ |
1749 | emulate_1op("dec", ctxt->dst, ctxt->eflags); | 1768 | emulate_1op(ctxt, "dec"); |
1750 | break; | 1769 | break; |
1751 | case 2: /* call near abs */ { | 1770 | case 2: /* call near abs */ { |
1752 | long int old_eip; | 1771 | long int old_eip; |
@@ -1812,8 +1831,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) | |||
1812 | return rc; | 1831 | return rc; |
1813 | } | 1832 | } |
1814 | 1833 | ||
1815 | static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, int seg) | 1834 | static int em_lseg(struct x86_emulate_ctxt *ctxt) |
1816 | { | 1835 | { |
1836 | int seg = ctxt->src2.val; | ||
1817 | unsigned short sel; | 1837 | unsigned short sel; |
1818 | int rc; | 1838 | int rc; |
1819 | 1839 | ||
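em_lseg() above takes the target segment from ctxt->src2.val and loads it from a far pointer fetched as SrcMemFAddr, i.e. op_bytes of offset followed by a 16-bit selector. A small sketch of that memory layout (illustrative only, little-endian host assumed):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static void split_far_ptr(const uint8_t *p, int op_bytes,
			  uint32_t *offset, uint16_t *selector)
{
	*offset = 0;
	memcpy(offset, p, op_bytes);		/* 2- or 4-byte offset comes first */
	memcpy(selector, p + op_bytes, 2);	/* 16-bit selector follows the offset */
}

int main(void)
{
	/* 32-bit offset 0x00401000, selector 0x0010 */
	uint8_t mem[6] = { 0x00, 0x10, 0x40, 0x00, 0x10, 0x00 };
	uint32_t off;
	uint16_t sel;

	split_far_ptr(mem, 4, &off, &sel);
	printf("offset=%#x selector=%#x\n", off, sel);
	return 0;
}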
@@ -2452,7 +2472,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
2452 | ctxt->src.type = OP_IMM; | 2472 | ctxt->src.type = OP_IMM; |
2453 | ctxt->src.val = 0; | 2473 | ctxt->src.val = 0; |
2454 | ctxt->src.bytes = 1; | 2474 | ctxt->src.bytes = 1; |
2455 | emulate_2op_SrcV("or", ctxt->src, ctxt->dst, ctxt->eflags); | 2475 | emulate_2op_SrcV(ctxt, "or"); |
2456 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); | 2476 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); |
2457 | if (cf) | 2477 | if (cf) |
2458 | ctxt->eflags |= X86_EFLAGS_CF; | 2478 | ctxt->eflags |= X86_EFLAGS_CF; |
@@ -2502,49 +2522,49 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | |||
2502 | 2522 | ||
2503 | static int em_add(struct x86_emulate_ctxt *ctxt) | 2523 | static int em_add(struct x86_emulate_ctxt *ctxt) |
2504 | { | 2524 | { |
2505 | emulate_2op_SrcV("add", ctxt->src, ctxt->dst, ctxt->eflags); | 2525 | emulate_2op_SrcV(ctxt, "add"); |
2506 | return X86EMUL_CONTINUE; | 2526 | return X86EMUL_CONTINUE; |
2507 | } | 2527 | } |
2508 | 2528 | ||
2509 | static int em_or(struct x86_emulate_ctxt *ctxt) | 2529 | static int em_or(struct x86_emulate_ctxt *ctxt) |
2510 | { | 2530 | { |
2511 | emulate_2op_SrcV("or", ctxt->src, ctxt->dst, ctxt->eflags); | 2531 | emulate_2op_SrcV(ctxt, "or"); |
2512 | return X86EMUL_CONTINUE; | 2532 | return X86EMUL_CONTINUE; |
2513 | } | 2533 | } |
2514 | 2534 | ||
2515 | static int em_adc(struct x86_emulate_ctxt *ctxt) | 2535 | static int em_adc(struct x86_emulate_ctxt *ctxt) |
2516 | { | 2536 | { |
2517 | emulate_2op_SrcV("adc", ctxt->src, ctxt->dst, ctxt->eflags); | 2537 | emulate_2op_SrcV(ctxt, "adc"); |
2518 | return X86EMUL_CONTINUE; | 2538 | return X86EMUL_CONTINUE; |
2519 | } | 2539 | } |
2520 | 2540 | ||
2521 | static int em_sbb(struct x86_emulate_ctxt *ctxt) | 2541 | static int em_sbb(struct x86_emulate_ctxt *ctxt) |
2522 | { | 2542 | { |
2523 | emulate_2op_SrcV("sbb", ctxt->src, ctxt->dst, ctxt->eflags); | 2543 | emulate_2op_SrcV(ctxt, "sbb"); |
2524 | return X86EMUL_CONTINUE; | 2544 | return X86EMUL_CONTINUE; |
2525 | } | 2545 | } |
2526 | 2546 | ||
2527 | static int em_and(struct x86_emulate_ctxt *ctxt) | 2547 | static int em_and(struct x86_emulate_ctxt *ctxt) |
2528 | { | 2548 | { |
2529 | emulate_2op_SrcV("and", ctxt->src, ctxt->dst, ctxt->eflags); | 2549 | emulate_2op_SrcV(ctxt, "and"); |
2530 | return X86EMUL_CONTINUE; | 2550 | return X86EMUL_CONTINUE; |
2531 | } | 2551 | } |
2532 | 2552 | ||
2533 | static int em_sub(struct x86_emulate_ctxt *ctxt) | 2553 | static int em_sub(struct x86_emulate_ctxt *ctxt) |
2534 | { | 2554 | { |
2535 | emulate_2op_SrcV("sub", ctxt->src, ctxt->dst, ctxt->eflags); | 2555 | emulate_2op_SrcV(ctxt, "sub"); |
2536 | return X86EMUL_CONTINUE; | 2556 | return X86EMUL_CONTINUE; |
2537 | } | 2557 | } |
2538 | 2558 | ||
2539 | static int em_xor(struct x86_emulate_ctxt *ctxt) | 2559 | static int em_xor(struct x86_emulate_ctxt *ctxt) |
2540 | { | 2560 | { |
2541 | emulate_2op_SrcV("xor", ctxt->src, ctxt->dst, ctxt->eflags); | 2561 | emulate_2op_SrcV(ctxt, "xor"); |
2542 | return X86EMUL_CONTINUE; | 2562 | return X86EMUL_CONTINUE; |
2543 | } | 2563 | } |
2544 | 2564 | ||
2545 | static int em_cmp(struct x86_emulate_ctxt *ctxt) | 2565 | static int em_cmp(struct x86_emulate_ctxt *ctxt) |
2546 | { | 2566 | { |
2547 | emulate_2op_SrcV("cmp", ctxt->src, ctxt->dst, ctxt->eflags); | 2567 | emulate_2op_SrcV(ctxt, "cmp"); |
2548 | /* Disable writeback. */ | 2568 | /* Disable writeback. */ |
2549 | ctxt->dst.type = OP_NONE; | 2569 | ctxt->dst.type = OP_NONE; |
2550 | return X86EMUL_CONTINUE; | 2570 | return X86EMUL_CONTINUE; |
@@ -2552,7 +2572,9 @@ static int em_cmp(struct x86_emulate_ctxt *ctxt) | |||
2552 | 2572 | ||
2553 | static int em_test(struct x86_emulate_ctxt *ctxt) | 2573 | static int em_test(struct x86_emulate_ctxt *ctxt) |
2554 | { | 2574 | { |
2555 | emulate_2op_SrcV("test", ctxt->src, ctxt->dst, ctxt->eflags); | 2575 | emulate_2op_SrcV(ctxt, "test"); |
2576 | /* Disable writeback. */ | ||
2577 | ctxt->dst.type = OP_NONE; | ||
2556 | return X86EMUL_CONTINUE; | 2578 | return X86EMUL_CONTINUE; |
2557 | } | 2579 | } |
2558 | 2580 | ||
@@ -2570,7 +2592,7 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) | |||
2570 | 2592 | ||
2571 | static int em_imul(struct x86_emulate_ctxt *ctxt) | 2593 | static int em_imul(struct x86_emulate_ctxt *ctxt) |
2572 | { | 2594 | { |
2573 | emulate_2op_SrcV_nobyte("imul", ctxt->src, ctxt->dst, ctxt->eflags); | 2595 | emulate_2op_SrcV_nobyte(ctxt, "imul"); |
2574 | return X86EMUL_CONTINUE; | 2596 | return X86EMUL_CONTINUE; |
2575 | } | 2597 | } |
2576 | 2598 | ||
@@ -3025,9 +3047,14 @@ static struct opcode group1A[] = { | |||
3025 | }; | 3047 | }; |
3026 | 3048 | ||
3027 | static struct opcode group3[] = { | 3049 | static struct opcode group3[] = { |
3028 | D(DstMem | SrcImm | ModRM), D(DstMem | SrcImm | ModRM), | 3050 | I(DstMem | SrcImm | ModRM, em_test), |
3029 | D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock), | 3051 | I(DstMem | SrcImm | ModRM, em_test), |
3030 | X4(D(SrcMem | ModRM)), | 3052 | I(DstMem | SrcNone | ModRM | Lock, em_not), |
3053 | I(DstMem | SrcNone | ModRM | Lock, em_neg), | ||
3054 | I(SrcMem | ModRM, em_mul_ex), | ||
3055 | I(SrcMem | ModRM, em_imul_ex), | ||
3056 | I(SrcMem | ModRM, em_div_ex), | ||
3057 | I(SrcMem | ModRM, em_idiv_ex), | ||
3031 | }; | 3058 | }; |
3032 | 3059 | ||
3033 | static struct opcode group4[] = { | 3060 | static struct opcode group4[] = { |
@@ -3090,16 +3117,20 @@ static struct gprefix pfx_0f_6f_0f_7f = { | |||
3090 | static struct opcode opcode_table[256] = { | 3117 | static struct opcode opcode_table[256] = { |
3091 | /* 0x00 - 0x07 */ | 3118 | /* 0x00 - 0x07 */ |
3092 | I6ALU(Lock, em_add), | 3119 | I6ALU(Lock, em_add), |
3093 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3120 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), |
3121 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), | ||
3094 | /* 0x08 - 0x0F */ | 3122 | /* 0x08 - 0x0F */ |
3095 | I6ALU(Lock, em_or), | 3123 | I6ALU(Lock, em_or), |
3096 | D(ImplicitOps | Stack | No64), N, | 3124 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), |
3125 | N, | ||
3097 | /* 0x10 - 0x17 */ | 3126 | /* 0x10 - 0x17 */ |
3098 | I6ALU(Lock, em_adc), | 3127 | I6ALU(Lock, em_adc), |
3099 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3128 | I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), |
3129 | I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), | ||
3100 | /* 0x18 - 0x1F */ | 3130 | /* 0x18 - 0x1F */ |
3101 | I6ALU(Lock, em_sbb), | 3131 | I6ALU(Lock, em_sbb), |
3102 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3132 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), |
3133 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), | ||
3103 | /* 0x20 - 0x27 */ | 3134 | /* 0x20 - 0x27 */ |
3104 | I6ALU(Lock, em_and), N, N, | 3135 | I6ALU(Lock, em_and), N, N, |
3105 | /* 0x28 - 0x2F */ | 3136 | /* 0x28 - 0x2F */ |
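The dedicated 0x06/0x07/0x0e/0x16/0x17/0x1e/0x1f cases disappear because the Src2ES/Src2CS/Src2SS/Src2DS flags make the decoder drop the segment register index into src2 before the handler runs, so a single em_push_sreg()/em_pop_sreg() pair now covers every segment; the handlers presumably read the index from ctxt->src2.val, as em_lseg() does. Sketch of the idea with hypothetical names:

#include <stdio.h>

enum demo_sreg { DEMO_SREG_ES, DEMO_SREG_CS, DEMO_SREG_SS, DEMO_SREG_DS };

struct demo_ctxt { int src2_val; };

static int demo_push_sreg(struct demo_ctxt *c)
{
	printf("push segment register %d\n", c->src2_val);	/* would push that selector */
	return 0;
}

int main(void)
{
	struct demo_ctxt c = { .src2_val = DEMO_SREG_SS };	/* opcode 0x16: push ss */

	return demo_push_sreg(&c);
}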
@@ -3167,7 +3198,8 @@ static struct opcode opcode_table[256] = { | |||
3167 | D2bv(DstMem | SrcImmByte | ModRM), | 3198 | D2bv(DstMem | SrcImmByte | ModRM), |
3168 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), | 3199 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), |
3169 | I(ImplicitOps | Stack, em_ret), | 3200 | I(ImplicitOps | Stack, em_ret), |
3170 | D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64), | 3201 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), |
3202 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), | ||
3171 | G(ByteOp, group11), G(0, group11), | 3203 | G(ByteOp, group11), G(0, group11), |
3172 | /* 0xC8 - 0xCF */ | 3204 | /* 0xC8 - 0xCF */ |
3173 | N, N, N, I(ImplicitOps | Stack, em_ret_far), | 3205 | N, N, N, I(ImplicitOps | Stack, em_ret_far), |
@@ -3242,20 +3274,22 @@ static struct opcode twobyte_table[256] = { | |||
3242 | /* 0x90 - 0x9F */ | 3274 | /* 0x90 - 0x9F */ |
3243 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3275 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
3244 | /* 0xA0 - 0xA7 */ | 3276 | /* 0xA0 - 0xA7 */ |
3245 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), | 3277 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
3246 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), | 3278 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), |
3247 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3279 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
3248 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3280 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
3249 | /* 0xA8 - 0xAF */ | 3281 | /* 0xA8 - 0xAF */ |
3250 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), | 3282 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
3251 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3283 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), |
3252 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3284 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
3253 | D(DstMem | SrcReg | Src2CL | ModRM), | 3285 | D(DstMem | SrcReg | Src2CL | ModRM), |
3254 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 3286 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), |
3255 | /* 0xB0 - 0xB7 */ | 3287 | /* 0xB0 - 0xB7 */ |
3256 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3288 | D2bv(DstMem | SrcReg | ModRM | Lock), |
3257 | D(DstReg | SrcMemFAddr | ModRM), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3289 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), |
3258 | D(DstReg | SrcMemFAddr | ModRM), D(DstReg | SrcMemFAddr | ModRM), | 3290 | D(DstMem | SrcReg | ModRM | BitOp | Lock), |
3291 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | ||
3292 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | ||
3259 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3293 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3260 | /* 0xB8 - 0xBF */ | 3294 | /* 0xB8 - 0xBF */ |
3261 | N, N, | 3295 | N, N, |
@@ -3309,13 +3343,13 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3309 | /* NB. Immediates are sign-extended as necessary. */ | 3343 | /* NB. Immediates are sign-extended as necessary. */ |
3310 | switch (op->bytes) { | 3344 | switch (op->bytes) { |
3311 | case 1: | 3345 | case 1: |
3312 | op->val = insn_fetch(s8, 1, ctxt->_eip); | 3346 | op->val = insn_fetch(s8, ctxt); |
3313 | break; | 3347 | break; |
3314 | case 2: | 3348 | case 2: |
3315 | op->val = insn_fetch(s16, 2, ctxt->_eip); | 3349 | op->val = insn_fetch(s16, ctxt); |
3316 | break; | 3350 | break; |
3317 | case 4: | 3351 | case 4: |
3318 | op->val = insn_fetch(s32, 4, ctxt->_eip); | 3352 | op->val = insn_fetch(s32, ctxt); |
3319 | break; | 3353 | break; |
3320 | } | 3354 | } |
3321 | if (!sign_extension) { | 3355 | if (!sign_extension) { |
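insn_fetch() now takes only the operand type and the context; presumably the byte count comes from sizeof() and the fetch cursor from ctxt->_eip inside the macro, since the macro body is not part of this hunk. A hypothetical stand-alone model (statement expressions are a GCC extension, as in the kernel):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct fetch_ctxt { const uint8_t *buf; size_t pos; };

#define demo_insn_fetch(_type, _ctxt) ({				\
	_type _x;							\
	memcpy(&_x, (_ctxt)->buf + (_ctxt)->pos, sizeof(_x));		\
	(_ctxt)->pos += sizeof(_x);					\
	_x;								\
})

int main(void)
{
	uint8_t code[] = { 0xb8, 0x78, 0x56, 0x34, 0x12 };	/* mov eax, 0x12345678 */
	struct fetch_ctxt c = { .buf = code, .pos = 0 };
	uint8_t op = demo_insn_fetch(uint8_t, &c);
	uint32_t imm = demo_insn_fetch(uint32_t, &c);

	printf("opcode %#x imm %#x\n", op, imm);
	return 0;
}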
@@ -3335,6 +3369,125 @@ done: | |||
3335 | return rc; | 3369 | return rc; |
3336 | } | 3370 | } |
3337 | 3371 | ||
3372 | static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | ||
3373 | unsigned d) | ||
3374 | { | ||
3375 | int rc = X86EMUL_CONTINUE; | ||
3376 | |||
3377 | switch (d) { | ||
3378 | case OpReg: | ||
3379 | decode_register_operand(ctxt, op, | ||
3380 | op == &ctxt->dst && | ||
3381 | ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7)); | ||
3382 | break; | ||
3383 | case OpImmUByte: | ||
3384 | rc = decode_imm(ctxt, op, 1, false); | ||
3385 | break; | ||
3386 | case OpMem: | ||
3387 | ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3388 | mem_common: | ||
3389 | *op = ctxt->memop; | ||
3390 | ctxt->memopp = op; | ||
3391 | if ((ctxt->d & BitOp) && op == &ctxt->dst) | ||
3392 | fetch_bit_operand(ctxt); | ||
3393 | op->orig_val = op->val; | ||
3394 | break; | ||
3395 | case OpMem64: | ||
3396 | ctxt->memop.bytes = 8; | ||
3397 | goto mem_common; | ||
3398 | case OpAcc: | ||
3399 | op->type = OP_REG; | ||
3400 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3401 | op->addr.reg = &ctxt->regs[VCPU_REGS_RAX]; | ||
3402 | fetch_register_operand(op); | ||
3403 | op->orig_val = op->val; | ||
3404 | break; | ||
3405 | case OpDI: | ||
3406 | op->type = OP_MEM; | ||
3407 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3408 | op->addr.mem.ea = | ||
3409 | register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); | ||
3410 | op->addr.mem.seg = VCPU_SREG_ES; | ||
3411 | op->val = 0; | ||
3412 | break; | ||
3413 | case OpDX: | ||
3414 | op->type = OP_REG; | ||
3415 | op->bytes = 2; | ||
3416 | op->addr.reg = &ctxt->regs[VCPU_REGS_RDX]; | ||
3417 | fetch_register_operand(op); | ||
3418 | break; | ||
3419 | case OpCL: | ||
3420 | op->bytes = 1; | ||
3421 | op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff; | ||
3422 | break; | ||
3423 | case OpImmByte: | ||
3424 | rc = decode_imm(ctxt, op, 1, true); | ||
3425 | break; | ||
3426 | case OpOne: | ||
3427 | op->bytes = 1; | ||
3428 | op->val = 1; | ||
3429 | break; | ||
3430 | case OpImm: | ||
3431 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); | ||
3432 | break; | ||
3433 | case OpMem16: | ||
3434 | ctxt->memop.bytes = 2; | ||
3435 | goto mem_common; | ||
3436 | case OpMem32: | ||
3437 | ctxt->memop.bytes = 4; | ||
3438 | goto mem_common; | ||
3439 | case OpImmU16: | ||
3440 | rc = decode_imm(ctxt, op, 2, false); | ||
3441 | break; | ||
3442 | case OpImmU: | ||
3443 | rc = decode_imm(ctxt, op, imm_size(ctxt), false); | ||
3444 | break; | ||
3445 | case OpSI: | ||
3446 | op->type = OP_MEM; | ||
3447 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3448 | op->addr.mem.ea = | ||
3449 | register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); | ||
3450 | op->addr.mem.seg = seg_override(ctxt); | ||
3451 | op->val = 0; | ||
3452 | break; | ||
3453 | case OpImmFAddr: | ||
3454 | op->type = OP_IMM; | ||
3455 | op->addr.mem.ea = ctxt->_eip; | ||
3456 | op->bytes = ctxt->op_bytes + 2; | ||
3457 | insn_fetch_arr(op->valptr, op->bytes, ctxt); | ||
3458 | break; | ||
3459 | case OpMemFAddr: | ||
3460 | ctxt->memop.bytes = ctxt->op_bytes + 2; | ||
3461 | goto mem_common; | ||
3462 | case OpES: | ||
3463 | op->val = VCPU_SREG_ES; | ||
3464 | break; | ||
3465 | case OpCS: | ||
3466 | op->val = VCPU_SREG_CS; | ||
3467 | break; | ||
3468 | case OpSS: | ||
3469 | op->val = VCPU_SREG_SS; | ||
3470 | break; | ||
3471 | case OpDS: | ||
3472 | op->val = VCPU_SREG_DS; | ||
3473 | break; | ||
3474 | case OpFS: | ||
3475 | op->val = VCPU_SREG_FS; | ||
3476 | break; | ||
3477 | case OpGS: | ||
3478 | op->val = VCPU_SREG_GS; | ||
3479 | break; | ||
3480 | case OpImplicit: | ||
3481 | /* Special instructions do their own operand decoding. */ | ||
3482 | default: | ||
3483 | op->type = OP_NONE; /* Disable writeback. */ | ||
3484 | break; | ||
3485 | } | ||
3486 | |||
3487 | done: | ||
3488 | return rc; | ||
3489 | } | ||
3490 | |||
3338 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | 3491 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) |
3339 | { | 3492 | { |
3340 | int rc = X86EMUL_CONTINUE; | 3493 | int rc = X86EMUL_CONTINUE; |
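The new decode_operand() is driven by a small operand-type field extracted from the 64-bit flags word with (ctxt->d >> SrcShift) & OpMask, and the same routine now serves src, src2 and dst, replacing the three large switches further down. The shift and width values below are invented for the example; the real ones live with the opcode flag definitions:

#include <stdint.h>
#include <stdio.h>

#define DEMO_OP_BITS	5
#define DEMO_OP_MASK	((1u << DEMO_OP_BITS) - 1)
#define DEMO_SRC_SHIFT	1
#define DEMO_SRC2_SHIFT	(DEMO_SRC_SHIFT + DEMO_OP_BITS)
#define DEMO_DST_SHIFT	(DEMO_SRC2_SHIFT + DEMO_OP_BITS)

enum demo_op { DEMO_OP_NONE, DEMO_OP_REG, DEMO_OP_MEM, DEMO_OP_IMM };

int main(void)
{
	uint64_t d = ((uint64_t)DEMO_OP_REG << DEMO_SRC_SHIFT) |
		     ((uint64_t)DEMO_OP_IMM << DEMO_SRC2_SHIFT) |
		     ((uint64_t)DEMO_OP_MEM << DEMO_DST_SHIFT);

	printf("src=%llu src2=%llu dst=%llu\n",
	       (unsigned long long)((d >> DEMO_SRC_SHIFT) & DEMO_OP_MASK),
	       (unsigned long long)((d >> DEMO_SRC2_SHIFT) & DEMO_OP_MASK),
	       (unsigned long long)((d >> DEMO_DST_SHIFT) & DEMO_OP_MASK));
	return 0;
}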
@@ -3342,8 +3495,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
3342 | int def_op_bytes, def_ad_bytes, goffset, simd_prefix; | 3495 | int def_op_bytes, def_ad_bytes, goffset, simd_prefix; |
3343 | bool op_prefix = false; | 3496 | bool op_prefix = false; |
3344 | struct opcode opcode; | 3497 | struct opcode opcode; |
3345 | struct operand memop = { .type = OP_NONE }, *memopp = NULL; | ||
3346 | 3498 | ||
3499 | ctxt->memop.type = OP_NONE; | ||
3500 | ctxt->memopp = NULL; | ||
3347 | ctxt->_eip = ctxt->eip; | 3501 | ctxt->_eip = ctxt->eip; |
3348 | ctxt->fetch.start = ctxt->_eip; | 3502 | ctxt->fetch.start = ctxt->_eip; |
3349 | ctxt->fetch.end = ctxt->fetch.start + insn_len; | 3503 | ctxt->fetch.end = ctxt->fetch.start + insn_len; |
@@ -3366,7 +3520,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
3366 | break; | 3520 | break; |
3367 | #endif | 3521 | #endif |
3368 | default: | 3522 | default: |
3369 | return -1; | 3523 | return EMULATION_FAILED; |
3370 | } | 3524 | } |
3371 | 3525 | ||
3372 | ctxt->op_bytes = def_op_bytes; | 3526 | ctxt->op_bytes = def_op_bytes; |
@@ -3374,7 +3528,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
3374 | 3528 | ||
3375 | /* Legacy prefixes. */ | 3529 | /* Legacy prefixes. */ |
3376 | for (;;) { | 3530 | for (;;) { |
3377 | switch (ctxt->b = insn_fetch(u8, 1, ctxt->_eip)) { | 3531 | switch (ctxt->b = insn_fetch(u8, ctxt)) { |
3378 | case 0x66: /* operand-size override */ | 3532 | case 0x66: /* operand-size override */ |
3379 | op_prefix = true; | 3533 | op_prefix = true; |
3380 | /* switch between 2/4 bytes */ | 3534 | /* switch between 2/4 bytes */ |
@@ -3430,7 +3584,7 @@ done_prefixes: | |||
3430 | /* Two-byte opcode? */ | 3584 | /* Two-byte opcode? */ |
3431 | if (ctxt->b == 0x0f) { | 3585 | if (ctxt->b == 0x0f) { |
3432 | ctxt->twobyte = 1; | 3586 | ctxt->twobyte = 1; |
3433 | ctxt->b = insn_fetch(u8, 1, ctxt->_eip); | 3587 | ctxt->b = insn_fetch(u8, ctxt); |
3434 | opcode = twobyte_table[ctxt->b]; | 3588 | opcode = twobyte_table[ctxt->b]; |
3435 | } | 3589 | } |
3436 | ctxt->d = opcode.flags; | 3590 | ctxt->d = opcode.flags; |
@@ -3438,13 +3592,13 @@ done_prefixes: | |||
3438 | while (ctxt->d & GroupMask) { | 3592 | while (ctxt->d & GroupMask) { |
3439 | switch (ctxt->d & GroupMask) { | 3593 | switch (ctxt->d & GroupMask) { |
3440 | case Group: | 3594 | case Group: |
3441 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); | 3595 | ctxt->modrm = insn_fetch(u8, ctxt); |
3442 | --ctxt->_eip; | 3596 | --ctxt->_eip; |
3443 | goffset = (ctxt->modrm >> 3) & 7; | 3597 | goffset = (ctxt->modrm >> 3) & 7; |
3444 | opcode = opcode.u.group[goffset]; | 3598 | opcode = opcode.u.group[goffset]; |
3445 | break; | 3599 | break; |
3446 | case GroupDual: | 3600 | case GroupDual: |
3447 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); | 3601 | ctxt->modrm = insn_fetch(u8, ctxt); |
3448 | --ctxt->_eip; | 3602 | --ctxt->_eip; |
3449 | goffset = (ctxt->modrm >> 3) & 7; | 3603 | goffset = (ctxt->modrm >> 3) & 7; |
3450 | if ((ctxt->modrm >> 6) == 3) | 3604 | if ((ctxt->modrm >> 6) == 3) |
@@ -3458,7 +3612,7 @@ done_prefixes: | |||
3458 | break; | 3612 | break; |
3459 | case Prefix: | 3613 | case Prefix: |
3460 | if (ctxt->rep_prefix && op_prefix) | 3614 | if (ctxt->rep_prefix && op_prefix) |
3461 | return X86EMUL_UNHANDLEABLE; | 3615 | return EMULATION_FAILED; |
3462 | simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix; | 3616 | simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix; |
3463 | switch (simd_prefix) { | 3617 | switch (simd_prefix) { |
3464 | case 0x00: opcode = opcode.u.gprefix->pfx_no; break; | 3618 | case 0x00: opcode = opcode.u.gprefix->pfx_no; break; |
@@ -3468,10 +3622,10 @@ done_prefixes: | |||
3468 | } | 3622 | } |
3469 | break; | 3623 | break; |
3470 | default: | 3624 | default: |
3471 | return X86EMUL_UNHANDLEABLE; | 3625 | return EMULATION_FAILED; |
3472 | } | 3626 | } |
3473 | 3627 | ||
3474 | ctxt->d &= ~GroupMask; | 3628 | ctxt->d &= ~(u64)GroupMask; |
3475 | ctxt->d |= opcode.flags; | 3629 | ctxt->d |= opcode.flags; |
3476 | } | 3630 | } |
3477 | 3631 | ||
@@ -3481,10 +3635,10 @@ done_prefixes: | |||
3481 | 3635 | ||
3482 | /* Unrecognised? */ | 3636 | /* Unrecognised? */ |
3483 | if (ctxt->d == 0 || (ctxt->d & Undefined)) | 3637 | if (ctxt->d == 0 || (ctxt->d & Undefined)) |
3484 | return -1; | 3638 | return EMULATION_FAILED; |
3485 | 3639 | ||
3486 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) | 3640 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) |
3487 | return -1; | 3641 | return EMULATION_FAILED; |
3488 | 3642 | ||
3489 | if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) | 3643 | if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) |
3490 | ctxt->op_bytes = 8; | 3644 | ctxt->op_bytes = 8; |
@@ -3501,96 +3655,27 @@ done_prefixes: | |||
3501 | 3655 | ||
3502 | /* ModRM and SIB bytes. */ | 3656 | /* ModRM and SIB bytes. */ |
3503 | if (ctxt->d & ModRM) { | 3657 | if (ctxt->d & ModRM) { |
3504 | rc = decode_modrm(ctxt, &memop); | 3658 | rc = decode_modrm(ctxt, &ctxt->memop); |
3505 | if (!ctxt->has_seg_override) | 3659 | if (!ctxt->has_seg_override) |
3506 | set_seg_override(ctxt, ctxt->modrm_seg); | 3660 | set_seg_override(ctxt, ctxt->modrm_seg); |
3507 | } else if (ctxt->d & MemAbs) | 3661 | } else if (ctxt->d & MemAbs) |
3508 | rc = decode_abs(ctxt, &memop); | 3662 | rc = decode_abs(ctxt, &ctxt->memop); |
3509 | if (rc != X86EMUL_CONTINUE) | 3663 | if (rc != X86EMUL_CONTINUE) |
3510 | goto done; | 3664 | goto done; |
3511 | 3665 | ||
3512 | if (!ctxt->has_seg_override) | 3666 | if (!ctxt->has_seg_override) |
3513 | set_seg_override(ctxt, VCPU_SREG_DS); | 3667 | set_seg_override(ctxt, VCPU_SREG_DS); |
3514 | 3668 | ||
3515 | memop.addr.mem.seg = seg_override(ctxt); | 3669 | ctxt->memop.addr.mem.seg = seg_override(ctxt); |
3516 | 3670 | ||
3517 | if (memop.type == OP_MEM && ctxt->ad_bytes != 8) | 3671 | if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8) |
3518 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; | 3672 | ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea; |
3519 | 3673 | ||
3520 | /* | 3674 | /* |
3521 | * Decode and fetch the source operand: register, memory | 3675 | * Decode and fetch the source operand: register, memory |
3522 | * or immediate. | 3676 | * or immediate. |
3523 | */ | 3677 | */ |
3524 | switch (ctxt->d & SrcMask) { | 3678 | rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask); |
3525 | case SrcNone: | ||
3526 | break; | ||
3527 | case SrcReg: | ||
3528 | decode_register_operand(ctxt, &ctxt->src, 0); | ||
3529 | break; | ||
3530 | case SrcMem16: | ||
3531 | memop.bytes = 2; | ||
3532 | goto srcmem_common; | ||
3533 | case SrcMem32: | ||
3534 | memop.bytes = 4; | ||
3535 | goto srcmem_common; | ||
3536 | case SrcMem: | ||
3537 | memop.bytes = (ctxt->d & ByteOp) ? 1 : | ||
3538 | ctxt->op_bytes; | ||
3539 | srcmem_common: | ||
3540 | ctxt->src = memop; | ||
3541 | memopp = &ctxt->src; | ||
3542 | break; | ||
3543 | case SrcImmU16: | ||
3544 | rc = decode_imm(ctxt, &ctxt->src, 2, false); | ||
3545 | break; | ||
3546 | case SrcImm: | ||
3547 | rc = decode_imm(ctxt, &ctxt->src, imm_size(ctxt), true); | ||
3548 | break; | ||
3549 | case SrcImmU: | ||
3550 | rc = decode_imm(ctxt, &ctxt->src, imm_size(ctxt), false); | ||
3551 | break; | ||
3552 | case SrcImmByte: | ||
3553 | rc = decode_imm(ctxt, &ctxt->src, 1, true); | ||
3554 | break; | ||
3555 | case SrcImmUByte: | ||
3556 | rc = decode_imm(ctxt, &ctxt->src, 1, false); | ||
3557 | break; | ||
3558 | case SrcAcc: | ||
3559 | ctxt->src.type = OP_REG; | ||
3560 | ctxt->src.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3561 | ctxt->src.addr.reg = &ctxt->regs[VCPU_REGS_RAX]; | ||
3562 | fetch_register_operand(&ctxt->src); | ||
3563 | break; | ||
3564 | case SrcOne: | ||
3565 | ctxt->src.bytes = 1; | ||
3566 | ctxt->src.val = 1; | ||
3567 | break; | ||
3568 | case SrcSI: | ||
3569 | ctxt->src.type = OP_MEM; | ||
3570 | ctxt->src.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3571 | ctxt->src.addr.mem.ea = | ||
3572 | register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); | ||
3573 | ctxt->src.addr.mem.seg = seg_override(ctxt); | ||
3574 | ctxt->src.val = 0; | ||
3575 | break; | ||
3576 | case SrcImmFAddr: | ||
3577 | ctxt->src.type = OP_IMM; | ||
3578 | ctxt->src.addr.mem.ea = ctxt->_eip; | ||
3579 | ctxt->src.bytes = ctxt->op_bytes + 2; | ||
3580 | insn_fetch_arr(ctxt->src.valptr, ctxt->src.bytes, ctxt->_eip); | ||
3581 | break; | ||
3582 | case SrcMemFAddr: | ||
3583 | memop.bytes = ctxt->op_bytes + 2; | ||
3584 | goto srcmem_common; | ||
3585 | break; | ||
3586 | case SrcDX: | ||
3587 | ctxt->src.type = OP_REG; | ||
3588 | ctxt->src.bytes = 2; | ||
3589 | ctxt->src.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; | ||
3590 | fetch_register_operand(&ctxt->src); | ||
3591 | break; | ||
3592 | } | ||
3593 | |||
3594 | if (rc != X86EMUL_CONTINUE) | 3679 | if (rc != X86EMUL_CONTINUE) |
3595 | goto done; | 3680 | goto done; |
3596 | 3681 | ||
@@ -3598,85 +3683,18 @@ done_prefixes: | |||
3598 | * Decode and fetch the second source operand: register, memory | 3683 | * Decode and fetch the second source operand: register, memory |
3599 | * or immediate. | 3684 | * or immediate. |
3600 | */ | 3685 | */ |
3601 | switch (ctxt->d & Src2Mask) { | 3686 | rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask); |
3602 | case Src2None: | ||
3603 | break; | ||
3604 | case Src2CL: | ||
3605 | ctxt->src2.bytes = 1; | ||
3606 | ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0xff; | ||
3607 | break; | ||
3608 | case Src2ImmByte: | ||
3609 | rc = decode_imm(ctxt, &ctxt->src2, 1, true); | ||
3610 | break; | ||
3611 | case Src2One: | ||
3612 | ctxt->src2.bytes = 1; | ||
3613 | ctxt->src2.val = 1; | ||
3614 | break; | ||
3615 | case Src2Imm: | ||
3616 | rc = decode_imm(ctxt, &ctxt->src2, imm_size(ctxt), true); | ||
3617 | break; | ||
3618 | } | ||
3619 | |||
3620 | if (rc != X86EMUL_CONTINUE) | 3687 | if (rc != X86EMUL_CONTINUE) |
3621 | goto done; | 3688 | goto done; |
3622 | 3689 | ||
3623 | /* Decode and fetch the destination operand: register or memory. */ | 3690 | /* Decode and fetch the destination operand: register or memory. */ |
3624 | switch (ctxt->d & DstMask) { | 3691 | rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); |
3625 | case DstReg: | ||
3626 | decode_register_operand(ctxt, &ctxt->dst, | ||
3627 | ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7)); | ||
3628 | break; | ||
3629 | case DstImmUByte: | ||
3630 | ctxt->dst.type = OP_IMM; | ||
3631 | ctxt->dst.addr.mem.ea = ctxt->_eip; | ||
3632 | ctxt->dst.bytes = 1; | ||
3633 | ctxt->dst.val = insn_fetch(u8, 1, ctxt->_eip); | ||
3634 | break; | ||
3635 | case DstMem: | ||
3636 | case DstMem64: | ||
3637 | ctxt->dst = memop; | ||
3638 | memopp = &ctxt->dst; | ||
3639 | if ((ctxt->d & DstMask) == DstMem64) | ||
3640 | ctxt->dst.bytes = 8; | ||
3641 | else | ||
3642 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3643 | if (ctxt->d & BitOp) | ||
3644 | fetch_bit_operand(ctxt); | ||
3645 | ctxt->dst.orig_val = ctxt->dst.val; | ||
3646 | break; | ||
3647 | case DstAcc: | ||
3648 | ctxt->dst.type = OP_REG; | ||
3649 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3650 | ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RAX]; | ||
3651 | fetch_register_operand(&ctxt->dst); | ||
3652 | ctxt->dst.orig_val = ctxt->dst.val; | ||
3653 | break; | ||
3654 | case DstDI: | ||
3655 | ctxt->dst.type = OP_MEM; | ||
3656 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
3657 | ctxt->dst.addr.mem.ea = | ||
3658 | register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); | ||
3659 | ctxt->dst.addr.mem.seg = VCPU_SREG_ES; | ||
3660 | ctxt->dst.val = 0; | ||
3661 | break; | ||
3662 | case DstDX: | ||
3663 | ctxt->dst.type = OP_REG; | ||
3664 | ctxt->dst.bytes = 2; | ||
3665 | ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; | ||
3666 | fetch_register_operand(&ctxt->dst); | ||
3667 | break; | ||
3668 | case ImplicitOps: | ||
3669 | /* Special instructions do their own operand decoding. */ | ||
3670 | default: | ||
3671 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
3672 | break; | ||
3673 | } | ||
3674 | 3692 | ||
3675 | done: | 3693 | done: |
3676 | if (memopp && memopp->type == OP_MEM && ctxt->rip_relative) | 3694 | if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative) |
3677 | memopp->addr.mem.ea += ctxt->_eip; | 3695 | ctxt->memopp->addr.mem.ea += ctxt->_eip; |
3678 | 3696 | ||
3679 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 3697 | return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; |
3680 | } | 3698 | } |
3681 | 3699 | ||
3682 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | 3700 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) |
@@ -3825,32 +3843,11 @@ special_insn: | |||
3825 | goto twobyte_insn; | 3843 | goto twobyte_insn; |
3826 | 3844 | ||
3827 | switch (ctxt->b) { | 3845 | switch (ctxt->b) { |
3828 | case 0x06: /* push es */ | ||
3829 | rc = emulate_push_sreg(ctxt, VCPU_SREG_ES); | ||
3830 | break; | ||
3831 | case 0x07: /* pop es */ | ||
3832 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_ES); | ||
3833 | break; | ||
3834 | case 0x0e: /* push cs */ | ||
3835 | rc = emulate_push_sreg(ctxt, VCPU_SREG_CS); | ||
3836 | break; | ||
3837 | case 0x16: /* push ss */ | ||
3838 | rc = emulate_push_sreg(ctxt, VCPU_SREG_SS); | ||
3839 | break; | ||
3840 | case 0x17: /* pop ss */ | ||
3841 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_SS); | ||
3842 | break; | ||
3843 | case 0x1e: /* push ds */ | ||
3844 | rc = emulate_push_sreg(ctxt, VCPU_SREG_DS); | ||
3845 | break; | ||
3846 | case 0x1f: /* pop ds */ | ||
3847 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_DS); | ||
3848 | break; | ||
3849 | case 0x40 ... 0x47: /* inc r16/r32 */ | 3846 | case 0x40 ... 0x47: /* inc r16/r32 */ |
3850 | emulate_1op("inc", ctxt->dst, ctxt->eflags); | 3847 | emulate_1op(ctxt, "inc"); |
3851 | break; | 3848 | break; |
3852 | case 0x48 ... 0x4f: /* dec r16/r32 */ | 3849 | case 0x48 ... 0x4f: /* dec r16/r32 */ |
3853 | emulate_1op("dec", ctxt->dst, ctxt->eflags); | 3850 | emulate_1op(ctxt, "dec"); |
3854 | break; | 3851 | break; |
3855 | case 0x63: /* movsxd */ | 3852 | case 0x63: /* movsxd */ |
3856 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 3853 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
@@ -3891,12 +3888,6 @@ special_insn: | |||
3891 | case 0xc0 ... 0xc1: | 3888 | case 0xc0 ... 0xc1: |
3892 | rc = em_grp2(ctxt); | 3889 | rc = em_grp2(ctxt); |
3893 | break; | 3890 | break; |
3894 | case 0xc4: /* les */ | ||
3895 | rc = emulate_load_segment(ctxt, VCPU_SREG_ES); | ||
3896 | break; | ||
3897 | case 0xc5: /* lds */ | ||
3898 | rc = emulate_load_segment(ctxt, VCPU_SREG_DS); | ||
3899 | break; | ||
3900 | case 0xcc: /* int3 */ | 3891 | case 0xcc: /* int3 */ |
3901 | rc = emulate_int(ctxt, 3); | 3892 | rc = emulate_int(ctxt, 3); |
3902 | break; | 3893 | break; |
@@ -3953,9 +3944,6 @@ special_insn: | |||
3953 | /* complement carry flag from eflags reg */ | 3944 | /* complement carry flag from eflags reg */ |
3954 | ctxt->eflags ^= EFLG_CF; | 3945 | ctxt->eflags ^= EFLG_CF; |
3955 | break; | 3946 | break; |
3956 | case 0xf6 ... 0xf7: /* Grp3 */ | ||
3957 | rc = em_grp3(ctxt); | ||
3958 | break; | ||
3959 | case 0xf8: /* clc */ | 3947 | case 0xf8: /* clc */ |
3960 | ctxt->eflags &= ~EFLG_CF; | 3948 | ctxt->eflags &= ~EFLG_CF; |
3961 | break; | 3949 | break; |
@@ -4103,36 +4091,24 @@ twobyte_insn: | |||
4103 | case 0x90 ... 0x9f: /* setcc r/m8 */ | 4091 | case 0x90 ... 0x9f: /* setcc r/m8 */ |
4104 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); | 4092 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); |
4105 | break; | 4093 | break; |
4106 | case 0xa0: /* push fs */ | ||
4107 | rc = emulate_push_sreg(ctxt, VCPU_SREG_FS); | ||
4108 | break; | ||
4109 | case 0xa1: /* pop fs */ | ||
4110 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_FS); | ||
4111 | break; | ||
4112 | case 0xa3: | 4094 | case 0xa3: |
4113 | bt: /* bt */ | 4095 | bt: /* bt */ |
4114 | ctxt->dst.type = OP_NONE; | 4096 | ctxt->dst.type = OP_NONE; |
4115 | /* only subword offset */ | 4097 | /* only subword offset */ |
4116 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | 4098 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; |
4117 | emulate_2op_SrcV_nobyte("bt", ctxt->src, ctxt->dst, ctxt->eflags); | 4099 | emulate_2op_SrcV_nobyte(ctxt, "bt"); |
4118 | break; | 4100 | break; |
4119 | case 0xa4: /* shld imm8, r, r/m */ | 4101 | case 0xa4: /* shld imm8, r, r/m */ |
4120 | case 0xa5: /* shld cl, r, r/m */ | 4102 | case 0xa5: /* shld cl, r, r/m */ |
4121 | emulate_2op_cl("shld", ctxt->src2, ctxt->src, ctxt->dst, ctxt->eflags); | 4103 | emulate_2op_cl(ctxt, "shld"); |
4122 | break; | ||
4123 | case 0xa8: /* push gs */ | ||
4124 | rc = emulate_push_sreg(ctxt, VCPU_SREG_GS); | ||
4125 | break; | ||
4126 | case 0xa9: /* pop gs */ | ||
4127 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_GS); | ||
4128 | break; | 4104 | break; |
4129 | case 0xab: | 4105 | case 0xab: |
4130 | bts: /* bts */ | 4106 | bts: /* bts */ |
4131 | emulate_2op_SrcV_nobyte("bts", ctxt->src, ctxt->dst, ctxt->eflags); | 4107 | emulate_2op_SrcV_nobyte(ctxt, "bts"); |
4132 | break; | 4108 | break; |
4133 | case 0xac: /* shrd imm8, r, r/m */ | 4109 | case 0xac: /* shrd imm8, r, r/m */ |
4134 | case 0xad: /* shrd cl, r, r/m */ | 4110 | case 0xad: /* shrd cl, r, r/m */ |
4135 | emulate_2op_cl("shrd", ctxt->src2, ctxt->src, ctxt->dst, ctxt->eflags); | 4111 | emulate_2op_cl(ctxt, "shrd"); |
4136 | break; | 4112 | break; |
4137 | case 0xae: /* clflush */ | 4113 | case 0xae: /* clflush */ |
4138 | break; | 4114 | break; |
@@ -4143,7 +4119,7 @@ twobyte_insn: | |||
4143 | */ | 4119 | */ |
4144 | ctxt->src.orig_val = ctxt->src.val; | 4120 | ctxt->src.orig_val = ctxt->src.val; |
4145 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | 4121 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; |
4146 | emulate_2op_SrcV("cmp", ctxt->src, ctxt->dst, ctxt->eflags); | 4122 | emulate_2op_SrcV(ctxt, "cmp"); |
4147 | if (ctxt->eflags & EFLG_ZF) { | 4123 | if (ctxt->eflags & EFLG_ZF) { |
4148 | /* Success: write back to memory. */ | 4124 | /* Success: write back to memory. */ |
4149 | ctxt->dst.val = ctxt->src.orig_val; | 4125 | ctxt->dst.val = ctxt->src.orig_val; |
@@ -4153,18 +4129,9 @@ twobyte_insn: | |||
4153 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | 4129 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; |
4154 | } | 4130 | } |
4155 | break; | 4131 | break; |
4156 | case 0xb2: /* lss */ | ||
4157 | rc = emulate_load_segment(ctxt, VCPU_SREG_SS); | ||
4158 | break; | ||
4159 | case 0xb3: | 4132 | case 0xb3: |
4160 | btr: /* btr */ | 4133 | btr: /* btr */ |
4161 | emulate_2op_SrcV_nobyte("btr", ctxt->src, ctxt->dst, ctxt->eflags); | 4134 | emulate_2op_SrcV_nobyte(ctxt, "btr"); |
4162 | break; | ||
4163 | case 0xb4: /* lfs */ | ||
4164 | rc = emulate_load_segment(ctxt, VCPU_SREG_FS); | ||
4165 | break; | ||
4166 | case 0xb5: /* lgs */ | ||
4167 | rc = emulate_load_segment(ctxt, VCPU_SREG_GS); | ||
4168 | break; | 4135 | break; |
4169 | case 0xb6 ... 0xb7: /* movzx */ | 4136 | case 0xb6 ... 0xb7: /* movzx */ |
4170 | ctxt->dst.bytes = ctxt->op_bytes; | 4137 | ctxt->dst.bytes = ctxt->op_bytes; |
@@ -4185,7 +4152,7 @@ twobyte_insn: | |||
4185 | break; | 4152 | break; |
4186 | case 0xbb: | 4153 | case 0xbb: |
4187 | btc: /* btc */ | 4154 | btc: /* btc */ |
4188 | emulate_2op_SrcV_nobyte("btc", ctxt->src, ctxt->dst, ctxt->eflags); | 4155 | emulate_2op_SrcV_nobyte(ctxt, "btc"); |
4189 | break; | 4156 | break; |
4190 | case 0xbc: { /* bsf */ | 4157 | case 0xbc: { /* bsf */ |
4191 | u8 zf; | 4158 | u8 zf; |
@@ -4217,7 +4184,7 @@ twobyte_insn: | |||
4217 | (s16) ctxt->src.val; | 4184 | (s16) ctxt->src.val; |
4218 | break; | 4185 | break; |
4219 | case 0xc0 ... 0xc1: /* xadd */ | 4186 | case 0xc0 ... 0xc1: /* xadd */ |
4220 | emulate_2op_SrcV("add", ctxt->src, ctxt->dst, ctxt->eflags); | 4187 | emulate_2op_SrcV(ctxt, "add"); |
4221 | /* Write back the register source. */ | 4188 | /* Write back the register source. */ |
4222 | ctxt->src.val = ctxt->dst.orig_val; | 4189 | ctxt->src.val = ctxt->dst.orig_val; |
4223 | write_register_operand(&ctxt->src); | 4190 | write_register_operand(&ctxt->src); |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index efad72385058..76e3f1cd0369 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -713,14 +713,16 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
713 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | 713 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
714 | 714 | ||
715 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); | 715 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); |
716 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); | 716 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, |
717 | KVM_PIT_MEM_LENGTH, &pit->dev); | ||
717 | if (ret < 0) | 718 | if (ret < 0) |
718 | goto fail; | 719 | goto fail; |
719 | 720 | ||
720 | if (flags & KVM_PIT_SPEAKER_DUMMY) { | 721 | if (flags & KVM_PIT_SPEAKER_DUMMY) { |
721 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); | 722 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); |
722 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, | 723 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, |
723 | &pit->speaker_dev); | 724 | KVM_SPEAKER_BASE_ADDRESS, 4, |
725 | &pit->speaker_dev); | ||
724 | if (ret < 0) | 726 | if (ret < 0) |
725 | goto fail_unregister; | 727 | goto fail_unregister; |
726 | } | 728 | } |
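kvm_io_bus_register_dev() now takes the base address and length of the region a device claims, so the bus can range-check an access before dispatching to the device; the PIT registers its channel/command ports and the speaker port as two separate ranges. Sketch of such a range check (0x40 and 0x61 are the conventional PC PIT and speaker ports, used here only for illustration):

#include <stdbool.h>
#include <stdio.h>

struct demo_io_range { unsigned long base; int len; };

static bool demo_in_range(const struct demo_io_range *r,
			  unsigned long addr, int len)
{
	return addr >= r->base && addr + len <= r->base + r->len;
}

int main(void)
{
	struct demo_io_range pit = { .base = 0x40, .len = 4 };
	struct demo_io_range spk = { .base = 0x61, .len = 4 };

	printf("0x42 -> pit:%d speaker:%d\n",
	       demo_in_range(&pit, 0x42, 1), demo_in_range(&spk, 0x42, 1));
	return 0;
}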
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 19fe855e7953..cac4746d7ffb 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -34,6 +34,9 @@ | |||
34 | #include <linux/kvm_host.h> | 34 | #include <linux/kvm_host.h> |
35 | #include "trace.h" | 35 | #include "trace.h" |
36 | 36 | ||
37 | #define pr_pic_unimpl(fmt, ...) \ | ||
38 | pr_err_ratelimited("kvm: pic: " fmt, ## __VA_ARGS__) | ||
39 | |||
37 | static void pic_irq_request(struct kvm *kvm, int level); | 40 | static void pic_irq_request(struct kvm *kvm, int level); |
38 | 41 | ||
39 | static void pic_lock(struct kvm_pic *s) | 42 | static void pic_lock(struct kvm_pic *s) |
@@ -306,10 +309,10 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
306 | } | 309 | } |
307 | s->init_state = 1; | 310 | s->init_state = 1; |
308 | if (val & 0x02) | 311 | if (val & 0x02) |
309 | printk(KERN_ERR "single mode not supported"); | 312 | pr_pic_unimpl("single mode not supported"); |
310 | if (val & 0x08) | 313 | if (val & 0x08) |
311 | printk(KERN_ERR | 314 | pr_pic_unimpl( |
312 | "level sensitive irq not supported"); | 315 | "level sensitive irq not supported"); |
313 | } else if (val & 0x08) { | 316 | } else if (val & 0x08) { |
314 | if (val & 0x04) | 317 | if (val & 0x04) |
315 | s->poll = 1; | 318 | s->poll = 1; |
@@ -459,22 +462,15 @@ static int picdev_in_range(gpa_t addr) | |||
459 | } | 462 | } |
460 | } | 463 | } |
461 | 464 | ||
462 | static inline struct kvm_pic *to_pic(struct kvm_io_device *dev) | 465 | static int picdev_write(struct kvm_pic *s, |
463 | { | ||
464 | return container_of(dev, struct kvm_pic, dev); | ||
465 | } | ||
466 | |||
467 | static int picdev_write(struct kvm_io_device *this, | ||
468 | gpa_t addr, int len, const void *val) | 466 | gpa_t addr, int len, const void *val) |
469 | { | 467 | { |
470 | struct kvm_pic *s = to_pic(this); | ||
471 | unsigned char data = *(unsigned char *)val; | 468 | unsigned char data = *(unsigned char *)val; |
472 | if (!picdev_in_range(addr)) | 469 | if (!picdev_in_range(addr)) |
473 | return -EOPNOTSUPP; | 470 | return -EOPNOTSUPP; |
474 | 471 | ||
475 | if (len != 1) { | 472 | if (len != 1) { |
476 | if (printk_ratelimit()) | 473 | pr_pic_unimpl("non byte write\n"); |
477 | printk(KERN_ERR "PIC: non byte write\n"); | ||
478 | return 0; | 474 | return 0; |
479 | } | 475 | } |
480 | pic_lock(s); | 476 | pic_lock(s); |
@@ -494,17 +490,15 @@ static int picdev_write(struct kvm_io_device *this, | |||
494 | return 0; | 490 | return 0; |
495 | } | 491 | } |
496 | 492 | ||
497 | static int picdev_read(struct kvm_io_device *this, | 493 | static int picdev_read(struct kvm_pic *s, |
498 | gpa_t addr, int len, void *val) | 494 | gpa_t addr, int len, void *val) |
499 | { | 495 | { |
500 | struct kvm_pic *s = to_pic(this); | ||
501 | unsigned char data = 0; | 496 | unsigned char data = 0; |
502 | if (!picdev_in_range(addr)) | 497 | if (!picdev_in_range(addr)) |
503 | return -EOPNOTSUPP; | 498 | return -EOPNOTSUPP; |
504 | 499 | ||
505 | if (len != 1) { | 500 | if (len != 1) { |
506 | if (printk_ratelimit()) | 501 | pr_pic_unimpl("non byte read\n"); |
507 | printk(KERN_ERR "PIC: non byte read\n"); | ||
508 | return 0; | 502 | return 0; |
509 | } | 503 | } |
510 | pic_lock(s); | 504 | pic_lock(s); |
@@ -525,6 +519,48 @@ static int picdev_read(struct kvm_io_device *this, | |||
525 | return 0; | 519 | return 0; |
526 | } | 520 | } |
527 | 521 | ||
522 | static int picdev_master_write(struct kvm_io_device *dev, | ||
523 | gpa_t addr, int len, const void *val) | ||
524 | { | ||
525 | return picdev_write(container_of(dev, struct kvm_pic, dev_master), | ||
526 | addr, len, val); | ||
527 | } | ||
528 | |||
529 | static int picdev_master_read(struct kvm_io_device *dev, | ||
530 | gpa_t addr, int len, void *val) | ||
531 | { | ||
532 | return picdev_read(container_of(dev, struct kvm_pic, dev_master), | ||
533 | addr, len, val); | ||
534 | } | ||
535 | |||
536 | static int picdev_slave_write(struct kvm_io_device *dev, | ||
537 | gpa_t addr, int len, const void *val) | ||
538 | { | ||
539 | return picdev_write(container_of(dev, struct kvm_pic, dev_slave), | ||
540 | addr, len, val); | ||
541 | } | ||
542 | |||
543 | static int picdev_slave_read(struct kvm_io_device *dev, | ||
544 | gpa_t addr, int len, void *val) | ||
545 | { | ||
546 | return picdev_read(container_of(dev, struct kvm_pic, dev_slave), | ||
547 | addr, len, val); | ||
548 | } | ||
549 | |||
550 | static int picdev_eclr_write(struct kvm_io_device *dev, | ||
551 | gpa_t addr, int len, const void *val) | ||
552 | { | ||
553 | return picdev_write(container_of(dev, struct kvm_pic, dev_eclr), | ||
554 | addr, len, val); | ||
555 | } | ||
556 | |||
557 | static int picdev_eclr_read(struct kvm_io_device *dev, | ||
558 | gpa_t addr, int len, void *val) | ||
559 | { | ||
560 | return picdev_read(container_of(dev, struct kvm_pic, dev_eclr), | ||
561 | addr, len, val); | ||
562 | } | ||
563 | |||
528 | /* | 564 | /* |
529 | * callback when PIC0 irq status changed | 565 | * callback when PIC0 irq status changed |
530 | */ | 566 | */ |
@@ -537,9 +573,19 @@ static void pic_irq_request(struct kvm *kvm, int level) | |||
537 | s->output = level; | 573 | s->output = level; |
538 | } | 574 | } |
539 | 575 | ||
540 | static const struct kvm_io_device_ops picdev_ops = { | 576 | static const struct kvm_io_device_ops picdev_master_ops = { |
541 | .read = picdev_read, | 577 | .read = picdev_master_read, |
542 | .write = picdev_write, | 578 | .write = picdev_master_write, |
579 | }; | ||
580 | |||
581 | static const struct kvm_io_device_ops picdev_slave_ops = { | ||
582 | .read = picdev_slave_read, | ||
583 | .write = picdev_slave_write, | ||
584 | }; | ||
585 | |||
586 | static const struct kvm_io_device_ops picdev_eclr_ops = { | ||
587 | .read = picdev_eclr_read, | ||
588 | .write = picdev_eclr_write, | ||
543 | }; | 589 | }; |
544 | 590 | ||
545 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) | 591 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) |
@@ -560,16 +606,39 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
560 | /* | 606 | /* |
561 | * Initialize PIO device | 607 | * Initialize PIO device |
562 | */ | 608 | */ |
563 | kvm_iodevice_init(&s->dev, &picdev_ops); | 609 | kvm_iodevice_init(&s->dev_master, &picdev_master_ops); |
610 | kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops); | ||
611 | kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops); | ||
564 | mutex_lock(&kvm->slots_lock); | 612 | mutex_lock(&kvm->slots_lock); |
565 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); | 613 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, |
614 | &s->dev_master); | ||
615 | if (ret < 0) | ||
616 | goto fail_unlock; | ||
617 | |||
618 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave); | ||
619 | if (ret < 0) | ||
620 | goto fail_unreg_2; | ||
621 | |||
622 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr); | ||
623 | if (ret < 0) | ||
624 | goto fail_unreg_1; | ||
625 | |||
566 | mutex_unlock(&kvm->slots_lock); | 626 | mutex_unlock(&kvm->slots_lock); |
567 | if (ret < 0) { | ||
568 | kfree(s); | ||
569 | return NULL; | ||
570 | } | ||
571 | 627 | ||
572 | return s; | 628 | return s; |
629 | |||
630 | fail_unreg_1: | ||
631 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave); | ||
632 | |||
633 | fail_unreg_2: | ||
634 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master); | ||
635 | |||
636 | fail_unlock: | ||
637 | mutex_unlock(&kvm->slots_lock); | ||
638 | |||
639 | kfree(s); | ||
640 | |||
641 | return NULL; | ||
573 | } | 642 | } |
574 | 643 | ||
575 | void kvm_destroy_pic(struct kvm *kvm) | 644 | void kvm_destroy_pic(struct kvm *kvm) |
@@ -577,7 +646,9 @@ void kvm_destroy_pic(struct kvm *kvm) | |||
577 | struct kvm_pic *vpic = kvm->arch.vpic; | 646 | struct kvm_pic *vpic = kvm->arch.vpic; |
578 | 647 | ||
579 | if (vpic) { | 648 | if (vpic) { |
580 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev); | 649 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master); |
650 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave); | ||
651 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr); | ||
581 | kvm->arch.vpic = NULL; | 652 | kvm->arch.vpic = NULL; |
582 | kfree(vpic); | 653 | kfree(vpic); |
583 | } | 654 | } |
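The PIC now embeds three kvm_io_device instances, one for the master ports (0x20), one for the slave ports (0xa0) and one for the 0x4d0 edge/level control ports, and each thin wrapper recovers the containing kvm_pic with container_of() before calling the shared picdev_read()/picdev_write(). A self-contained model of that pattern:

#include <stddef.h>
#include <stdio.h>

#define demo_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_iodev { const char *label; };

struct demo_pic {
	int output;
	struct demo_iodev dev_master;
	struct demo_iodev dev_slave;
};

static void demo_pic_write(struct demo_pic *s, const struct demo_iodev *dev)
{
	printf("write via %s device, output=%d\n", dev->label, s->output);
}

static void demo_master_write(struct demo_iodev *dev)
{
	demo_pic_write(demo_container_of(dev, struct demo_pic, dev_master), dev);
}

static void demo_slave_write(struct demo_iodev *dev)
{
	demo_pic_write(demo_container_of(dev, struct demo_pic, dev_slave), dev);
}

int main(void)
{
	struct demo_pic pic = {
		.output = 1,
		.dev_master = { .label = "master" },
		.dev_slave  = { .label = "slave"  },
	};

	demo_master_write(&pic.dev_master);
	demo_slave_write(&pic.dev_slave);
	return 0;
}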
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 53e2d084bffb..2086f2bfba33 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -66,7 +66,9 @@ struct kvm_pic { | |||
66 | struct kvm *kvm; | 66 | struct kvm *kvm; |
67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
68 | int output; /* intr from master PIC */ | 68 | int output; /* intr from master PIC */ |
69 | struct kvm_io_device dev; | 69 | struct kvm_io_device dev_master; |
70 | struct kvm_io_device dev_slave; | ||
71 | struct kvm_io_device dev_eclr; | ||
70 | void (*ack_notifier)(void *opaque, int irq); | 72 | void (*ack_notifier)(void *opaque, int irq); |
71 | unsigned long irq_states[16]; | 73 | unsigned long irq_states[16]; |
72 | }; | 74 | }; |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 3377d53fcd36..544076c4f44b 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -45,13 +45,6 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | |||
45 | return vcpu->arch.walk_mmu->pdptrs[index]; | 45 | return vcpu->arch.walk_mmu->pdptrs[index]; |
46 | } | 46 | } |
47 | 47 | ||
48 | static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index) | ||
49 | { | ||
50 | load_pdptrs(vcpu, mmu, mmu->get_cr3(vcpu)); | ||
51 | |||
52 | return mmu->pdptrs[index]; | ||
53 | } | ||
54 | |||
55 | static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) | 48 | static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) |
56 | { | 49 | { |
57 | ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; | 50 | ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 64bc6ea78d90..497dbaa366d4 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h | |||
@@ -2,6 +2,8 @@ | |||
2 | struct kvm_timer { | 2 | struct kvm_timer { |
3 | struct hrtimer timer; | 3 | struct hrtimer timer; |
4 | s64 period; /* unit: ns */ | 4 | s64 period; /* unit: ns */ |
5 | u32 timer_mode_mask; | ||
6 | u64 tscdeadline; | ||
5 | atomic_t pending; /* accumulated triggered timers */ | 7 | atomic_t pending; /* accumulated triggered timers */ |
6 | bool reinject; | 8 | bool reinject; |
7 | struct kvm_timer_ops *t_ops; | 9 | struct kvm_timer_ops *t_ops; |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 57dcbd4308fa..54abb40199d6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -68,6 +68,9 @@ | |||
68 | #define VEC_POS(v) ((v) & (32 - 1)) | 68 | #define VEC_POS(v) ((v) & (32 - 1)) |
69 | #define REG_POS(v) (((v) >> 5) << 4) | 69 | #define REG_POS(v) (((v) >> 5) << 4) |
70 | 70 | ||
71 | static unsigned int min_timer_period_us = 500; | ||
72 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
73 | |||
71 | static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) | 74 | static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) |
72 | { | 75 | { |
73 | return *((u32 *) (apic->regs + reg_off)); | 76 | return *((u32 *) (apic->regs + reg_off)); |
@@ -135,9 +138,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) | |||
135 | return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; | 138 | return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; |
136 | } | 139 | } |
137 | 140 | ||
141 | static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) | ||
142 | { | ||
143 | return ((apic_get_reg(apic, APIC_LVTT) & | ||
144 | apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); | ||
145 | } | ||
146 | |||
138 | static inline int apic_lvtt_period(struct kvm_lapic *apic) | 147 | static inline int apic_lvtt_period(struct kvm_lapic *apic) |
139 | { | 148 | { |
140 | return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; | 149 | return ((apic_get_reg(apic, APIC_LVTT) & |
150 | apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); | ||
151 | } | ||
152 | |||
153 | static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) | ||
154 | { | ||
155 | return ((apic_get_reg(apic, APIC_LVTT) & | ||
156 | apic->lapic_timer.timer_mode_mask) == | ||
157 | APIC_LVT_TIMER_TSCDEADLINE); | ||
141 | } | 158 | } |
142 | 159 | ||
143 | static inline int apic_lvt_nmi_mode(u32 lvt_val) | 160 | static inline int apic_lvt_nmi_mode(u32 lvt_val) |
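The three helpers above mask APIC_LVTT with lapic_timer.timer_mode_mask and compare against the one-shot, periodic and TSC-deadline encodings. Per the SDM the mode sits in LVTT bits 18:17 (00 one-shot, 01 periodic, 10 TSC-deadline); timer_mode_mask is presumably set elsewhere to cover only the bits the guest's CPUID exposes. Illustrative decode:

#include <stdint.h>
#include <stdio.h>

#define DEMO_TIMER_ONESHOT	(0u << 17)
#define DEMO_TIMER_PERIODIC	(1u << 17)
#define DEMO_TIMER_TSCDEADLINE	(2u << 17)
#define DEMO_TIMER_MODE_MASK	(3u << 17)

int main(void)
{
	uint32_t lvtt = 0x000400ef;	/* vector 0xef, mode bits 18:17 = 10b */

	printf("tsc-deadline? %d\n",
	       (lvtt & DEMO_TIMER_MODE_MASK) == DEMO_TIMER_TSCDEADLINE);
	return 0;
}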
@@ -166,7 +183,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) | |||
166 | } | 183 | } |
167 | 184 | ||
168 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { | 185 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { |
169 | LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ | 186 | LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ |
170 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ | 187 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ |
171 | LVT_MASK | APIC_MODE_MASK, /* LVTPC */ | 188 | LVT_MASK | APIC_MODE_MASK, /* LVTPC */ |
172 | LINT_MASK, LINT_MASK, /* LVT0-1 */ | 189 | LINT_MASK, LINT_MASK, /* LVT0-1 */ |
@@ -316,8 +333,8 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | |||
316 | result = 1; | 333 | result = 1; |
317 | break; | 334 | break; |
318 | default: | 335 | default: |
319 | printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n", | 336 | apic_debug("Bad DFR vcpu %d: %08x\n", |
320 | apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); | 337 | apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); |
321 | break; | 338 | break; |
322 | } | 339 | } |
323 | 340 | ||
@@ -354,8 +371,8 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
354 | result = (target != source); | 371 | result = (target != source); |
355 | break; | 372 | break; |
356 | default: | 373 | default: |
357 | printk(KERN_WARNING "Bad dest shorthand value %x\n", | 374 | apic_debug("kvm: apic: Bad dest shorthand value %x\n", |
358 | short_hand); | 375 | short_hand); |
359 | break; | 376 | break; |
360 | } | 377 | } |
361 | 378 | ||
@@ -401,11 +418,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
401 | break; | 418 | break; |
402 | 419 | ||
403 | case APIC_DM_REMRD: | 420 | case APIC_DM_REMRD: |
404 | printk(KERN_DEBUG "Ignoring delivery mode 3\n"); | 421 | apic_debug("Ignoring delivery mode 3\n"); |
405 | break; | 422 | break; |
406 | 423 | ||
407 | case APIC_DM_SMI: | 424 | case APIC_DM_SMI: |
408 | printk(KERN_DEBUG "Ignoring guest SMI\n"); | 425 | apic_debug("Ignoring guest SMI\n"); |
409 | break; | 426 | break; |
410 | 427 | ||
411 | case APIC_DM_NMI: | 428 | case APIC_DM_NMI: |
@@ -565,11 +582,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) | |||
565 | val = kvm_apic_id(apic) << 24; | 582 | val = kvm_apic_id(apic) << 24; |
566 | break; | 583 | break; |
567 | case APIC_ARBPRI: | 584 | case APIC_ARBPRI: |
568 | printk(KERN_WARNING "Access APIC ARBPRI register " | 585 | apic_debug("Access APIC ARBPRI register which is for P6\n"); |
569 | "which is for P6\n"); | ||
570 | break; | 586 | break; |
571 | 587 | ||
572 | case APIC_TMCCT: /* Timer CCR */ | 588 | case APIC_TMCCT: /* Timer CCR */ |
589 | if (apic_lvtt_tscdeadline(apic)) | ||
590 | return 0; | ||
591 | |||
573 | val = apic_get_tmcct(apic); | 592 | val = apic_get_tmcct(apic); |
574 | break; | 593 | break; |
575 | 594 | ||
@@ -664,29 +683,40 @@ static void update_divide_count(struct kvm_lapic *apic) | |||
664 | 683 | ||
665 | static void start_apic_timer(struct kvm_lapic *apic) | 684 | static void start_apic_timer(struct kvm_lapic *apic) |
666 | { | 685 | { |
667 | ktime_t now = apic->lapic_timer.timer.base->get_time(); | 686 | ktime_t now; |
668 | |||
669 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * | ||
670 | APIC_BUS_CYCLE_NS * apic->divide_count; | ||
671 | atomic_set(&apic->lapic_timer.pending, 0); | 687 | atomic_set(&apic->lapic_timer.pending, 0); |
672 | 688 | ||
673 | if (!apic->lapic_timer.period) | 689 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { |
674 | return; | 690 | /* lapic timer in oneshot or periodic mode */ |
675 | /* | 691 | now = apic->lapic_timer.timer.base->get_time(); |
676 | * Do not allow the guest to program periodic timers with small | 692 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) |
677 | * interval, since the hrtimers are not throttled by the host | 693 | * APIC_BUS_CYCLE_NS * apic->divide_count; |
678 | * scheduler. | 694 | |
679 | */ | 695 | if (!apic->lapic_timer.period) |
680 | if (apic_lvtt_period(apic)) { | 696 | return; |
681 | if (apic->lapic_timer.period < NSEC_PER_MSEC/2) | 697 | /* |
682 | apic->lapic_timer.period = NSEC_PER_MSEC/2; | 698 | * Do not allow the guest to program periodic timers with small |
683 | } | 699 | * interval, since the hrtimers are not throttled by the host |
700 | * scheduler. | ||
701 | */ | ||
702 | if (apic_lvtt_period(apic)) { | ||
703 | s64 min_period = min_timer_period_us * 1000LL; | ||
704 | |||
705 | if (apic->lapic_timer.period < min_period) { | ||
706 | pr_info_ratelimited( | ||
707 | "kvm: vcpu %i: requested %lld ns " | ||
708 | "lapic timer period limited to %lld ns\n", | ||
709 | apic->vcpu->vcpu_id, | ||
710 | apic->lapic_timer.period, min_period); | ||
711 | apic->lapic_timer.period = min_period; | ||
712 | } | ||
713 | } | ||
684 | 714 | ||
685 | hrtimer_start(&apic->lapic_timer.timer, | 715 | hrtimer_start(&apic->lapic_timer.timer, |
686 | ktime_add_ns(now, apic->lapic_timer.period), | 716 | ktime_add_ns(now, apic->lapic_timer.period), |
687 | HRTIMER_MODE_ABS); | 717 | HRTIMER_MODE_ABS); |
688 | 718 | ||
689 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" | 719 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" |
690 | PRIx64 ", " | 720 | PRIx64 ", " |
691 | "timer initial count 0x%x, period %lldns, " | 721 | "timer initial count 0x%x, period %lldns, " |
692 | "expire @ 0x%016" PRIx64 ".\n", __func__, | 722 | "expire @ 0x%016" PRIx64 ".\n", __func__, |
@@ -695,6 +725,30 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
695 | apic->lapic_timer.period, | 725 | apic->lapic_timer.period, |
696 | ktime_to_ns(ktime_add_ns(now, | 726 | ktime_to_ns(ktime_add_ns(now, |
697 | apic->lapic_timer.period))); | 727 | apic->lapic_timer.period))); |
728 | } else if (apic_lvtt_tscdeadline(apic)) { | ||
729 | /* lapic timer in tsc deadline mode */ | ||
730 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; | ||
731 | u64 ns = 0; | ||
732 | struct kvm_vcpu *vcpu = apic->vcpu; | ||
733 | unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); | ||
734 | unsigned long flags; | ||
735 | |||
736 | if (unlikely(!tscdeadline || !this_tsc_khz)) | ||
737 | return; | ||
738 | |||
739 | local_irq_save(flags); | ||
740 | |||
741 | now = apic->lapic_timer.timer.base->get_time(); | ||
742 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); | ||
743 | if (likely(tscdeadline > guest_tsc)) { | ||
744 | ns = (tscdeadline - guest_tsc) * 1000000ULL; | ||
745 | do_div(ns, this_tsc_khz); | ||
746 | } | ||
747 | hrtimer_start(&apic->lapic_timer.timer, | ||
748 | ktime_add_ns(now, ns), HRTIMER_MODE_ABS); | ||
749 | |||
750 | local_irq_restore(flags); | ||
751 | } | ||
698 | } | 752 | } |
699 | 753 | ||
700 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) | 754 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) |
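For TSC-deadline mode the code above converts the remaining guest TSC ticks into nanoseconds for the hrtimer: ns = (tscdeadline - guest_tsc) * 1000000 / tsc_khz. Worked example of that arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tscdeadline = 10004000000ULL;	/* guest-programmed deadline */
	uint64_t guest_tsc   = 10000000000ULL;	/* current guest TSC */
	unsigned long tsc_khz = 2000000;	/* 2 GHz guest TSC */
	uint64_t ns = 0;

	if (tscdeadline > guest_tsc)
		ns = (tscdeadline - guest_tsc) * 1000000ULL / tsc_khz;

	/* 4,000,000 ticks at 2 GHz -> 2,000,000 ns (2 ms) */
	printf("arm hrtimer %llu ns from now\n", (unsigned long long)ns);
	return 0;
}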
@@ -782,7 +836,6 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
782 | 836 | ||
783 | case APIC_LVT0: | 837 | case APIC_LVT0: |
784 | apic_manage_nmi_watchdog(apic, val); | 838 | apic_manage_nmi_watchdog(apic, val); |
785 | case APIC_LVTT: | ||
786 | case APIC_LVTTHMR: | 839 | case APIC_LVTTHMR: |
787 | case APIC_LVTPC: | 840 | case APIC_LVTPC: |
788 | case APIC_LVT1: | 841 | case APIC_LVT1: |
@@ -796,7 +849,22 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
796 | 849 | ||
797 | break; | 850 | break; |
798 | 851 | ||
852 | case APIC_LVTT: | ||
853 | if ((apic_get_reg(apic, APIC_LVTT) & | ||
854 | apic->lapic_timer.timer_mode_mask) != | ||
855 | (val & apic->lapic_timer.timer_mode_mask)) | ||
856 | hrtimer_cancel(&apic->lapic_timer.timer); | ||
857 | |||
858 | if (!apic_sw_enabled(apic)) | ||
859 | val |= APIC_LVT_MASKED; | ||
860 | val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); | ||
861 | apic_set_reg(apic, APIC_LVTT, val); | ||
862 | break; | ||
863 | |||
799 | case APIC_TMICT: | 864 | case APIC_TMICT: |
865 | if (apic_lvtt_tscdeadline(apic)) | ||
866 | break; | ||
867 | |||
800 | hrtimer_cancel(&apic->lapic_timer.timer); | 868 | hrtimer_cancel(&apic->lapic_timer.timer); |
801 | apic_set_reg(apic, APIC_TMICT, val); | 869 | apic_set_reg(apic, APIC_TMICT, val); |
802 | start_apic_timer(apic); | 870 | start_apic_timer(apic); |
@@ -804,14 +872,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
804 | 872 | ||
805 | case APIC_TDCR: | 873 | case APIC_TDCR: |
806 | if (val & 4) | 874 | if (val & 4) |
807 | printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val); | 875 | apic_debug("KVM_WRITE:TDCR %x\n", val); |
808 | apic_set_reg(apic, APIC_TDCR, val); | 876 | apic_set_reg(apic, APIC_TDCR, val); |
809 | update_divide_count(apic); | 877 | update_divide_count(apic); |
810 | break; | 878 | break; |
811 | 879 | ||
812 | case APIC_ESR: | 880 | case APIC_ESR: |
813 | if (apic_x2apic_mode(apic) && val != 0) { | 881 | if (apic_x2apic_mode(apic) && val != 0) { |
814 | printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val); | 882 | apic_debug("KVM_WRITE:ESR not zero %x\n", val); |
815 | ret = 1; | 883 | ret = 1; |
816 | } | 884 | } |
817 | break; | 885 | break; |
@@ -864,6 +932,15 @@ static int apic_mmio_write(struct kvm_io_device *this, | |||
864 | return 0; | 932 | return 0; |
865 | } | 933 | } |
866 | 934 | ||
935 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) | ||
936 | { | ||
937 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
938 | |||
939 | if (apic) | ||
940 | apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); | ||
941 | } | ||
942 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); | ||
943 | |||
867 | void kvm_free_lapic(struct kvm_vcpu *vcpu) | 944 | void kvm_free_lapic(struct kvm_vcpu *vcpu) |
868 | { | 945 | { |
869 | if (!vcpu->arch.apic) | 946 | if (!vcpu->arch.apic) |
@@ -883,6 +960,32 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
883 | *---------------------------------------------------------------------- | 960 | *---------------------------------------------------------------------- |
884 | */ | 961 | */ |
885 | 962 | ||
963 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) | ||
964 | { | ||
965 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
966 | if (!apic) | ||
967 | return 0; | ||
968 | |||
969 | if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) | ||
970 | return 0; | ||
971 | |||
972 | return apic->lapic_timer.tscdeadline; | ||
973 | } | ||
974 | |||
975 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) | ||
976 | { | ||
977 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
978 | if (!apic) | ||
979 | return; | ||
980 | |||
981 | if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) | ||
982 | return; | ||
983 | |||
984 | hrtimer_cancel(&apic->lapic_timer.timer); | ||
985 | apic->lapic_timer.tscdeadline = data; | ||
986 | start_apic_timer(apic); | ||
987 | } | ||
988 | |||
886 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) | 989 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) |
887 | { | 990 | { |
888 | struct kvm_lapic *apic = vcpu->arch.apic; | 991 | struct kvm_lapic *apic = vcpu->arch.apic; |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 52c9e6b9e725..138e8cc6fea6 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -26,6 +26,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); | |||
26 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); | 26 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); |
27 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); | 27 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); |
28 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); | 28 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); |
29 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); | ||
29 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | 30 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); |
30 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | 31 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); |
31 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); | 32 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); |
@@ -41,6 +42,9 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu); | |||
41 | bool kvm_apic_present(struct kvm_vcpu *vcpu); | 42 | bool kvm_apic_present(struct kvm_vcpu *vcpu); |
42 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | 43 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
43 | 44 | ||
45 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); | ||
46 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); | ||
47 | |||
44 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); | 48 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); |
45 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); | 49 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); |
46 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); | 50 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8e8da7960dbe..f1b36cf3e3d0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -2770,7 +2770,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
2770 | 2770 | ||
2771 | ASSERT(!VALID_PAGE(root)); | 2771 | ASSERT(!VALID_PAGE(root)); |
2772 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { | 2772 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { |
2773 | pdptr = kvm_pdptr_read_mmu(vcpu, &vcpu->arch.mmu, i); | 2773 | pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i); |
2774 | if (!is_present_gpte(pdptr)) { | 2774 | if (!is_present_gpte(pdptr)) { |
2775 | vcpu->arch.mmu.pae_root[i] = 0; | 2775 | vcpu->arch.mmu.pae_root[i] = 0; |
2776 | continue; | 2776 | continue; |
@@ -3318,6 +3318,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3318 | context->direct_map = true; | 3318 | context->direct_map = true; |
3319 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; | 3319 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; |
3320 | context->get_cr3 = get_cr3; | 3320 | context->get_cr3 = get_cr3; |
3321 | context->get_pdptr = kvm_pdptr_read; | ||
3321 | context->inject_page_fault = kvm_inject_page_fault; | 3322 | context->inject_page_fault = kvm_inject_page_fault; |
3322 | context->nx = is_nx(vcpu); | 3323 | context->nx = is_nx(vcpu); |
3323 | 3324 | ||
@@ -3376,6 +3377,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | |||
3376 | 3377 | ||
3377 | vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; | 3378 | vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; |
3378 | vcpu->arch.walk_mmu->get_cr3 = get_cr3; | 3379 | vcpu->arch.walk_mmu->get_cr3 = get_cr3; |
3380 | vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; | ||
3379 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; | 3381 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; |
3380 | 3382 | ||
3381 | return r; | 3383 | return r; |
@@ -3386,6 +3388,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3386 | struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; | 3388 | struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; |
3387 | 3389 | ||
3388 | g_context->get_cr3 = get_cr3; | 3390 | g_context->get_cr3 = get_cr3; |
3391 | g_context->get_pdptr = kvm_pdptr_read; | ||
3389 | g_context->inject_page_fault = kvm_inject_page_fault; | 3392 | g_context->inject_page_fault = kvm_inject_page_fault; |
3390 | 3393 | ||
3391 | /* | 3394 | /* |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 2460a265be23..746ec259d024 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -121,16 +121,16 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
121 | 121 | ||
122 | static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | 122 | static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
123 | { | 123 | { |
124 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); | ||
124 | unsigned long *rmapp; | 125 | unsigned long *rmapp; |
125 | struct kvm_mmu_page *rev_sp; | 126 | struct kvm_mmu_page *rev_sp; |
126 | gfn_t gfn; | 127 | gfn_t gfn; |
127 | 128 | ||
128 | |||
129 | rev_sp = page_header(__pa(sptep)); | 129 | rev_sp = page_header(__pa(sptep)); |
130 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); | 130 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); |
131 | 131 | ||
132 | if (!gfn_to_memslot(kvm, gfn)) { | 132 | if (!gfn_to_memslot(kvm, gfn)) { |
133 | if (!printk_ratelimit()) | 133 | if (!__ratelimit(&ratelimit_state)) |
134 | return; | 134 | return; |
135 | audit_printk(kvm, "no memslot for gfn %llx\n", gfn); | 135 | audit_printk(kvm, "no memslot for gfn %llx\n", gfn); |
136 | audit_printk(kvm, "index %ld of sp (gfn=%llx)\n", | 136 | audit_printk(kvm, "index %ld of sp (gfn=%llx)\n", |
@@ -141,7 +141,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
141 | 141 | ||
142 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); | 142 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); |
143 | if (!*rmapp) { | 143 | if (!*rmapp) { |
144 | if (!printk_ratelimit()) | 144 | if (!__ratelimit(&ratelimit_state)) |
145 | return; | 145 | return; |
146 | audit_printk(kvm, "no rmap for writable spte %llx\n", | 146 | audit_printk(kvm, "no rmap for writable spte %llx\n", |
147 | *sptep); | 147 | *sptep); |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 507e2b844cfa..92994100638b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -147,7 +147,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
147 | gfn_t table_gfn; | 147 | gfn_t table_gfn; |
148 | unsigned index, pt_access, uninitialized_var(pte_access); | 148 | unsigned index, pt_access, uninitialized_var(pte_access); |
149 | gpa_t pte_gpa; | 149 | gpa_t pte_gpa; |
150 | bool eperm; | 150 | bool eperm, last_gpte; |
151 | int offset; | 151 | int offset; |
152 | const int write_fault = access & PFERR_WRITE_MASK; | 152 | const int write_fault = access & PFERR_WRITE_MASK; |
153 | const int user_fault = access & PFERR_USER_MASK; | 153 | const int user_fault = access & PFERR_USER_MASK; |
@@ -163,7 +163,7 @@ retry_walk: | |||
163 | 163 | ||
164 | #if PTTYPE == 64 | 164 | #if PTTYPE == 64 |
165 | if (walker->level == PT32E_ROOT_LEVEL) { | 165 | if (walker->level == PT32E_ROOT_LEVEL) { |
166 | pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3); | 166 | pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); |
167 | trace_kvm_mmu_paging_element(pte, walker->level); | 167 | trace_kvm_mmu_paging_element(pte, walker->level); |
168 | if (!is_present_gpte(pte)) | 168 | if (!is_present_gpte(pte)) |
169 | goto error; | 169 | goto error; |
@@ -221,6 +221,17 @@ retry_walk: | |||
221 | eperm = true; | 221 | eperm = true; |
222 | #endif | 222 | #endif |
223 | 223 | ||
224 | last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); | ||
225 | if (last_gpte) { | ||
226 | pte_access = pt_access & | ||
227 | FNAME(gpte_access)(vcpu, pte, true); | ||
228 | /* check if the kernel is fetching from user page */ | ||
229 | if (unlikely(pte_access & PT_USER_MASK) && | ||
230 | kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
231 | if (fetch_fault && !user_fault) | ||
232 | eperm = true; | ||
233 | } | ||
234 | |||
224 | if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { | 235 | if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { |
225 | int ret; | 236 | int ret; |
226 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 237 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
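
The hunk above hoists the SMEP check so it runs as soon as the last-level gpte is known: a supervisor-mode instruction fetch from a user-accessible page must be refused when CR4.SMEP is set. A condensed sketch of just that predicate, with illustrative parameter names:

#include <stdbool.h>

static bool smep_fetch_fault(bool cr4_smep, bool pte_user_accessible,
			     bool fetch_fault, bool user_fault)
{
	/* SMEP: supervisor-mode fetches from user pages are not allowed */
	return cr4_smep && pte_user_accessible && fetch_fault && !user_fault;
}
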
@@ -238,18 +249,12 @@ retry_walk: | |||
238 | 249 | ||
239 | walker->ptes[walker->level - 1] = pte; | 250 | walker->ptes[walker->level - 1] = pte; |
240 | 251 | ||
241 | if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) { | 252 | if (last_gpte) { |
242 | int lvl = walker->level; | 253 | int lvl = walker->level; |
243 | gpa_t real_gpa; | 254 | gpa_t real_gpa; |
244 | gfn_t gfn; | 255 | gfn_t gfn; |
245 | u32 ac; | 256 | u32 ac; |
246 | 257 | ||
247 | /* check if the kernel is fetching from user page */ | ||
248 | if (unlikely(pte_access & PT_USER_MASK) && | ||
249 | kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
250 | if (fetch_fault && !user_fault) | ||
251 | eperm = true; | ||
252 | |||
253 | gfn = gpte_to_gfn_lvl(pte, lvl); | 258 | gfn = gpte_to_gfn_lvl(pte, lvl); |
254 | gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; | 259 | gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; |
255 | 260 | ||
@@ -295,7 +300,6 @@ retry_walk: | |||
295 | walker->ptes[walker->level - 1] = pte; | 300 | walker->ptes[walker->level - 1] = pte; |
296 | } | 301 | } |
297 | 302 | ||
298 | pte_access = pt_access & FNAME(gpte_access)(vcpu, pte, true); | ||
299 | walker->pt_access = pt_access; | 303 | walker->pt_access = pt_access; |
300 | walker->pte_access = pte_access; | 304 | walker->pte_access = pte_access; |
301 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 305 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 475d1c948501..e32243eac2f4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1084,7 +1084,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1084 | if (npt_enabled) { | 1084 | if (npt_enabled) { |
1085 | /* Setup VMCB for Nested Paging */ | 1085 | /* Setup VMCB for Nested Paging */ |
1086 | control->nested_ctl = 1; | 1086 | control->nested_ctl = 1; |
1087 | clr_intercept(svm, INTERCEPT_TASK_SWITCH); | ||
1088 | clr_intercept(svm, INTERCEPT_INVLPG); | 1087 | clr_intercept(svm, INTERCEPT_INVLPG); |
1089 | clr_exception_intercept(svm, PF_VECTOR); | 1088 | clr_exception_intercept(svm, PF_VECTOR); |
1090 | clr_cr_intercept(svm, INTERCEPT_CR3_READ); | 1089 | clr_cr_intercept(svm, INTERCEPT_CR3_READ); |
@@ -1844,6 +1843,20 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) | |||
1844 | return svm->nested.nested_cr3; | 1843 | return svm->nested.nested_cr3; |
1845 | } | 1844 | } |
1846 | 1845 | ||
1846 | static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) | ||
1847 | { | ||
1848 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1849 | u64 cr3 = svm->nested.nested_cr3; | ||
1850 | u64 pdpte; | ||
1851 | int ret; | ||
1852 | |||
1853 | ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte, | ||
1854 | offset_in_page(cr3) + index * 8, 8); | ||
1855 | if (ret) | ||
1856 | return 0; | ||
1857 | return pdpte; | ||
1858 | } | ||
1859 | |||
1847 | static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, | 1860 | static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, |
1848 | unsigned long root) | 1861 | unsigned long root) |
1849 | { | 1862 | { |
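
nested_svm_get_tdp_pdptr() above fetches PDPTE number index from the guest page named by the nested CR3: the PAE PDPT sits inside that page and entry i lives at cr3 + i * 8. A hypothetical user-space sketch of the same address arithmetic, with read_gpa() standing in for kvm_read_guest_page():

#include <stdint.h>

int read_gpa(uint64_t gpa, void *buf, int len);		/* assumed helper */

static uint64_t read_pae_pdpte(uint64_t cr3, int index)
{
	uint64_t gfn    = cr3 >> 12;			/* page frame holding the PDPT */
	uint32_t offset = (cr3 & 0xfff) + index * 8;	/* entry i at cr3 + i * 8 */
	uint64_t pdpte  = 0;

	if (read_gpa((gfn << 12) + offset, &pdpte, sizeof(pdpte)))
		return 0;				/* read failure: treat as not present */
	return pdpte;
}
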
@@ -1875,6 +1888,7 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) | |||
1875 | 1888 | ||
1876 | vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; | 1889 | vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; |
1877 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; | 1890 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; |
1891 | vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; | ||
1878 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; | 1892 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; |
1879 | vcpu->arch.mmu.shadow_root_level = get_npt_level(); | 1893 | vcpu->arch.mmu.shadow_root_level = get_npt_level(); |
1880 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | 1894 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; |
@@ -2182,7 +2196,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2182 | vmcb->control.exit_info_1, | 2196 | vmcb->control.exit_info_1, |
2183 | vmcb->control.exit_info_2, | 2197 | vmcb->control.exit_info_2, |
2184 | vmcb->control.exit_int_info, | 2198 | vmcb->control.exit_int_info, |
2185 | vmcb->control.exit_int_info_err); | 2199 | vmcb->control.exit_int_info_err, |
2200 | KVM_ISA_SVM); | ||
2186 | 2201 | ||
2187 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); | 2202 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); |
2188 | if (!nested_vmcb) | 2203 | if (!nested_vmcb) |
@@ -2894,15 +2909,20 @@ static int cr8_write_interception(struct vcpu_svm *svm) | |||
2894 | return 0; | 2909 | return 0; |
2895 | } | 2910 | } |
2896 | 2911 | ||
2912 | u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu) | ||
2913 | { | ||
2914 | struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); | ||
2915 | return vmcb->control.tsc_offset + | ||
2916 | svm_scale_tsc(vcpu, native_read_tsc()); | ||
2917 | } | ||
2918 | |||
2897 | static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | 2919 | static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) |
2898 | { | 2920 | { |
2899 | struct vcpu_svm *svm = to_svm(vcpu); | 2921 | struct vcpu_svm *svm = to_svm(vcpu); |
2900 | 2922 | ||
2901 | switch (ecx) { | 2923 | switch (ecx) { |
2902 | case MSR_IA32_TSC: { | 2924 | case MSR_IA32_TSC: { |
2903 | struct vmcb *vmcb = get_host_vmcb(svm); | 2925 | *data = svm->vmcb->control.tsc_offset + |
2904 | |||
2905 | *data = vmcb->control.tsc_offset + | ||
2906 | svm_scale_tsc(vcpu, native_read_tsc()); | 2926 | svm_scale_tsc(vcpu, native_read_tsc()); |
2907 | 2927 | ||
2908 | break; | 2928 | break; |
@@ -3314,8 +3334,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
3314 | struct kvm_run *kvm_run = vcpu->run; | 3334 | struct kvm_run *kvm_run = vcpu->run; |
3315 | u32 exit_code = svm->vmcb->control.exit_code; | 3335 | u32 exit_code = svm->vmcb->control.exit_code; |
3316 | 3336 | ||
3317 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); | ||
3318 | |||
3319 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) | 3337 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) |
3320 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 3338 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
3321 | if (npt_enabled) | 3339 | if (npt_enabled) |
@@ -3335,7 +3353,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
3335 | svm->vmcb->control.exit_info_1, | 3353 | svm->vmcb->control.exit_info_1, |
3336 | svm->vmcb->control.exit_info_2, | 3354 | svm->vmcb->control.exit_info_2, |
3337 | svm->vmcb->control.exit_int_info, | 3355 | svm->vmcb->control.exit_int_info, |
3338 | svm->vmcb->control.exit_int_info_err); | 3356 | svm->vmcb->control.exit_int_info_err, |
3357 | KVM_ISA_SVM); | ||
3339 | 3358 | ||
3340 | vmexit = nested_svm_exit_special(svm); | 3359 | vmexit = nested_svm_exit_special(svm); |
3341 | 3360 | ||
@@ -3768,6 +3787,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3768 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | 3787 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; |
3769 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 3788 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
3770 | 3789 | ||
3790 | trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM); | ||
3791 | |||
3771 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | 3792 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) |
3772 | kvm_before_handle_nmi(&svm->vcpu); | 3793 | kvm_before_handle_nmi(&svm->vcpu); |
3773 | 3794 | ||
@@ -3897,60 +3918,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
3897 | } | 3918 | } |
3898 | } | 3919 | } |
3899 | 3920 | ||
3900 | static const struct trace_print_flags svm_exit_reasons_str[] = { | ||
3901 | { SVM_EXIT_READ_CR0, "read_cr0" }, | ||
3902 | { SVM_EXIT_READ_CR3, "read_cr3" }, | ||
3903 | { SVM_EXIT_READ_CR4, "read_cr4" }, | ||
3904 | { SVM_EXIT_READ_CR8, "read_cr8" }, | ||
3905 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, | ||
3906 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, | ||
3907 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, | ||
3908 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, | ||
3909 | { SVM_EXIT_READ_DR0, "read_dr0" }, | ||
3910 | { SVM_EXIT_READ_DR1, "read_dr1" }, | ||
3911 | { SVM_EXIT_READ_DR2, "read_dr2" }, | ||
3912 | { SVM_EXIT_READ_DR3, "read_dr3" }, | ||
3913 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, | ||
3914 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, | ||
3915 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, | ||
3916 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, | ||
3917 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, | ||
3918 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, | ||
3919 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, | ||
3920 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, | ||
3921 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, | ||
3922 | { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, | ||
3923 | { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, | ||
3924 | { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, | ||
3925 | { SVM_EXIT_INTR, "interrupt" }, | ||
3926 | { SVM_EXIT_NMI, "nmi" }, | ||
3927 | { SVM_EXIT_SMI, "smi" }, | ||
3928 | { SVM_EXIT_INIT, "init" }, | ||
3929 | { SVM_EXIT_VINTR, "vintr" }, | ||
3930 | { SVM_EXIT_CPUID, "cpuid" }, | ||
3931 | { SVM_EXIT_INVD, "invd" }, | ||
3932 | { SVM_EXIT_HLT, "hlt" }, | ||
3933 | { SVM_EXIT_INVLPG, "invlpg" }, | ||
3934 | { SVM_EXIT_INVLPGA, "invlpga" }, | ||
3935 | { SVM_EXIT_IOIO, "io" }, | ||
3936 | { SVM_EXIT_MSR, "msr" }, | ||
3937 | { SVM_EXIT_TASK_SWITCH, "task_switch" }, | ||
3938 | { SVM_EXIT_SHUTDOWN, "shutdown" }, | ||
3939 | { SVM_EXIT_VMRUN, "vmrun" }, | ||
3940 | { SVM_EXIT_VMMCALL, "hypercall" }, | ||
3941 | { SVM_EXIT_VMLOAD, "vmload" }, | ||
3942 | { SVM_EXIT_VMSAVE, "vmsave" }, | ||
3943 | { SVM_EXIT_STGI, "stgi" }, | ||
3944 | { SVM_EXIT_CLGI, "clgi" }, | ||
3945 | { SVM_EXIT_SKINIT, "skinit" }, | ||
3946 | { SVM_EXIT_WBINVD, "wbinvd" }, | ||
3947 | { SVM_EXIT_MONITOR, "monitor" }, | ||
3948 | { SVM_EXIT_MWAIT, "mwait" }, | ||
3949 | { SVM_EXIT_XSETBV, "xsetbv" }, | ||
3950 | { SVM_EXIT_NPF, "npf" }, | ||
3951 | { -1, NULL } | ||
3952 | }; | ||
3953 | |||
3954 | static int svm_get_lpage_level(void) | 3921 | static int svm_get_lpage_level(void) |
3955 | { | 3922 | { |
3956 | return PT_PDPE_LEVEL; | 3923 | return PT_PDPE_LEVEL; |
@@ -4223,7 +4190,6 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4223 | .get_mt_mask = svm_get_mt_mask, | 4190 | .get_mt_mask = svm_get_mt_mask, |
4224 | 4191 | ||
4225 | .get_exit_info = svm_get_exit_info, | 4192 | .get_exit_info = svm_get_exit_info, |
4226 | .exit_reasons_str = svm_exit_reasons_str, | ||
4227 | 4193 | ||
4228 | .get_lpage_level = svm_get_lpage_level, | 4194 | .get_lpage_level = svm_get_lpage_level, |
4229 | 4195 | ||
@@ -4239,6 +4205,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4239 | .write_tsc_offset = svm_write_tsc_offset, | 4205 | .write_tsc_offset = svm_write_tsc_offset, |
4240 | .adjust_tsc_offset = svm_adjust_tsc_offset, | 4206 | .adjust_tsc_offset = svm_adjust_tsc_offset, |
4241 | .compute_tsc_offset = svm_compute_tsc_offset, | 4207 | .compute_tsc_offset = svm_compute_tsc_offset, |
4208 | .read_l1_tsc = svm_read_l1_tsc, | ||
4242 | 4209 | ||
4243 | .set_tdp_cr3 = set_tdp_cr3, | 4210 | .set_tdp_cr3 = set_tdp_cr3, |
4244 | 4211 | ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 3ff898c104f7..911d2641f14c 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #define _TRACE_KVM_H | 2 | #define _TRACE_KVM_H |
3 | 3 | ||
4 | #include <linux/tracepoint.h> | 4 | #include <linux/tracepoint.h> |
5 | #include <asm/vmx.h> | ||
6 | #include <asm/svm.h> | ||
5 | 7 | ||
6 | #undef TRACE_SYSTEM | 8 | #undef TRACE_SYSTEM |
7 | #define TRACE_SYSTEM kvm | 9 | #define TRACE_SYSTEM kvm |
@@ -181,6 +183,95 @@ TRACE_EVENT(kvm_apic, | |||
181 | #define KVM_ISA_VMX 1 | 183 | #define KVM_ISA_VMX 1 |
182 | #define KVM_ISA_SVM 2 | 184 | #define KVM_ISA_SVM 2 |
183 | 185 | ||
186 | #define VMX_EXIT_REASONS \ | ||
187 | { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ | ||
188 | { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ | ||
189 | { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ | ||
190 | { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ | ||
191 | { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ | ||
192 | { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ | ||
193 | { EXIT_REASON_CPUID, "CPUID" }, \ | ||
194 | { EXIT_REASON_HLT, "HLT" }, \ | ||
195 | { EXIT_REASON_INVLPG, "INVLPG" }, \ | ||
196 | { EXIT_REASON_RDPMC, "RDPMC" }, \ | ||
197 | { EXIT_REASON_RDTSC, "RDTSC" }, \ | ||
198 | { EXIT_REASON_VMCALL, "VMCALL" }, \ | ||
199 | { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ | ||
200 | { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ | ||
201 | { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ | ||
202 | { EXIT_REASON_VMPTRST, "VMPTRST" }, \ | ||
203 | { EXIT_REASON_VMREAD, "VMREAD" }, \ | ||
204 | { EXIT_REASON_VMRESUME, "VMRESUME" }, \ | ||
205 | { EXIT_REASON_VMWRITE, "VMWRITE" }, \ | ||
206 | { EXIT_REASON_VMOFF, "VMOFF" }, \ | ||
207 | { EXIT_REASON_VMON, "VMON" }, \ | ||
208 | { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ | ||
209 | { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ | ||
210 | { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ | ||
211 | { EXIT_REASON_MSR_READ, "MSR_READ" }, \ | ||
212 | { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ | ||
213 | { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ | ||
214 | { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ | ||
215 | { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ | ||
216 | { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ | ||
217 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ | ||
218 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ | ||
219 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ | ||
220 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ | ||
221 | { EXIT_REASON_WBINVD, "WBINVD" } | ||
222 | |||
223 | #define SVM_EXIT_REASONS \ | ||
224 | { SVM_EXIT_READ_CR0, "read_cr0" }, \ | ||
225 | { SVM_EXIT_READ_CR3, "read_cr3" }, \ | ||
226 | { SVM_EXIT_READ_CR4, "read_cr4" }, \ | ||
227 | { SVM_EXIT_READ_CR8, "read_cr8" }, \ | ||
228 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ | ||
229 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ | ||
230 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ | ||
231 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ | ||
232 | { SVM_EXIT_READ_DR0, "read_dr0" }, \ | ||
233 | { SVM_EXIT_READ_DR1, "read_dr1" }, \ | ||
234 | { SVM_EXIT_READ_DR2, "read_dr2" }, \ | ||
235 | { SVM_EXIT_READ_DR3, "read_dr3" }, \ | ||
236 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ | ||
237 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ | ||
238 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ | ||
239 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ | ||
240 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ | ||
241 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ | ||
242 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ | ||
243 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ | ||
244 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ | ||
245 | { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ | ||
246 | { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ | ||
247 | { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ | ||
248 | { SVM_EXIT_INTR, "interrupt" }, \ | ||
249 | { SVM_EXIT_NMI, "nmi" }, \ | ||
250 | { SVM_EXIT_SMI, "smi" }, \ | ||
251 | { SVM_EXIT_INIT, "init" }, \ | ||
252 | { SVM_EXIT_VINTR, "vintr" }, \ | ||
253 | { SVM_EXIT_CPUID, "cpuid" }, \ | ||
254 | { SVM_EXIT_INVD, "invd" }, \ | ||
255 | { SVM_EXIT_HLT, "hlt" }, \ | ||
256 | { SVM_EXIT_INVLPG, "invlpg" }, \ | ||
257 | { SVM_EXIT_INVLPGA, "invlpga" }, \ | ||
258 | { SVM_EXIT_IOIO, "io" }, \ | ||
259 | { SVM_EXIT_MSR, "msr" }, \ | ||
260 | { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ | ||
261 | { SVM_EXIT_SHUTDOWN, "shutdown" }, \ | ||
262 | { SVM_EXIT_VMRUN, "vmrun" }, \ | ||
263 | { SVM_EXIT_VMMCALL, "hypercall" }, \ | ||
264 | { SVM_EXIT_VMLOAD, "vmload" }, \ | ||
265 | { SVM_EXIT_VMSAVE, "vmsave" }, \ | ||
266 | { SVM_EXIT_STGI, "stgi" }, \ | ||
267 | { SVM_EXIT_CLGI, "clgi" }, \ | ||
268 | { SVM_EXIT_SKINIT, "skinit" }, \ | ||
269 | { SVM_EXIT_WBINVD, "wbinvd" }, \ | ||
270 | { SVM_EXIT_MONITOR, "monitor" }, \ | ||
271 | { SVM_EXIT_MWAIT, "mwait" }, \ | ||
272 | { SVM_EXIT_XSETBV, "xsetbv" }, \ | ||
273 | { SVM_EXIT_NPF, "npf" } | ||
274 | |||
184 | /* | 275 | /* |
185 | * Tracepoint for kvm guest exit: | 276 | * Tracepoint for kvm guest exit: |
186 | */ | 277 | */ |
@@ -205,8 +296,9 @@ TRACE_EVENT(kvm_exit, | |||
205 | ), | 296 | ), |
206 | 297 | ||
207 | TP_printk("reason %s rip 0x%lx info %llx %llx", | 298 | TP_printk("reason %s rip 0x%lx info %llx %llx", |
208 | ftrace_print_symbols_seq(p, __entry->exit_reason, | 299 | (__entry->isa == KVM_ISA_VMX) ? |
209 | kvm_x86_ops->exit_reasons_str), | 300 | __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) : |
301 | __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS), | ||
210 | __entry->guest_rip, __entry->info1, __entry->info2) | 302 | __entry->guest_rip, __entry->info1, __entry->info2) |
211 | ); | 303 | ); |
212 | 304 | ||
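
With the exit-reason tables turned into macros, the tracepoint can pick the right table per ISA at print time via __print_symbolic(), which is essentially a linear value-to-name lookup. An illustrative sketch of that lookup (only a few VMX reason codes shown; this is not the ftrace implementation):

#include <stddef.h>

struct exit_reason { unsigned long val; const char *name; };

static const struct exit_reason vmx_reasons[] = {
	{  0, "EXCEPTION_NMI"  },	/* example entries only */
	{ 12, "HLT"            },
	{ 30, "IO_INSTRUCTION" },
};

static const char *exit_reason_name(unsigned long code)
{
	size_t i;

	for (i = 0; i < sizeof(vmx_reasons) / sizeof(vmx_reasons[0]); i++)
		if (vmx_reasons[i].val == code)
			return vmx_reasons[i].name;
	return "UNKNOWN";
}
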
@@ -486,9 +578,9 @@ TRACE_EVENT(kvm_nested_intercepts, | |||
486 | TRACE_EVENT(kvm_nested_vmexit, | 578 | TRACE_EVENT(kvm_nested_vmexit, |
487 | TP_PROTO(__u64 rip, __u32 exit_code, | 579 | TP_PROTO(__u64 rip, __u32 exit_code, |
488 | __u64 exit_info1, __u64 exit_info2, | 580 | __u64 exit_info1, __u64 exit_info2, |
489 | __u32 exit_int_info, __u32 exit_int_info_err), | 581 | __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa), |
490 | TP_ARGS(rip, exit_code, exit_info1, exit_info2, | 582 | TP_ARGS(rip, exit_code, exit_info1, exit_info2, |
491 | exit_int_info, exit_int_info_err), | 583 | exit_int_info, exit_int_info_err, isa), |
492 | 584 | ||
493 | TP_STRUCT__entry( | 585 | TP_STRUCT__entry( |
494 | __field( __u64, rip ) | 586 | __field( __u64, rip ) |
@@ -497,6 +589,7 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
497 | __field( __u64, exit_info2 ) | 589 | __field( __u64, exit_info2 ) |
498 | __field( __u32, exit_int_info ) | 590 | __field( __u32, exit_int_info ) |
499 | __field( __u32, exit_int_info_err ) | 591 | __field( __u32, exit_int_info_err ) |
592 | __field( __u32, isa ) | ||
500 | ), | 593 | ), |
501 | 594 | ||
502 | TP_fast_assign( | 595 | TP_fast_assign( |
@@ -506,12 +599,14 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
506 | __entry->exit_info2 = exit_info2; | 599 | __entry->exit_info2 = exit_info2; |
507 | __entry->exit_int_info = exit_int_info; | 600 | __entry->exit_int_info = exit_int_info; |
508 | __entry->exit_int_info_err = exit_int_info_err; | 601 | __entry->exit_int_info_err = exit_int_info_err; |
602 | __entry->isa = isa; | ||
509 | ), | 603 | ), |
510 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | 604 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " |
511 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", | 605 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
512 | __entry->rip, | 606 | __entry->rip, |
513 | ftrace_print_symbols_seq(p, __entry->exit_code, | 607 | (__entry->isa == KVM_ISA_VMX) ? |
514 | kvm_x86_ops->exit_reasons_str), | 608 | __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : |
609 | __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), | ||
515 | __entry->exit_info1, __entry->exit_info2, | 610 | __entry->exit_info1, __entry->exit_info2, |
516 | __entry->exit_int_info, __entry->exit_int_info_err) | 611 | __entry->exit_int_info, __entry->exit_int_info_err) |
517 | ); | 612 | ); |
@@ -522,9 +617,9 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
522 | TRACE_EVENT(kvm_nested_vmexit_inject, | 617 | TRACE_EVENT(kvm_nested_vmexit_inject, |
523 | TP_PROTO(__u32 exit_code, | 618 | TP_PROTO(__u32 exit_code, |
524 | __u64 exit_info1, __u64 exit_info2, | 619 | __u64 exit_info1, __u64 exit_info2, |
525 | __u32 exit_int_info, __u32 exit_int_info_err), | 620 | __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa), |
526 | TP_ARGS(exit_code, exit_info1, exit_info2, | 621 | TP_ARGS(exit_code, exit_info1, exit_info2, |
527 | exit_int_info, exit_int_info_err), | 622 | exit_int_info, exit_int_info_err, isa), |
528 | 623 | ||
529 | TP_STRUCT__entry( | 624 | TP_STRUCT__entry( |
530 | __field( __u32, exit_code ) | 625 | __field( __u32, exit_code ) |
@@ -532,6 +627,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
532 | __field( __u64, exit_info2 ) | 627 | __field( __u64, exit_info2 ) |
533 | __field( __u32, exit_int_info ) | 628 | __field( __u32, exit_int_info ) |
534 | __field( __u32, exit_int_info_err ) | 629 | __field( __u32, exit_int_info_err ) |
630 | __field( __u32, isa ) | ||
535 | ), | 631 | ), |
536 | 632 | ||
537 | TP_fast_assign( | 633 | TP_fast_assign( |
@@ -540,12 +636,14 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
540 | __entry->exit_info2 = exit_info2; | 636 | __entry->exit_info2 = exit_info2; |
541 | __entry->exit_int_info = exit_int_info; | 637 | __entry->exit_int_info = exit_int_info; |
542 | __entry->exit_int_info_err = exit_int_info_err; | 638 | __entry->exit_int_info_err = exit_int_info_err; |
639 | __entry->isa = isa; | ||
543 | ), | 640 | ), |
544 | 641 | ||
545 | TP_printk("reason: %s ext_inf1: 0x%016llx " | 642 | TP_printk("reason: %s ext_inf1: 0x%016llx " |
546 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", | 643 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
547 | ftrace_print_symbols_seq(p, __entry->exit_code, | 644 | (__entry->isa == KVM_ISA_VMX) ? |
548 | kvm_x86_ops->exit_reasons_str), | 645 | __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : |
646 | __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), | ||
549 | __entry->exit_info1, __entry->exit_info2, | 647 | __entry->exit_info1, __entry->exit_info2, |
550 | __entry->exit_int_info, __entry->exit_int_info_err) | 648 | __entry->exit_int_info, __entry->exit_int_info_err) |
551 | ); | 649 | ); |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e65a158dee64..a0d6bd9ad442 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -71,6 +71,9 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
71 | static int __read_mostly yield_on_hlt = 1; | 71 | static int __read_mostly yield_on_hlt = 1; |
72 | module_param(yield_on_hlt, bool, S_IRUGO); | 72 | module_param(yield_on_hlt, bool, S_IRUGO); |
73 | 73 | ||
74 | static int __read_mostly fasteoi = 1; | ||
75 | module_param(fasteoi, bool, S_IRUGO); | ||
76 | |||
74 | /* | 77 | /* |
75 | * If nested=1, nested virtualization is supported, i.e., guests may use | 78 | * If nested=1, nested virtualization is supported, i.e., guests may use |
76 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 79 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
@@ -1748,6 +1751,21 @@ static u64 guest_read_tsc(void) | |||
1748 | } | 1751 | } |
1749 | 1752 | ||
1750 | /* | 1753 | /* |
1754 | * Like guest_read_tsc, but always returns L1's notion of the timestamp | ||
1755 | * counter, even if a nested guest (L2) is currently running. | ||
1756 | */ | ||
1757 | u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu) | ||
1758 | { | ||
1759 | u64 host_tsc, tsc_offset; | ||
1760 | |||
1761 | rdtscll(host_tsc); | ||
1762 | tsc_offset = is_guest_mode(vcpu) ? | ||
1763 | to_vmx(vcpu)->nested.vmcs01_tsc_offset : | ||
1764 | vmcs_read64(TSC_OFFSET); | ||
1765 | return host_tsc + tsc_offset; | ||
1766 | } | ||
1767 | |||
1768 | /* | ||
1751 | * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ | 1769 | * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ |
1752 | * ioctl. In this case the call-back should update internal vmx state to make | 1770 | * ioctl. In this case the call-back should update internal vmx state to make |
1753 | * the changes effective. | 1771 | * the changes effective. |
@@ -1762,15 +1780,23 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | |||
1762 | */ | 1780 | */ |
1763 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | 1781 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) |
1764 | { | 1782 | { |
1765 | vmcs_write64(TSC_OFFSET, offset); | 1783 | if (is_guest_mode(vcpu)) { |
1766 | if (is_guest_mode(vcpu)) | ||
1767 | /* | 1784 | /* |
1768 | * We're here if L1 chose not to trap the TSC MSR. Since | 1785 | * We're here if L1 chose not to trap WRMSR to TSC. According |
1769 | * prepare_vmcs12() does not copy tsc_offset, we need to also | 1786 | * to the spec, this should set L1's TSC; The offset that L1 |
1770 | * set the vmcs12 field here. | 1787 | * set for L2 remains unchanged, and still needs to be added |
1788 | * to the newly set TSC to get L2's TSC. | ||
1771 | */ | 1789 | */ |
1772 | get_vmcs12(vcpu)->tsc_offset = offset - | 1790 | struct vmcs12 *vmcs12; |
1773 | to_vmx(vcpu)->nested.vmcs01_tsc_offset; | 1791 | to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset; |
1792 | /* recalculate vmcs02.TSC_OFFSET: */ | ||
1793 | vmcs12 = get_vmcs12(vcpu); | ||
1794 | vmcs_write64(TSC_OFFSET, offset + | ||
1795 | (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? | ||
1796 | vmcs12->tsc_offset : 0)); | ||
1797 | } else { | ||
1798 | vmcs_write64(TSC_OFFSET, offset); | ||
1799 | } | ||
1774 | } | 1800 | } |
1775 | 1801 | ||
1776 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 1802 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) |
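
The rewritten vmx_write_tsc_offset() keeps the guest-mode invariant that L2 observes the host TSC plus vmcs01's offset plus whatever offset L1 programmed for L2, if L1 enabled TSC offsetting at all. A small sketch of that arithmetic, with illustrative names:

#include <stdint.h>
#include <stdbool.h>

static uint64_t vmcs02_tsc_offset(uint64_t vmcs01_offset,
				  uint64_t vmcs12_offset,
				  bool l1_uses_tsc_offsetting)
{
	/* L2 reads host_tsc + vmcs02.TSC_OFFSET, so the two offsets stack */
	return vmcs01_offset + (l1_uses_tsc_offsetting ? vmcs12_offset : 0);
}
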
@@ -2736,8 +2762,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
2736 | 2762 | ||
2737 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); | 2763 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); |
2738 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { | 2764 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { |
2739 | printk(KERN_DEBUG "%s: tss fixup for long mode. \n", | 2765 | pr_debug_ratelimited("%s: tss fixup for long mode. \n", |
2740 | __func__); | 2766 | __func__); |
2741 | vmcs_write32(GUEST_TR_AR_BYTES, | 2767 | vmcs_write32(GUEST_TR_AR_BYTES, |
2742 | (guest_tr_ar & ~AR_TYPE_MASK) | 2768 | (guest_tr_ar & ~AR_TYPE_MASK) |
2743 | | AR_TYPE_BUSY_64_TSS); | 2769 | | AR_TYPE_BUSY_64_TSS); |
@@ -4115,8 +4141,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
4115 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 4141 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
4116 | if (is_page_fault(intr_info)) { | 4142 | if (is_page_fault(intr_info)) { |
4117 | /* EPT won't cause page fault directly */ | 4143 | /* EPT won't cause page fault directly */ |
4118 | if (enable_ept) | 4144 | BUG_ON(enable_ept); |
4119 | BUG(); | ||
4120 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 4145 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
4121 | trace_kvm_page_fault(cr2, error_code); | 4146 | trace_kvm_page_fault(cr2, error_code); |
4122 | 4147 | ||
@@ -4518,6 +4543,24 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) | |||
4518 | 4543 | ||
4519 | static int handle_apic_access(struct kvm_vcpu *vcpu) | 4544 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
4520 | { | 4545 | { |
4546 | if (likely(fasteoi)) { | ||
4547 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
4548 | int access_type, offset; | ||
4549 | |||
4550 | access_type = exit_qualification & APIC_ACCESS_TYPE; | ||
4551 | offset = exit_qualification & APIC_ACCESS_OFFSET; | ||
4552 | /* | ||
4553 | * A sane guest uses MOV to write EOI and the written value | ||
4554 | * is ignored, so short-circuit here to avoid | ||
4555 | * heavy instruction emulation. | ||
4556 | */ | ||
4557 | if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && | ||
4558 | (offset == APIC_EOI)) { | ||
4559 | kvm_lapic_set_eoi(vcpu); | ||
4560 | skip_emulated_instruction(vcpu); | ||
4561 | return 1; | ||
4562 | } | ||
4563 | } | ||
4521 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | 4564 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
4522 | } | 4565 | } |
4523 | 4566 | ||
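
The fast-EOI path above decodes the APIC-access exit qualification: bits 11:0 carry the offset into the APIC page, bits 15:12 the access type, and a linear write at offset 0xB0 (the EOI register) can be completed without entering the emulator. A condensed sketch of that test; the mask values follow the SDM layout but should be treated as illustrative here:

#include <stdbool.h>

#define ACCESS_OFFSET_MASK	0xfffUL		/* bits 11:0  - offset into APIC page */
#define ACCESS_TYPE_MASK	0xf000UL	/* bits 15:12 - access type */
#define TYPE_LINEAR_WRITE	(1UL << 12)
#define APIC_REG_EOI		0xB0

static bool is_fast_eoi(unsigned long exit_qualification)
{
	return (exit_qualification & ACCESS_TYPE_MASK) == TYPE_LINEAR_WRITE &&
	       (exit_qualification & ACCESS_OFFSET_MASK) == APIC_REG_EOI;
}
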
@@ -5591,8 +5634,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
5591 | return 0; | 5634 | return 0; |
5592 | 5635 | ||
5593 | if (unlikely(vmx->fail)) { | 5636 | if (unlikely(vmx->fail)) { |
5594 | printk(KERN_INFO "%s failed vm entry %x\n", | 5637 | pr_info_ratelimited("%s failed vm entry %x\n", __func__, |
5595 | __func__, vmcs_read32(VM_INSTRUCTION_ERROR)); | 5638 | vmcs_read32(VM_INSTRUCTION_ERROR)); |
5596 | return 1; | 5639 | return 1; |
5597 | } | 5640 | } |
5598 | 5641 | ||
@@ -5696,8 +5739,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
5696 | u32 exit_reason = vmx->exit_reason; | 5739 | u32 exit_reason = vmx->exit_reason; |
5697 | u32 vectoring_info = vmx->idt_vectoring_info; | 5740 | u32 vectoring_info = vmx->idt_vectoring_info; |
5698 | 5741 | ||
5699 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); | ||
5700 | |||
5701 | /* If guest state is invalid, start emulating */ | 5742 | /* If guest state is invalid, start emulating */ |
5702 | if (vmx->emulation_required && emulate_invalid_guest_state) | 5743 | if (vmx->emulation_required && emulate_invalid_guest_state) |
5703 | return handle_invalid_guest_state(vcpu); | 5744 | return handle_invalid_guest_state(vcpu); |
@@ -6101,6 +6142,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6101 | vmx->loaded_vmcs->launched = 1; | 6142 | vmx->loaded_vmcs->launched = 1; |
6102 | 6143 | ||
6103 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 6144 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
6145 | trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); | ||
6104 | 6146 | ||
6105 | vmx_complete_atomic_exit(vmx); | 6147 | vmx_complete_atomic_exit(vmx); |
6106 | vmx_recover_nmi_blocking(vmx); | 6148 | vmx_recover_nmi_blocking(vmx); |
@@ -6241,49 +6283,6 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
6241 | return ret; | 6283 | return ret; |
6242 | } | 6284 | } |
6243 | 6285 | ||
6244 | #define _ER(x) { EXIT_REASON_##x, #x } | ||
6245 | |||
6246 | static const struct trace_print_flags vmx_exit_reasons_str[] = { | ||
6247 | _ER(EXCEPTION_NMI), | ||
6248 | _ER(EXTERNAL_INTERRUPT), | ||
6249 | _ER(TRIPLE_FAULT), | ||
6250 | _ER(PENDING_INTERRUPT), | ||
6251 | _ER(NMI_WINDOW), | ||
6252 | _ER(TASK_SWITCH), | ||
6253 | _ER(CPUID), | ||
6254 | _ER(HLT), | ||
6255 | _ER(INVLPG), | ||
6256 | _ER(RDPMC), | ||
6257 | _ER(RDTSC), | ||
6258 | _ER(VMCALL), | ||
6259 | _ER(VMCLEAR), | ||
6260 | _ER(VMLAUNCH), | ||
6261 | _ER(VMPTRLD), | ||
6262 | _ER(VMPTRST), | ||
6263 | _ER(VMREAD), | ||
6264 | _ER(VMRESUME), | ||
6265 | _ER(VMWRITE), | ||
6266 | _ER(VMOFF), | ||
6267 | _ER(VMON), | ||
6268 | _ER(CR_ACCESS), | ||
6269 | _ER(DR_ACCESS), | ||
6270 | _ER(IO_INSTRUCTION), | ||
6271 | _ER(MSR_READ), | ||
6272 | _ER(MSR_WRITE), | ||
6273 | _ER(MWAIT_INSTRUCTION), | ||
6274 | _ER(MONITOR_INSTRUCTION), | ||
6275 | _ER(PAUSE_INSTRUCTION), | ||
6276 | _ER(MCE_DURING_VMENTRY), | ||
6277 | _ER(TPR_BELOW_THRESHOLD), | ||
6278 | _ER(APIC_ACCESS), | ||
6279 | _ER(EPT_VIOLATION), | ||
6280 | _ER(EPT_MISCONFIG), | ||
6281 | _ER(WBINVD), | ||
6282 | { -1, NULL } | ||
6283 | }; | ||
6284 | |||
6285 | #undef _ER | ||
6286 | |||
6287 | static int vmx_get_lpage_level(void) | 6286 | static int vmx_get_lpage_level(void) |
6288 | { | 6287 | { |
6289 | if (enable_ept && !cpu_has_vmx_ept_1g_page()) | 6288 | if (enable_ept && !cpu_has_vmx_ept_1g_page()) |
@@ -6514,8 +6513,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
6514 | 6513 | ||
6515 | set_cr4_guest_host_mask(vmx); | 6514 | set_cr4_guest_host_mask(vmx); |
6516 | 6515 | ||
6517 | vmcs_write64(TSC_OFFSET, | 6516 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) |
6518 | vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); | 6517 | vmcs_write64(TSC_OFFSET, |
6518 | vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); | ||
6519 | else | ||
6520 | vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); | ||
6519 | 6521 | ||
6520 | if (enable_vpid) { | 6522 | if (enable_vpid) { |
6521 | /* | 6523 | /* |
@@ -6610,9 +6612,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
6610 | if (vmcs12->vm_entry_msr_load_count > 0 || | 6612 | if (vmcs12->vm_entry_msr_load_count > 0 || |
6611 | vmcs12->vm_exit_msr_load_count > 0 || | 6613 | vmcs12->vm_exit_msr_load_count > 0 || |
6612 | vmcs12->vm_exit_msr_store_count > 0) { | 6614 | vmcs12->vm_exit_msr_store_count > 0) { |
6613 | if (printk_ratelimit()) | 6615 | pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n", |
6614 | printk(KERN_WARNING | 6616 | __func__); |
6615 | "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__); | ||
6616 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 6617 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
6617 | return 1; | 6618 | return 1; |
6618 | } | 6619 | } |
@@ -6922,7 +6923,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
6922 | 6923 | ||
6923 | load_vmcs12_host_state(vcpu, vmcs12); | 6924 | load_vmcs12_host_state(vcpu, vmcs12); |
6924 | 6925 | ||
6925 | /* Update TSC_OFFSET if vmx_adjust_tsc_offset() was used while L2 ran */ | 6926 | /* Update TSC_OFFSET if TSC was changed while L2 ran */ |
6926 | vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); | 6927 | vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); |
6927 | 6928 | ||
6928 | /* This is needed for same reason as it was needed in prepare_vmcs02 */ | 6929 | /* This is needed for same reason as it was needed in prepare_vmcs02 */ |
@@ -7039,7 +7040,6 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7039 | .get_mt_mask = vmx_get_mt_mask, | 7040 | .get_mt_mask = vmx_get_mt_mask, |
7040 | 7041 | ||
7041 | .get_exit_info = vmx_get_exit_info, | 7042 | .get_exit_info = vmx_get_exit_info, |
7042 | .exit_reasons_str = vmx_exit_reasons_str, | ||
7043 | 7043 | ||
7044 | .get_lpage_level = vmx_get_lpage_level, | 7044 | .get_lpage_level = vmx_get_lpage_level, |
7045 | 7045 | ||
@@ -7055,6 +7055,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7055 | .write_tsc_offset = vmx_write_tsc_offset, | 7055 | .write_tsc_offset = vmx_write_tsc_offset, |
7056 | .adjust_tsc_offset = vmx_adjust_tsc_offset, | 7056 | .adjust_tsc_offset = vmx_adjust_tsc_offset, |
7057 | .compute_tsc_offset = vmx_compute_tsc_offset, | 7057 | .compute_tsc_offset = vmx_compute_tsc_offset, |
7058 | .read_l1_tsc = vmx_read_l1_tsc, | ||
7058 | 7059 | ||
7059 | .set_tdp_cr3 = vmx_set_cr3, | 7060 | .set_tdp_cr3 = vmx_set_cr3, |
7060 | 7061 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 84a28ea45fa4..cf269096eadf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -83,6 +83,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); | |||
83 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); | 83 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); |
84 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | 84 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, |
85 | struct kvm_cpuid_entry2 __user *entries); | 85 | struct kvm_cpuid_entry2 __user *entries); |
86 | static void process_nmi(struct kvm_vcpu *vcpu); | ||
86 | 87 | ||
87 | struct kvm_x86_ops *kvm_x86_ops; | 88 | struct kvm_x86_ops *kvm_x86_ops; |
88 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | 89 | EXPORT_SYMBOL_GPL(kvm_x86_ops); |
@@ -359,8 +360,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
359 | 360 | ||
360 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 361 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
361 | { | 362 | { |
362 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 363 | atomic_inc(&vcpu->arch.nmi_queued); |
363 | vcpu->arch.nmi_pending = 1; | 364 | kvm_make_request(KVM_REQ_NMI, vcpu); |
364 | } | 365 | } |
365 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | 366 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); |
366 | 367 | ||
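
kvm_inject_nmi() now only bumps an atomic counter and raises KVM_REQ_NMI; folding the queued NMIs into nmi_pending is left to process_nmi(), whose body is outside this hunk. A rough, assumed sketch of such a fold, capping the pending count because only a bounded number of NMIs can be outstanding at once:

#include <stdatomic.h>

struct vcpu_nmi_state {
	atomic_uint  queued;	/* bumped from any context, as in kvm_inject_nmi() */
	unsigned int pending;	/* consumed only on the vcpu thread */
};

static void fold_queued_nmis(struct vcpu_nmi_state *s, unsigned int limit)
{
	unsigned int queued = atomic_exchange(&s->queued, 0);

	s->pending += queued;
	if (s->pending > limit)
		s->pending = limit;
}
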
@@ -599,6 +600,8 @@ static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | |||
599 | static void update_cpuid(struct kvm_vcpu *vcpu) | 600 | static void update_cpuid(struct kvm_vcpu *vcpu) |
600 | { | 601 | { |
601 | struct kvm_cpuid_entry2 *best; | 602 | struct kvm_cpuid_entry2 *best; |
603 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
604 | u32 timer_mode_mask; | ||
602 | 605 | ||
603 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | 606 | best = kvm_find_cpuid_entry(vcpu, 1, 0); |
604 | if (!best) | 607 | if (!best) |
@@ -610,6 +613,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu) | |||
610 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | 613 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) |
611 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | 614 | best->ecx |= bit(X86_FEATURE_OSXSAVE); |
612 | } | 615 | } |
616 | |||
617 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | ||
618 | best->function == 0x1) { | ||
619 | best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); | ||
620 | timer_mode_mask = 3 << 17; | ||
621 | } else | ||
622 | timer_mode_mask = 1 << 17; | ||
623 | |||
624 | if (apic) | ||
625 | apic->lapic_timer.timer_mode_mask = timer_mode_mask; | ||
613 | } | 626 | } |
614 | 627 | ||
615 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 628 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
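
The timer_mode_mask chosen above selects the LVTT mode field: bits 18:17 encode one-shot (00), periodic (01) and TSC-deadline (10), and only bit 17 is meaningful when the TSC-deadline CPUID bit is absent. A small sketch of decoding the mode with that mask; the enum and function names are illustrative:

enum lapic_timer_mode { TIMER_ONESHOT, TIMER_PERIODIC, TIMER_TSCDEADLINE };

static enum lapic_timer_mode lvtt_timer_mode(unsigned int lvtt,
					     unsigned int timer_mode_mask)
{
	/* timer_mode_mask is 3 << 17 with TSC-deadline support, else 1 << 17 */
	switch ((lvtt & timer_mode_mask) >> 17) {
	case 1:
		return TIMER_PERIODIC;
	case 2:
		return TIMER_TSCDEADLINE;
	default:
		return TIMER_ONESHOT;
	}
}
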
@@ -825,6 +838,7 @@ static u32 msrs_to_save[] = { | |||
825 | static unsigned num_msrs_to_save; | 838 | static unsigned num_msrs_to_save; |
826 | 839 | ||
827 | static u32 emulated_msrs[] = { | 840 | static u32 emulated_msrs[] = { |
841 | MSR_IA32_TSCDEADLINE, | ||
828 | MSR_IA32_MISC_ENABLE, | 842 | MSR_IA32_MISC_ENABLE, |
829 | MSR_IA32_MCG_STATUS, | 843 | MSR_IA32_MCG_STATUS, |
830 | MSR_IA32_MCG_CTL, | 844 | MSR_IA32_MCG_CTL, |
@@ -1000,7 +1014,7 @@ static inline int kvm_tsc_changes_freq(void) | |||
1000 | return ret; | 1014 | return ret; |
1001 | } | 1015 | } |
1002 | 1016 | ||
1003 | static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) | 1017 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) |
1004 | { | 1018 | { |
1005 | if (vcpu->arch.virtual_tsc_khz) | 1019 | if (vcpu->arch.virtual_tsc_khz) |
1006 | return vcpu->arch.virtual_tsc_khz; | 1020 | return vcpu->arch.virtual_tsc_khz; |
@@ -1098,7 +1112,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1098 | 1112 | ||
1099 | /* Keep irq disabled to prevent changes to the clock */ | 1113 | /* Keep irq disabled to prevent changes to the clock */ |
1100 | local_irq_save(flags); | 1114 | local_irq_save(flags); |
1101 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); | 1115 | tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); |
1102 | kernel_ns = get_kernel_ns(); | 1116 | kernel_ns = get_kernel_ns(); |
1103 | this_tsc_khz = vcpu_tsc_khz(v); | 1117 | this_tsc_khz = vcpu_tsc_khz(v); |
1104 | if (unlikely(this_tsc_khz == 0)) { | 1118 | if (unlikely(this_tsc_khz == 0)) { |
@@ -1564,6 +1578,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1564 | break; | 1578 | break; |
1565 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: | 1579 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: |
1566 | return kvm_x2apic_msr_write(vcpu, msr, data); | 1580 | return kvm_x2apic_msr_write(vcpu, msr, data); |
1581 | case MSR_IA32_TSCDEADLINE: | ||
1582 | kvm_set_lapic_tscdeadline_msr(vcpu, data); | ||
1583 | break; | ||
1567 | case MSR_IA32_MISC_ENABLE: | 1584 | case MSR_IA32_MISC_ENABLE: |
1568 | vcpu->arch.ia32_misc_enable_msr = data; | 1585 | vcpu->arch.ia32_misc_enable_msr = data; |
1569 | break; | 1586 | break; |
@@ -1825,6 +1842,9 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1825 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); | 1842 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); |
1826 | case HV_X64_MSR_TPR: | 1843 | case HV_X64_MSR_TPR: |
1827 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); | 1844 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); |
1845 | case HV_X64_MSR_APIC_ASSIST_PAGE: | ||
1846 | data = vcpu->arch.hv_vapic; | ||
1847 | break; | ||
1828 | default: | 1848 | default: |
1829 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1849 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
1830 | return 1; | 1850 | return 1; |
@@ -1839,7 +1859,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1839 | 1859 | ||
1840 | switch (msr) { | 1860 | switch (msr) { |
1841 | case MSR_IA32_PLATFORM_ID: | 1861 | case MSR_IA32_PLATFORM_ID: |
1842 | case MSR_IA32_UCODE_REV: | ||
1843 | case MSR_IA32_EBL_CR_POWERON: | 1862 | case MSR_IA32_EBL_CR_POWERON: |
1844 | case MSR_IA32_DEBUGCTLMSR: | 1863 | case MSR_IA32_DEBUGCTLMSR: |
1845 | case MSR_IA32_LASTBRANCHFROMIP: | 1864 | case MSR_IA32_LASTBRANCHFROMIP: |
@@ -1860,6 +1879,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1860 | case MSR_FAM10H_MMIO_CONF_BASE: | 1879 | case MSR_FAM10H_MMIO_CONF_BASE: |
1861 | data = 0; | 1880 | data = 0; |
1862 | break; | 1881 | break; |
1882 | case MSR_IA32_UCODE_REV: | ||
1883 | data = 0x100000000ULL; | ||
1884 | break; | ||
1863 | case MSR_MTRRcap: | 1885 | case MSR_MTRRcap: |
1864 | data = 0x500 | KVM_NR_VAR_MTRR; | 1886 | data = 0x500 | KVM_NR_VAR_MTRR; |
1865 | break; | 1887 | break; |
@@ -1888,6 +1910,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1888 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: | 1910 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: |
1889 | return kvm_x2apic_msr_read(vcpu, msr, pdata); | 1911 | return kvm_x2apic_msr_read(vcpu, msr, pdata); |
1890 | break; | 1912 | break; |
1913 | case MSR_IA32_TSCDEADLINE: | ||
1914 | data = kvm_get_lapic_tscdeadline_msr(vcpu); | ||
1915 | break; | ||
1891 | case MSR_IA32_MISC_ENABLE: | 1916 | case MSR_IA32_MISC_ENABLE: |
1892 | data = vcpu->arch.ia32_misc_enable_msr; | 1917 | data = vcpu->arch.ia32_misc_enable_msr; |
1893 | break; | 1918 | break; |
@@ -2086,6 +2111,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2086 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); | 2111 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); |
2087 | break; | 2112 | break; |
2088 | case KVM_CAP_NR_VCPUS: | 2113 | case KVM_CAP_NR_VCPUS: |
2114 | r = KVM_SOFT_MAX_VCPUS; | ||
2115 | break; | ||
2116 | case KVM_CAP_MAX_VCPUS: | ||
2089 | r = KVM_MAX_VCPUS; | 2117 | r = KVM_MAX_VCPUS; |
2090 | break; | 2118 | break; |
2091 | case KVM_CAP_NR_MEMSLOTS: | 2119 | case KVM_CAP_NR_MEMSLOTS: |
@@ -2210,7 +2238,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2210 | s64 tsc_delta; | 2238 | s64 tsc_delta; |
2211 | u64 tsc; | 2239 | u64 tsc; |
2212 | 2240 | ||
2213 | kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); | 2241 | tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
2214 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : | 2242 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : |
2215 | tsc - vcpu->arch.last_guest_tsc; | 2243 | tsc - vcpu->arch.last_guest_tsc; |
2216 | 2244 | ||
@@ -2234,7 +2262,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2234 | { | 2262 | { |
2235 | kvm_x86_ops->vcpu_put(vcpu); | 2263 | kvm_x86_ops->vcpu_put(vcpu); |
2236 | kvm_put_guest_fpu(vcpu); | 2264 | kvm_put_guest_fpu(vcpu); |
2237 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | 2265 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
2238 | } | 2266 | } |
2239 | 2267 | ||
2240 | static int is_efer_nx(void) | 2268 | static int is_efer_nx(void) |
@@ -2819,6 +2847,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2819 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | 2847 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, |
2820 | struct kvm_vcpu_events *events) | 2848 | struct kvm_vcpu_events *events) |
2821 | { | 2849 | { |
2850 | process_nmi(vcpu); | ||
2822 | events->exception.injected = | 2851 | events->exception.injected = |
2823 | vcpu->arch.exception.pending && | 2852 | vcpu->arch.exception.pending && |
2824 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | 2853 | !kvm_exception_is_soft(vcpu->arch.exception.nr); |
@@ -2836,7 +2865,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2836 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | 2865 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); |
2837 | 2866 | ||
2838 | events->nmi.injected = vcpu->arch.nmi_injected; | 2867 | events->nmi.injected = vcpu->arch.nmi_injected; |
2839 | events->nmi.pending = vcpu->arch.nmi_pending; | 2868 | events->nmi.pending = vcpu->arch.nmi_pending != 0; |
2840 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | 2869 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); |
2841 | events->nmi.pad = 0; | 2870 | events->nmi.pad = 0; |
2842 | 2871 | ||
@@ -2856,6 +2885,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2856 | | KVM_VCPUEVENT_VALID_SHADOW)) | 2885 | | KVM_VCPUEVENT_VALID_SHADOW)) |
2857 | return -EINVAL; | 2886 | return -EINVAL; |
2858 | 2887 | ||
2888 | process_nmi(vcpu); | ||
2859 | vcpu->arch.exception.pending = events->exception.injected; | 2889 | vcpu->arch.exception.pending = events->exception.injected; |
2860 | vcpu->arch.exception.nr = events->exception.nr; | 2890 | vcpu->arch.exception.nr = events->exception.nr; |
2861 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | 2891 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; |
@@ -3556,7 +3586,11 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3556 | if (r) { | 3586 | if (r) { |
3557 | mutex_lock(&kvm->slots_lock); | 3587 | mutex_lock(&kvm->slots_lock); |
3558 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | 3588 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, |
3559 | &vpic->dev); | 3589 | &vpic->dev_master); |
3590 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
3591 | &vpic->dev_slave); | ||
3592 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
3593 | &vpic->dev_eclr); | ||
3560 | mutex_unlock(&kvm->slots_lock); | 3594 | mutex_unlock(&kvm->slots_lock); |
3561 | kfree(vpic); | 3595 | kfree(vpic); |
3562 | goto create_irqchip_unlock; | 3596 | goto create_irqchip_unlock; |
@@ -4045,84 +4079,105 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | |||
4045 | return 0; | 4079 | return 0; |
4046 | } | 4080 | } |
4047 | 4081 | ||
4048 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | 4082 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
4049 | unsigned long addr, | 4083 | const void *val, int bytes) |
4050 | void *val, | ||
4051 | unsigned int bytes, | ||
4052 | struct x86_exception *exception) | ||
4053 | { | 4084 | { |
4054 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 4085 | int ret; |
4055 | gpa_t gpa; | ||
4056 | int handled, ret; | ||
4057 | 4086 | ||
4087 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | ||
4088 | if (ret < 0) | ||
4089 | return 0; | ||
4090 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); | ||
4091 | return 1; | ||
4092 | } | ||
4093 | |||
4094 | struct read_write_emulator_ops { | ||
4095 | int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val, | ||
4096 | int bytes); | ||
4097 | int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
4098 | void *val, int bytes); | ||
4099 | int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
4100 | int bytes, void *val); | ||
4101 | int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
4102 | void *val, int bytes); | ||
4103 | bool write; | ||
4104 | }; | ||
4105 | |||
4106 | static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) | ||
4107 | { | ||
4058 | if (vcpu->mmio_read_completed) { | 4108 | if (vcpu->mmio_read_completed) { |
4059 | memcpy(val, vcpu->mmio_data, bytes); | 4109 | memcpy(val, vcpu->mmio_data, bytes); |
4060 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, | 4110 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, |
4061 | vcpu->mmio_phys_addr, *(u64 *)val); | 4111 | vcpu->mmio_phys_addr, *(u64 *)val); |
4062 | vcpu->mmio_read_completed = 0; | 4112 | vcpu->mmio_read_completed = 0; |
4063 | return X86EMUL_CONTINUE; | 4113 | return 1; |
4064 | } | 4114 | } |
4065 | 4115 | ||
4066 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, false); | 4116 | return 0; |
4067 | 4117 | } | |
4068 | if (ret < 0) | ||
4069 | return X86EMUL_PROPAGATE_FAULT; | ||
4070 | |||
4071 | if (ret) | ||
4072 | goto mmio; | ||
4073 | |||
4074 | if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) | ||
4075 | == X86EMUL_CONTINUE) | ||
4076 | return X86EMUL_CONTINUE; | ||
4077 | 4118 | ||
4078 | mmio: | 4119 | static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, |
4079 | /* | 4120 | void *val, int bytes) |
4080 | * Is this MMIO handled locally? | 4121 | { |
4081 | */ | 4122 | return !kvm_read_guest(vcpu->kvm, gpa, val, bytes); |
4082 | handled = vcpu_mmio_read(vcpu, gpa, bytes, val); | 4123 | } |
4083 | 4124 | ||
4084 | if (handled == bytes) | 4125 | static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, |
4085 | return X86EMUL_CONTINUE; | 4126 | void *val, int bytes) |
4127 | { | ||
4128 | return emulator_write_phys(vcpu, gpa, val, bytes); | ||
4129 | } | ||
4086 | 4130 | ||
4087 | gpa += handled; | 4131 | static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) |
4088 | bytes -= handled; | 4132 | { |
4089 | val += handled; | 4133 | trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); |
4134 | return vcpu_mmio_write(vcpu, gpa, bytes, val); | ||
4135 | } | ||
4090 | 4136 | ||
4137 | static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
4138 | void *val, int bytes) | ||
4139 | { | ||
4091 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); | 4140 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); |
4092 | |||
4093 | vcpu->mmio_needed = 1; | ||
4094 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | ||
4095 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | ||
4096 | vcpu->mmio_size = bytes; | ||
4097 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
4098 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; | ||
4099 | vcpu->mmio_index = 0; | ||
4100 | |||
4101 | return X86EMUL_IO_NEEDED; | 4141 | return X86EMUL_IO_NEEDED; |
4102 | } | 4142 | } |
4103 | 4143 | ||
4104 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 4144 | static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, |
4105 | const void *val, int bytes) | 4145 | void *val, int bytes) |
4106 | { | 4146 | { |
4107 | int ret; | 4147 | memcpy(vcpu->mmio_data, val, bytes); |
4108 | 4148 | memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); | |
4109 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 4149 | return X86EMUL_CONTINUE; |
4110 | if (ret < 0) | ||
4111 | return 0; | ||
4112 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); | ||
4113 | return 1; | ||
4114 | } | 4150 | } |
4115 | 4151 | ||
4116 | static int emulator_write_emulated_onepage(unsigned long addr, | 4152 | static struct read_write_emulator_ops read_emultor = { |
4117 | const void *val, | 4153 | .read_write_prepare = read_prepare, |
4118 | unsigned int bytes, | 4154 | .read_write_emulate = read_emulate, |
4119 | struct x86_exception *exception, | 4155 | .read_write_mmio = vcpu_mmio_read, |
4120 | struct kvm_vcpu *vcpu) | 4156 | .read_write_exit_mmio = read_exit_mmio, |
4157 | }; | ||
4158 | |||
4159 | static struct read_write_emulator_ops write_emultor = { | ||
4160 | .read_write_emulate = write_emulate, | ||
4161 | .read_write_mmio = write_mmio, | ||
4162 | .read_write_exit_mmio = write_exit_mmio, | ||
4163 | .write = true, | ||
4164 | }; | ||
4165 | |||
4166 | static int emulator_read_write_onepage(unsigned long addr, void *val, | ||
4167 | unsigned int bytes, | ||
4168 | struct x86_exception *exception, | ||
4169 | struct kvm_vcpu *vcpu, | ||
4170 | struct read_write_emulator_ops *ops) | ||
4121 | { | 4171 | { |
4122 | gpa_t gpa; | 4172 | gpa_t gpa; |
4123 | int handled, ret; | 4173 | int handled, ret; |
4174 | bool write = ops->write; | ||
4124 | 4175 | ||
4125 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, true); | 4176 | if (ops->read_write_prepare && |
4177 | ops->read_write_prepare(vcpu, val, bytes)) | ||
4178 | return X86EMUL_CONTINUE; | ||
4179 | |||
4180 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); | ||
4126 | 4181 | ||
4127 | if (ret < 0) | 4182 | if (ret < 0) |
4128 | return X86EMUL_PROPAGATE_FAULT; | 4183 | return X86EMUL_PROPAGATE_FAULT; |
@@ -4131,15 +4186,14 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
4131 | if (ret) | 4186 | if (ret) |
4132 | goto mmio; | 4187 | goto mmio; |
4133 | 4188 | ||
4134 | if (emulator_write_phys(vcpu, gpa, val, bytes)) | 4189 | if (ops->read_write_emulate(vcpu, gpa, val, bytes)) |
4135 | return X86EMUL_CONTINUE; | 4190 | return X86EMUL_CONTINUE; |
4136 | 4191 | ||
4137 | mmio: | 4192 | mmio: |
4138 | trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); | ||
4139 | /* | 4193 | /* |
4140 | * Is this MMIO handled locally? | 4194 | * Is this MMIO handled locally? |
4141 | */ | 4195 | */ |
4142 | handled = vcpu_mmio_write(vcpu, gpa, bytes, val); | 4196 | handled = ops->read_write_mmio(vcpu, gpa, bytes, val); |
4143 | if (handled == bytes) | 4197 | if (handled == bytes) |
4144 | return X86EMUL_CONTINUE; | 4198 | return X86EMUL_CONTINUE; |
4145 | 4199 | ||
@@ -4148,23 +4202,20 @@ mmio: | |||
4148 | val += handled; | 4202 | val += handled; |
4149 | 4203 | ||
4150 | vcpu->mmio_needed = 1; | 4204 | vcpu->mmio_needed = 1; |
4151 | memcpy(vcpu->mmio_data, val, bytes); | ||
4152 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 4205 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
4153 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | 4206 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
4154 | vcpu->mmio_size = bytes; | 4207 | vcpu->mmio_size = bytes; |
4155 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | 4208 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); |
4156 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; | 4209 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = write; |
4157 | memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); | ||
4158 | vcpu->mmio_index = 0; | 4210 | vcpu->mmio_index = 0; |
4159 | 4211 | ||
4160 | return X86EMUL_CONTINUE; | 4212 | return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); |
4161 | } | 4213 | } |
4162 | 4214 | ||
4163 | int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, | 4215 | int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, |
4164 | unsigned long addr, | 4216 | void *val, unsigned int bytes, |
4165 | const void *val, | 4217 | struct x86_exception *exception, |
4166 | unsigned int bytes, | 4218 | struct read_write_emulator_ops *ops) |
4167 | struct x86_exception *exception) | ||
4168 | { | 4219 | { |
4169 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 4220 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
4170 | 4221 | ||
@@ -4173,16 +4224,38 @@ int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, | |||
4173 | int rc, now; | 4224 | int rc, now; |
4174 | 4225 | ||
4175 | now = -addr & ~PAGE_MASK; | 4226 | now = -addr & ~PAGE_MASK; |
4176 | rc = emulator_write_emulated_onepage(addr, val, now, exception, | 4227 | rc = emulator_read_write_onepage(addr, val, now, exception, |
4177 | vcpu); | 4228 | vcpu, ops); |
4229 | |||
4178 | if (rc != X86EMUL_CONTINUE) | 4230 | if (rc != X86EMUL_CONTINUE) |
4179 | return rc; | 4231 | return rc; |
4180 | addr += now; | 4232 | addr += now; |
4181 | val += now; | 4233 | val += now; |
4182 | bytes -= now; | 4234 | bytes -= now; |
4183 | } | 4235 | } |
4184 | return emulator_write_emulated_onepage(addr, val, bytes, exception, | 4236 | |
4185 | vcpu); | 4237 | return emulator_read_write_onepage(addr, val, bytes, exception, |
4238 | vcpu, ops); | ||
4239 | } | ||
4240 | |||
4241 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | ||
4242 | unsigned long addr, | ||
4243 | void *val, | ||
4244 | unsigned int bytes, | ||
4245 | struct x86_exception *exception) | ||
4246 | { | ||
4247 | return emulator_read_write(ctxt, addr, val, bytes, | ||
4248 | exception, &read_emultor); | ||
4249 | } | ||
4250 | |||
4251 | int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, | ||
4252 | unsigned long addr, | ||
4253 | const void *val, | ||
4254 | unsigned int bytes, | ||
4255 | struct x86_exception *exception) | ||
4256 | { | ||
4257 | return emulator_read_write(ctxt, addr, (void *)val, bytes, | ||
4258 | exception, &write_emultor); | ||
4186 | } | 4259 | } |
4187 | 4260 | ||
4188 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | 4261 | #define CMPXCHG_TYPE(t, ptr, old, new) \ |
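
[Editor's note] With the refactoring above, emulator_read_emulated() and emulator_write_emulated() share one page-walking helper (emulator_read_write_onepage()) and differ only in the read_write_emulator_ops table they pass down. The following is a standalone toy sketch of that ops-table pattern; the types and names are invented for illustration and do not correspond to the kernel structures.

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	struct rw_ops {
		/* returns nonzero when the access is satisfied by plain memory */
		int (*emulate)(char *ram, unsigned long off, void *val, int bytes);
		bool write;	/* kept only to mirror the kernel struct */
	};

	static int do_read(char *ram, unsigned long off, void *val, int bytes)
	{
		memcpy(val, ram + off, bytes);
		return 1;
	}

	static int do_write(char *ram, unsigned long off, void *val, int bytes)
	{
		memcpy(ram + off, val, bytes);
		return 1;
	}

	static const struct rw_ops read_ops  = { .emulate = do_read };
	static const struct rw_ops write_ops = { .emulate = do_write, .write = true };

	/* One shared path; the caller picks the direction through the ops table. */
	static int access_onepage(char *ram, unsigned long off, void *val,
				  int bytes, const struct rw_ops *ops)
	{
		if (ops->emulate(ram, off, val, bytes))
			return 0;
		return -1;	/* the kernel would fall back to MMIO handling here */
	}

	int main(void)
	{
		char ram[16] = "hello";
		char buf[8] = { 0 };
		char src[] = " world";

		access_onepage(ram, 0, buf, 6, &read_ops);
		access_onepage(ram, 5, src, sizeof(src), &write_ops);
		printf("%s / %s\n", buf, ram);
		return 0;
	}
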
@@ -4712,7 +4785,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) | |||
4712 | kvm_set_rflags(vcpu, ctxt->eflags); | 4785 | kvm_set_rflags(vcpu, ctxt->eflags); |
4713 | 4786 | ||
4714 | if (irq == NMI_VECTOR) | 4787 | if (irq == NMI_VECTOR) |
4715 | vcpu->arch.nmi_pending = false; | 4788 | vcpu->arch.nmi_pending = 0; |
4716 | else | 4789 | else |
4717 | vcpu->arch.interrupt.pending = false; | 4790 | vcpu->arch.interrupt.pending = false; |
4718 | 4791 | ||
@@ -4788,7 +4861,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4788 | 4861 | ||
4789 | trace_kvm_emulate_insn_start(vcpu); | 4862 | trace_kvm_emulate_insn_start(vcpu); |
4790 | ++vcpu->stat.insn_emulation; | 4863 | ++vcpu->stat.insn_emulation; |
4791 | if (r) { | 4864 | if (r != EMULATION_OK) { |
4792 | if (emulation_type & EMULTYPE_TRAP_UD) | 4865 | if (emulation_type & EMULTYPE_TRAP_UD) |
4793 | return EMULATE_FAIL; | 4866 | return EMULATE_FAIL; |
4794 | if (reexecute_instruction(vcpu, cr2)) | 4867 | if (reexecute_instruction(vcpu, cr2)) |
@@ -5521,7 +5594,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
5521 | /* try to inject new event if pending */ | 5594 | /* try to inject new event if pending */ |
5522 | if (vcpu->arch.nmi_pending) { | 5595 | if (vcpu->arch.nmi_pending) { |
5523 | if (kvm_x86_ops->nmi_allowed(vcpu)) { | 5596 | if (kvm_x86_ops->nmi_allowed(vcpu)) { |
5524 | vcpu->arch.nmi_pending = false; | 5597 | --vcpu->arch.nmi_pending; |
5525 | vcpu->arch.nmi_injected = true; | 5598 | vcpu->arch.nmi_injected = true; |
5526 | kvm_x86_ops->set_nmi(vcpu); | 5599 | kvm_x86_ops->set_nmi(vcpu); |
5527 | } | 5600 | } |
@@ -5553,10 +5626,26 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | |||
5553 | } | 5626 | } |
5554 | } | 5627 | } |
5555 | 5628 | ||
5629 | static void process_nmi(struct kvm_vcpu *vcpu) | ||
5630 | { | ||
5631 | unsigned limit = 2; | ||
5632 | |||
5633 | /* | ||
5634 | * x86 is limited to one NMI running, and one NMI pending after it. | ||
5635 | * If an NMI is already in progress, limit further NMIs to just one. | ||
5636 | * Otherwise, allow two (and we'll inject the first one immediately). | ||
5637 | */ | ||
5638 | if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected) | ||
5639 | limit = 1; | ||
5640 | |||
5641 | vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0); | ||
5642 | vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit); | ||
5643 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5644 | } | ||
5645 | |||
5556 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5646 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5557 | { | 5647 | { |
5558 | int r; | 5648 | int r; |
5559 | bool nmi_pending; | ||
5560 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5649 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
5561 | vcpu->run->request_interrupt_window; | 5650 | vcpu->run->request_interrupt_window; |
5562 | 5651 | ||
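
[Editor's note] process_nmi() above drains the new atomic nmi_queued counter into nmi_pending and clamps it to what x86 can actually hold. A user-space sketch of that collapsing step, using C11 atomics in place of the kernel's atomic_t:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_uint nmi_queued;		/* producers only increment this */
	static unsigned int nmi_pending;	/* consumed on the vcpu thread */
	static bool nmi_injected;		/* an NMI is being delivered right now */
	static bool nmi_masked;			/* guest still inside an NMI handler */

	static void process_nmi(void)
	{
		/* x86 holds at most one NMI in flight plus one pending behind it */
		unsigned int limit = (nmi_masked || nmi_injected) ? 1 : 2;

		nmi_pending += atomic_exchange(&nmi_queued, 0);
		if (nmi_pending > limit)
			nmi_pending = limit;
	}

	int main(void)
	{
		atomic_fetch_add(&nmi_queued, 5);	/* five NMIs raced in at once */
		process_nmi();
		printf("%u NMIs left pending\n", nmi_pending);	/* prints 2 */
		return 0;
	}
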
@@ -5596,6 +5685,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5596 | } | 5685 | } |
5597 | if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) | 5686 | if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) |
5598 | record_steal_time(vcpu); | 5687 | record_steal_time(vcpu); |
5688 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | ||
5689 | process_nmi(vcpu); | ||
5599 | 5690 | ||
5600 | } | 5691 | } |
5601 | 5692 | ||
@@ -5603,19 +5694,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5603 | if (unlikely(r)) | 5694 | if (unlikely(r)) |
5604 | goto out; | 5695 | goto out; |
5605 | 5696 | ||
5606 | /* | ||
5607 | * An NMI can be injected between local nmi_pending read and | ||
5608 | * vcpu->arch.nmi_pending read inside inject_pending_event(). | ||
5609 | * But in that case, KVM_REQ_EVENT will be set, which makes | ||
5610 | * the race described above benign. | ||
5611 | */ | ||
5612 | nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); | ||
5613 | |||
5614 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5697 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
5615 | inject_pending_event(vcpu); | 5698 | inject_pending_event(vcpu); |
5616 | 5699 | ||
5617 | /* enable NMI/IRQ window open exits if needed */ | 5700 | /* enable NMI/IRQ window open exits if needed */ |
5618 | if (nmi_pending) | 5701 | if (vcpu->arch.nmi_pending) |
5619 | kvm_x86_ops->enable_nmi_window(vcpu); | 5702 | kvm_x86_ops->enable_nmi_window(vcpu); |
5620 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | 5703 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) |
5621 | kvm_x86_ops->enable_irq_window(vcpu); | 5704 | kvm_x86_ops->enable_irq_window(vcpu); |
@@ -5678,7 +5761,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5678 | if (hw_breakpoint_active()) | 5761 | if (hw_breakpoint_active()) |
5679 | hw_breakpoint_restore(); | 5762 | hw_breakpoint_restore(); |
5680 | 5763 | ||
5681 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | 5764 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
5682 | 5765 | ||
5683 | vcpu->mode = OUTSIDE_GUEST_MODE; | 5766 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5684 | smp_wmb(); | 5767 | smp_wmb(); |
@@ -6323,7 +6406,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
6323 | 6406 | ||
6324 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | 6407 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) |
6325 | { | 6408 | { |
6326 | vcpu->arch.nmi_pending = false; | 6409 | atomic_set(&vcpu->arch.nmi_queued, 0); |
6410 | vcpu->arch.nmi_pending = 0; | ||
6327 | vcpu->arch.nmi_injected = false; | 6411 | vcpu->arch.nmi_injected = false; |
6328 | 6412 | ||
6329 | vcpu->arch.switch_db_regs = 0; | 6413 | vcpu->arch.switch_db_regs = 0; |
@@ -6598,7 +6682,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
6598 | !vcpu->arch.apf.halted) | 6682 | !vcpu->arch.apf.halted) |
6599 | || !list_empty_careful(&vcpu->async_pf.done) | 6683 | || !list_empty_careful(&vcpu->async_pf.done) |
6600 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 6684 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED |
6601 | || vcpu->arch.nmi_pending || | 6685 | || atomic_read(&vcpu->arch.nmi_queued) || |
6602 | (kvm_arch_interrupt_allowed(vcpu) && | 6686 | (kvm_arch_interrupt_allowed(vcpu) && |
6603 | kvm_cpu_has_interrupt(vcpu)); | 6687 | kvm_cpu_has_interrupt(vcpu)); |
6604 | } | 6688 | } |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index aace6b8691a2..f47fcd30273d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -371,6 +371,7 @@ struct kvm_s390_psw { | |||
371 | #define KVM_S390_INT_VIRTIO 0xffff2603u | 371 | #define KVM_S390_INT_VIRTIO 0xffff2603u |
372 | #define KVM_S390_INT_SERVICE 0xffff2401u | 372 | #define KVM_S390_INT_SERVICE 0xffff2401u |
373 | #define KVM_S390_INT_EMERGENCY 0xffff1201u | 373 | #define KVM_S390_INT_EMERGENCY 0xffff1201u |
374 | #define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u | ||
374 | 375 | ||
375 | struct kvm_s390_interrupt { | 376 | struct kvm_s390_interrupt { |
376 | __u32 type; | 377 | __u32 type; |
@@ -463,7 +464,7 @@ struct kvm_ppc_pvinfo { | |||
463 | #define KVM_CAP_VAPIC 6 | 464 | #define KVM_CAP_VAPIC 6 |
464 | #define KVM_CAP_EXT_CPUID 7 | 465 | #define KVM_CAP_EXT_CPUID 7 |
465 | #define KVM_CAP_CLOCKSOURCE 8 | 466 | #define KVM_CAP_CLOCKSOURCE 8 |
466 | #define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ | 467 | #define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */ |
467 | #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ | 468 | #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ |
468 | #define KVM_CAP_PIT 11 | 469 | #define KVM_CAP_PIT 11 |
469 | #define KVM_CAP_NOP_IO_DELAY 12 | 470 | #define KVM_CAP_NOP_IO_DELAY 12 |
@@ -553,6 +554,9 @@ struct kvm_ppc_pvinfo { | |||
553 | #define KVM_CAP_SPAPR_TCE 63 | 554 | #define KVM_CAP_SPAPR_TCE 63 |
554 | #define KVM_CAP_PPC_SMT 64 | 555 | #define KVM_CAP_PPC_SMT 64 |
555 | #define KVM_CAP_PPC_RMA 65 | 556 | #define KVM_CAP_PPC_RMA 65 |
557 | #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ | ||
558 | #define KVM_CAP_PPC_HIOR 67 | ||
559 | #define KVM_CAP_PPC_PAPR 68 | ||
556 | #define KVM_CAP_S390_GMAP 71 | 560 | #define KVM_CAP_S390_GMAP 71 |
557 | 561 | ||
558 | #ifdef KVM_CAP_IRQ_ROUTING | 562 | #ifdef KVM_CAP_IRQ_ROUTING |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eabb21a30c34..d52623199978 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/msi.h> | 18 | #include <linux/msi.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/rcupdate.h> | 20 | #include <linux/rcupdate.h> |
21 | #include <linux/ratelimit.h> | ||
21 | #include <asm/signal.h> | 22 | #include <asm/signal.h> |
22 | 23 | ||
23 | #include <linux/kvm.h> | 24 | #include <linux/kvm.h> |
@@ -48,6 +49,7 @@ | |||
48 | #define KVM_REQ_EVENT 11 | 49 | #define KVM_REQ_EVENT 11 |
49 | #define KVM_REQ_APF_HALT 12 | 50 | #define KVM_REQ_APF_HALT 12 |
50 | #define KVM_REQ_STEAL_UPDATE 13 | 51 | #define KVM_REQ_STEAL_UPDATE 13 |
52 | #define KVM_REQ_NMI 14 | ||
51 | 53 | ||
52 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 54 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
53 | 55 | ||
@@ -55,16 +57,16 @@ struct kvm; | |||
55 | struct kvm_vcpu; | 57 | struct kvm_vcpu; |
56 | extern struct kmem_cache *kvm_vcpu_cache; | 58 | extern struct kmem_cache *kvm_vcpu_cache; |
57 | 59 | ||
58 | /* | 60 | struct kvm_io_range { |
59 | * It would be nice to use something smarter than a linear search, TBD... | 61 | gpa_t addr; |
60 | * Thankfully we dont expect many devices to register (famous last words :), | 62 | int len; |
61 | * so until then it will suffice. At least its abstracted so we can change | 63 | struct kvm_io_device *dev; |
62 | * in one place. | 64 | }; |
63 | */ | 65 | |
64 | struct kvm_io_bus { | 66 | struct kvm_io_bus { |
65 | int dev_count; | 67 | int dev_count; |
66 | #define NR_IOBUS_DEVS 200 | 68 | #define NR_IOBUS_DEVS 300 |
67 | struct kvm_io_device *devs[NR_IOBUS_DEVS]; | 69 | struct kvm_io_range range[NR_IOBUS_DEVS]; |
68 | }; | 70 | }; |
69 | 71 | ||
70 | enum kvm_bus { | 72 | enum kvm_bus { |
@@ -77,8 +79,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
77 | int len, const void *val); | 79 | int len, const void *val); |
78 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, | 80 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, |
79 | void *val); | 81 | void *val); |
80 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 82 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
81 | struct kvm_io_device *dev); | 83 | int len, struct kvm_io_device *dev); |
82 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 84 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
83 | struct kvm_io_device *dev); | 85 | struct kvm_io_device *dev); |
84 | 86 | ||
@@ -256,8 +258,9 @@ struct kvm { | |||
256 | struct kvm_arch arch; | 258 | struct kvm_arch arch; |
257 | atomic_t users_count; | 259 | atomic_t users_count; |
258 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 260 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
259 | struct kvm_coalesced_mmio_dev *coalesced_mmio_dev; | ||
260 | struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; | 261 | struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; |
262 | spinlock_t ring_lock; | ||
263 | struct list_head coalesced_zones; | ||
261 | #endif | 264 | #endif |
262 | 265 | ||
263 | struct mutex irq_lock; | 266 | struct mutex irq_lock; |
@@ -281,11 +284,8 @@ struct kvm { | |||
281 | 284 | ||
282 | /* The guest did something we don't support. */ | 285 | /* The guest did something we don't support. */ |
283 | #define pr_unimpl(vcpu, fmt, ...) \ | 286 | #define pr_unimpl(vcpu, fmt, ...) \ |
284 | do { \ | 287 | pr_err_ratelimited("kvm: %i: cpu%i " fmt, \ |
285 | if (printk_ratelimit()) \ | 288 | current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__) |
286 | printk(KERN_ERR "kvm: %i: cpu%i " fmt, \ | ||
287 | current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \ | ||
288 | } while (0) | ||
289 | 289 | ||
290 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) | 290 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) |
291 | #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) | 291 | #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) |
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index eaf3a50f9769..3ad0925d23a9 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c | |||
@@ -58,8 +58,6 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | |||
58 | static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) | 58 | static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) |
59 | { | 59 | { |
60 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | 60 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
61 | u32 vector; | ||
62 | int index; | ||
63 | 61 | ||
64 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { | 62 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { |
65 | spin_lock(&assigned_dev->intx_lock); | 63 | spin_lock(&assigned_dev->intx_lock); |
@@ -68,31 +66,35 @@ static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) | |||
68 | spin_unlock(&assigned_dev->intx_lock); | 66 | spin_unlock(&assigned_dev->intx_lock); |
69 | } | 67 | } |
70 | 68 | ||
71 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | 69 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
72 | index = find_index_from_host_irq(assigned_dev, irq); | 70 | assigned_dev->guest_irq, 1); |
73 | if (index >= 0) { | 71 | |
74 | vector = assigned_dev-> | 72 | return IRQ_HANDLED; |
75 | guest_msix_entries[index].vector; | 73 | } |
76 | kvm_set_irq(assigned_dev->kvm, | 74 | |
77 | assigned_dev->irq_source_id, vector, 1); | 75 | #ifdef __KVM_HAVE_MSIX |
78 | } | 76 | static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) |
79 | } else | 77 | { |
78 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
79 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
80 | u32 vector; | ||
81 | |||
82 | if (index >= 0) { | ||
83 | vector = assigned_dev->guest_msix_entries[index].vector; | ||
80 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 84 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
81 | assigned_dev->guest_irq, 1); | 85 | vector, 1); |
86 | } | ||
82 | 87 | ||
83 | return IRQ_HANDLED; | 88 | return IRQ_HANDLED; |
84 | } | 89 | } |
90 | #endif | ||
85 | 91 | ||
86 | /* Ack the irq line for an assigned device */ | 92 | /* Ack the irq line for an assigned device */ |
87 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | 93 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) |
88 | { | 94 | { |
89 | struct kvm_assigned_dev_kernel *dev; | 95 | struct kvm_assigned_dev_kernel *dev = |
90 | 96 | container_of(kian, struct kvm_assigned_dev_kernel, | |
91 | if (kian->gsi == -1) | 97 | ack_notifier); |
92 | return; | ||
93 | |||
94 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
95 | ack_notifier); | ||
96 | 98 | ||
97 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | 99 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); |
98 | 100 | ||
@@ -110,8 +112,9 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
110 | static void deassign_guest_irq(struct kvm *kvm, | 112 | static void deassign_guest_irq(struct kvm *kvm, |
111 | struct kvm_assigned_dev_kernel *assigned_dev) | 113 | struct kvm_assigned_dev_kernel *assigned_dev) |
112 | { | 114 | { |
113 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | 115 | if (assigned_dev->ack_notifier.gsi != -1) |
114 | assigned_dev->ack_notifier.gsi = -1; | 116 | kvm_unregister_irq_ack_notifier(kvm, |
117 | &assigned_dev->ack_notifier); | ||
115 | 118 | ||
116 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 119 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
117 | assigned_dev->guest_irq, 0); | 120 | assigned_dev->guest_irq, 0); |
@@ -143,7 +146,7 @@ static void deassign_host_irq(struct kvm *kvm, | |||
143 | 146 | ||
144 | for (i = 0; i < assigned_dev->entries_nr; i++) | 147 | for (i = 0; i < assigned_dev->entries_nr; i++) |
145 | free_irq(assigned_dev->host_msix_entries[i].vector, | 148 | free_irq(assigned_dev->host_msix_entries[i].vector, |
146 | (void *)assigned_dev); | 149 | assigned_dev); |
147 | 150 | ||
148 | assigned_dev->entries_nr = 0; | 151 | assigned_dev->entries_nr = 0; |
149 | kfree(assigned_dev->host_msix_entries); | 152 | kfree(assigned_dev->host_msix_entries); |
@@ -153,7 +156,7 @@ static void deassign_host_irq(struct kvm *kvm, | |||
153 | /* Deal with MSI and INTx */ | 156 | /* Deal with MSI and INTx */ |
154 | disable_irq(assigned_dev->host_irq); | 157 | disable_irq(assigned_dev->host_irq); |
155 | 158 | ||
156 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | 159 | free_irq(assigned_dev->host_irq, assigned_dev); |
157 | 160 | ||
158 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | 161 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) |
159 | pci_disable_msi(assigned_dev->dev); | 162 | pci_disable_msi(assigned_dev->dev); |
@@ -239,7 +242,7 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, | |||
239 | * are going to be long delays in accepting, acking, etc. | 242 | * are going to be long delays in accepting, acking, etc. |
240 | */ | 243 | */ |
241 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, | 244 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
242 | IRQF_ONESHOT, dev->irq_name, (void *)dev)) | 245 | IRQF_ONESHOT, dev->irq_name, dev)) |
243 | return -EIO; | 246 | return -EIO; |
244 | return 0; | 247 | return 0; |
245 | } | 248 | } |
@@ -258,7 +261,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, | |||
258 | 261 | ||
259 | dev->host_irq = dev->dev->irq; | 262 | dev->host_irq = dev->dev->irq; |
260 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, | 263 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
261 | 0, dev->irq_name, (void *)dev)) { | 264 | 0, dev->irq_name, dev)) { |
262 | pci_disable_msi(dev->dev); | 265 | pci_disable_msi(dev->dev); |
263 | return -EIO; | 266 | return -EIO; |
264 | } | 267 | } |
@@ -284,8 +287,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, | |||
284 | 287 | ||
285 | for (i = 0; i < dev->entries_nr; i++) { | 288 | for (i = 0; i < dev->entries_nr; i++) { |
286 | r = request_threaded_irq(dev->host_msix_entries[i].vector, | 289 | r = request_threaded_irq(dev->host_msix_entries[i].vector, |
287 | NULL, kvm_assigned_dev_thread, | 290 | NULL, kvm_assigned_dev_thread_msix, |
288 | 0, dev->irq_name, (void *)dev); | 291 | 0, dev->irq_name, dev); |
289 | if (r) | 292 | if (r) |
290 | goto err; | 293 | goto err; |
291 | } | 294 | } |
@@ -293,7 +296,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, | |||
293 | return 0; | 296 | return 0; |
294 | err: | 297 | err: |
295 | for (i -= 1; i >= 0; i--) | 298 | for (i -= 1; i >= 0; i--) |
296 | free_irq(dev->host_msix_entries[i].vector, (void *)dev); | 299 | free_irq(dev->host_msix_entries[i].vector, dev); |
297 | pci_disable_msix(dev->dev); | 300 | pci_disable_msix(dev->dev); |
298 | return r; | 301 | return r; |
299 | } | 302 | } |
@@ -406,7 +409,8 @@ static int assign_guest_irq(struct kvm *kvm, | |||
406 | 409 | ||
407 | if (!r) { | 410 | if (!r) { |
408 | dev->irq_requested_type |= guest_irq_type; | 411 | dev->irq_requested_type |= guest_irq_type; |
409 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | 412 | if (dev->ack_notifier.gsi != -1) |
413 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
410 | } else | 414 | } else |
411 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | 415 | kvm_free_irq_source_id(kvm, dev->irq_source_id); |
412 | 416 | ||
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index fc8487564d1f..a6ec206f36ba 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
@@ -24,10 +24,19 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) | |||
24 | static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, | 24 | static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, |
25 | gpa_t addr, int len) | 25 | gpa_t addr, int len) |
26 | { | 26 | { |
27 | struct kvm_coalesced_mmio_zone *zone; | 27 | /* is it in a batchable area ? |
28 | * (addr,len) is fully included in | ||
29 | * (zone->addr, zone->size) | ||
30 | */ | ||
31 | |||
32 | return (dev->zone.addr <= addr && | ||
33 | addr + len <= dev->zone.addr + dev->zone.size); | ||
34 | } | ||
35 | |||
36 | static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) | ||
37 | { | ||
28 | struct kvm_coalesced_mmio_ring *ring; | 38 | struct kvm_coalesced_mmio_ring *ring; |
29 | unsigned avail; | 39 | unsigned avail; |
30 | int i; | ||
31 | 40 | ||
32 | /* Are we able to batch it ? */ | 41 | /* Are we able to batch it ? */ |
33 | 42 | ||
@@ -37,25 +46,12 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, | |||
37 | */ | 46 | */ |
38 | ring = dev->kvm->coalesced_mmio_ring; | 47 | ring = dev->kvm->coalesced_mmio_ring; |
39 | avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; | 48 | avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; |
40 | if (avail < KVM_MAX_VCPUS) { | 49 | if (avail == 0) { |
41 | /* full */ | 50 | /* full */ |
42 | return 0; | 51 | return 0; |
43 | } | 52 | } |
44 | 53 | ||
45 | /* is it in a batchable area ? */ | 54 | return 1; |
46 | |||
47 | for (i = 0; i < dev->nb_zones; i++) { | ||
48 | zone = &dev->zone[i]; | ||
49 | |||
50 | /* (addr,len) is fully included in | ||
51 | * (zone->addr, zone->size) | ||
52 | */ | ||
53 | |||
54 | if (zone->addr <= addr && | ||
55 | addr + len <= zone->addr + zone->size) | ||
56 | return 1; | ||
57 | } | ||
58 | return 0; | ||
59 | } | 55 | } |
60 | 56 | ||
61 | static int coalesced_mmio_write(struct kvm_io_device *this, | 57 | static int coalesced_mmio_write(struct kvm_io_device *this, |
@@ -63,10 +59,16 @@ static int coalesced_mmio_write(struct kvm_io_device *this, | |||
63 | { | 59 | { |
64 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); | 60 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
65 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; | 61 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; |
62 | |||
66 | if (!coalesced_mmio_in_range(dev, addr, len)) | 63 | if (!coalesced_mmio_in_range(dev, addr, len)) |
67 | return -EOPNOTSUPP; | 64 | return -EOPNOTSUPP; |
68 | 65 | ||
69 | spin_lock(&dev->lock); | 66 | spin_lock(&dev->kvm->ring_lock); |
67 | |||
68 | if (!coalesced_mmio_has_room(dev)) { | ||
69 | spin_unlock(&dev->kvm->ring_lock); | ||
70 | return -EOPNOTSUPP; | ||
71 | } | ||
70 | 72 | ||
71 | /* copy data in first free entry of the ring */ | 73 | /* copy data in first free entry of the ring */ |
72 | 74 | ||
@@ -75,7 +77,7 @@ static int coalesced_mmio_write(struct kvm_io_device *this, | |||
75 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); | 77 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); |
76 | smp_wmb(); | 78 | smp_wmb(); |
77 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; | 79 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; |
78 | spin_unlock(&dev->lock); | 80 | spin_unlock(&dev->kvm->ring_lock); |
79 | return 0; | 81 | return 0; |
80 | } | 82 | } |
81 | 83 | ||
@@ -83,6 +85,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this) | |||
83 | { | 85 | { |
84 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); | 86 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
85 | 87 | ||
88 | list_del(&dev->list); | ||
89 | |||
86 | kfree(dev); | 90 | kfree(dev); |
87 | } | 91 | } |
88 | 92 | ||
@@ -93,7 +97,6 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = { | |||
93 | 97 | ||
94 | int kvm_coalesced_mmio_init(struct kvm *kvm) | 98 | int kvm_coalesced_mmio_init(struct kvm *kvm) |
95 | { | 99 | { |
96 | struct kvm_coalesced_mmio_dev *dev; | ||
97 | struct page *page; | 100 | struct page *page; |
98 | int ret; | 101 | int ret; |
99 | 102 | ||
@@ -101,31 +104,18 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) | |||
101 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 104 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
102 | if (!page) | 105 | if (!page) |
103 | goto out_err; | 106 | goto out_err; |
104 | kvm->coalesced_mmio_ring = page_address(page); | ||
105 | |||
106 | ret = -ENOMEM; | ||
107 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | ||
108 | if (!dev) | ||
109 | goto out_free_page; | ||
110 | spin_lock_init(&dev->lock); | ||
111 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); | ||
112 | dev->kvm = kvm; | ||
113 | kvm->coalesced_mmio_dev = dev; | ||
114 | 107 | ||
115 | mutex_lock(&kvm->slots_lock); | 108 | ret = 0; |
116 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); | 109 | kvm->coalesced_mmio_ring = page_address(page); |
117 | mutex_unlock(&kvm->slots_lock); | ||
118 | if (ret < 0) | ||
119 | goto out_free_dev; | ||
120 | 110 | ||
121 | return ret; | 111 | /* |
112 | * We're using this spinlock to sync access to the coalesced ring. | ||
113 | * The list doesn't need its own lock since device registration and | ||
114 | * unregistration should only happen when kvm->slots_lock is held. | ||
115 | */ | ||
116 | spin_lock_init(&kvm->ring_lock); | ||
117 | INIT_LIST_HEAD(&kvm->coalesced_zones); | ||
122 | 118 | ||
123 | out_free_dev: | ||
124 | kvm->coalesced_mmio_dev = NULL; | ||
125 | kfree(dev); | ||
126 | out_free_page: | ||
127 | kvm->coalesced_mmio_ring = NULL; | ||
128 | __free_page(page); | ||
129 | out_err: | 119 | out_err: |
130 | return ret; | 120 | return ret; |
131 | } | 121 | } |
@@ -139,51 +129,50 @@ void kvm_coalesced_mmio_free(struct kvm *kvm) | |||
139 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 129 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
140 | struct kvm_coalesced_mmio_zone *zone) | 130 | struct kvm_coalesced_mmio_zone *zone) |
141 | { | 131 | { |
142 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | 132 | int ret; |
133 | struct kvm_coalesced_mmio_dev *dev; | ||
143 | 134 | ||
144 | if (dev == NULL) | 135 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); |
145 | return -ENXIO; | 136 | if (!dev) |
137 | return -ENOMEM; | ||
138 | |||
139 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); | ||
140 | dev->kvm = kvm; | ||
141 | dev->zone = *zone; | ||
146 | 142 | ||
147 | mutex_lock(&kvm->slots_lock); | 143 | mutex_lock(&kvm->slots_lock); |
148 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { | 144 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, |
149 | mutex_unlock(&kvm->slots_lock); | 145 | zone->size, &dev->dev); |
150 | return -ENOBUFS; | 146 | if (ret < 0) |
151 | } | 147 | goto out_free_dev; |
148 | list_add_tail(&dev->list, &kvm->coalesced_zones); | ||
149 | mutex_unlock(&kvm->slots_lock); | ||
152 | 150 | ||
153 | dev->zone[dev->nb_zones] = *zone; | 151 | return ret; |
154 | dev->nb_zones++; | ||
155 | 152 | ||
153 | out_free_dev: | ||
156 | mutex_unlock(&kvm->slots_lock); | 154 | mutex_unlock(&kvm->slots_lock); |
155 | |||
156 | kfree(dev); | ||
157 | |||
158 | if (dev == NULL) | ||
159 | return -ENXIO; | ||
160 | |||
157 | return 0; | 161 | return 0; |
158 | } | 162 | } |
159 | 163 | ||
160 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | 164 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, |
161 | struct kvm_coalesced_mmio_zone *zone) | 165 | struct kvm_coalesced_mmio_zone *zone) |
162 | { | 166 | { |
163 | int i; | 167 | struct kvm_coalesced_mmio_dev *dev, *tmp; |
164 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | ||
165 | struct kvm_coalesced_mmio_zone *z; | ||
166 | |||
167 | if (dev == NULL) | ||
168 | return -ENXIO; | ||
169 | 168 | ||
170 | mutex_lock(&kvm->slots_lock); | 169 | mutex_lock(&kvm->slots_lock); |
171 | 170 | ||
172 | i = dev->nb_zones; | 171 | list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) |
173 | while (i) { | 172 | if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) { |
174 | z = &dev->zone[i - 1]; | 173 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev); |
175 | 174 | kvm_iodevice_destructor(&dev->dev); | |
176 | /* unregister all zones | ||
177 | * included in (zone->addr, zone->size) | ||
178 | */ | ||
179 | |||
180 | if (zone->addr <= z->addr && | ||
181 | z->addr + z->size <= zone->addr + zone->size) { | ||
182 | dev->nb_zones--; | ||
183 | *z = dev->zone[dev->nb_zones]; | ||
184 | } | 175 | } |
185 | i--; | ||
186 | } | ||
187 | 176 | ||
188 | mutex_unlock(&kvm->slots_lock); | 177 | mutex_unlock(&kvm->slots_lock); |
189 | 178 | ||
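
[Editor's note] With the changes above, each registered zone is its own device on the MMIO bus, and all zones share one ring protected by kvm->ring_lock; coalesced_mmio_write() only commits an entry when coalesced_mmio_has_room() says the ring is not full. Below is a stand-alone sketch of that ring arithmetic, with a made-up ring size standing in for KVM_COALESCED_MMIO_MAX.

	#include <stdio.h>
	#include <string.h>

	#define RING_MAX 64	/* stand-in for KVM_COALESCED_MMIO_MAX */

	struct entry { unsigned long addr; int len; char data[8]; };

	/* first is advanced by the consumer (userspace), last by the producer */
	struct ring { unsigned int first, last; struct entry slot[RING_MAX]; };

	static int ring_has_room(const struct ring *r)
	{
		/* one slot stays unused so that first == last means "empty" */
		return ((r->first - r->last - 1) % RING_MAX) != 0;
	}

	static int ring_push(struct ring *r, unsigned long addr,
			     const void *val, int len)
	{
		if (!ring_has_room(r))
			return -1;	/* caller falls back to a normal MMIO exit */

		r->slot[r->last].addr = addr;
		r->slot[r->last].len = len;
		memcpy(r->slot[r->last].data, val, len);
		r->last = (r->last + 1) % RING_MAX;
		return 0;
	}

	int main(void)
	{
		struct ring r = { 0 };
		int ret = ring_push(&r, 0xfee00000UL, "\x01\x02\x03\x04", 4);

		printf("push %s, last=%u\n", ret ? "failed" : "ok", r.last);
		return 0;
	}
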
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 8a5959e3535f..b280c20444d1 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h | |||
@@ -12,14 +12,13 @@ | |||
12 | 12 | ||
13 | #ifdef CONFIG_KVM_MMIO | 13 | #ifdef CONFIG_KVM_MMIO |
14 | 14 | ||
15 | #define KVM_COALESCED_MMIO_ZONE_MAX 100 | 15 | #include <linux/list.h> |
16 | 16 | ||
17 | struct kvm_coalesced_mmio_dev { | 17 | struct kvm_coalesced_mmio_dev { |
18 | struct list_head list; | ||
18 | struct kvm_io_device dev; | 19 | struct kvm_io_device dev; |
19 | struct kvm *kvm; | 20 | struct kvm *kvm; |
20 | spinlock_t lock; | 21 | struct kvm_coalesced_mmio_zone zone; |
21 | int nb_zones; | ||
22 | struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; | ||
23 | }; | 22 | }; |
24 | 23 | ||
25 | int kvm_coalesced_mmio_init(struct kvm *kvm); | 24 | int kvm_coalesced_mmio_init(struct kvm *kvm); |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 73358d256fa2..f59c1e8de7a2 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
586 | 586 | ||
587 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); | 587 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); |
588 | 588 | ||
589 | ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); | 589 | ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, |
590 | &p->dev); | ||
590 | if (ret < 0) | 591 | if (ret < 0) |
591 | goto unlock_fail; | 592 | goto unlock_fail; |
592 | 593 | ||
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 8df1ca104a7f..3eed61eb4867 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
@@ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
394 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 394 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
395 | ioapic->kvm = kvm; | 395 | ioapic->kvm = kvm; |
396 | mutex_lock(&kvm->slots_lock); | 396 | mutex_lock(&kvm->slots_lock); |
397 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | 397 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, |
398 | IOAPIC_MEM_LENGTH, &ioapic->dev); | ||
398 | mutex_unlock(&kvm->slots_lock); | 399 | mutex_unlock(&kvm->slots_lock); |
399 | if (ret < 0) { | 400 | if (ret < 0) { |
400 | kvm->arch.vioapic = NULL; | 401 | kvm->arch.vioapic = NULL; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aefdda390f5e..d9cfb782cb81 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -47,6 +47,8 @@ | |||
47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
48 | #include <linux/hugetlb.h> | 48 | #include <linux/hugetlb.h> |
49 | #include <linux/slab.h> | 49 | #include <linux/slab.h> |
50 | #include <linux/sort.h> | ||
51 | #include <linux/bsearch.h> | ||
50 | 52 | ||
51 | #include <asm/processor.h> | 53 | #include <asm/processor.h> |
52 | #include <asm/io.h> | 54 | #include <asm/io.h> |
@@ -2391,24 +2393,92 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
2391 | int i; | 2393 | int i; |
2392 | 2394 | ||
2393 | for (i = 0; i < bus->dev_count; i++) { | 2395 | for (i = 0; i < bus->dev_count; i++) { |
2394 | struct kvm_io_device *pos = bus->devs[i]; | 2396 | struct kvm_io_device *pos = bus->range[i].dev; |
2395 | 2397 | ||
2396 | kvm_iodevice_destructor(pos); | 2398 | kvm_iodevice_destructor(pos); |
2397 | } | 2399 | } |
2398 | kfree(bus); | 2400 | kfree(bus); |
2399 | } | 2401 | } |
2400 | 2402 | ||
2403 | int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | ||
2404 | { | ||
2405 | const struct kvm_io_range *r1 = p1; | ||
2406 | const struct kvm_io_range *r2 = p2; | ||
2407 | |||
2408 | if (r1->addr < r2->addr) | ||
2409 | return -1; | ||
2410 | if (r1->addr + r1->len > r2->addr + r2->len) | ||
2411 | return 1; | ||
2412 | return 0; | ||
2413 | } | ||
2414 | |||
2415 | int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | ||
2416 | gpa_t addr, int len) | ||
2417 | { | ||
2418 | if (bus->dev_count == NR_IOBUS_DEVS) | ||
2419 | return -ENOSPC; | ||
2420 | |||
2421 | bus->range[bus->dev_count++] = (struct kvm_io_range) { | ||
2422 | .addr = addr, | ||
2423 | .len = len, | ||
2424 | .dev = dev, | ||
2425 | }; | ||
2426 | |||
2427 | sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range), | ||
2428 | kvm_io_bus_sort_cmp, NULL); | ||
2429 | |||
2430 | return 0; | ||
2431 | } | ||
2432 | |||
2433 | int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, | ||
2434 | gpa_t addr, int len) | ||
2435 | { | ||
2436 | struct kvm_io_range *range, key; | ||
2437 | int off; | ||
2438 | |||
2439 | key = (struct kvm_io_range) { | ||
2440 | .addr = addr, | ||
2441 | .len = len, | ||
2442 | }; | ||
2443 | |||
2444 | range = bsearch(&key, bus->range, bus->dev_count, | ||
2445 | sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp); | ||
2446 | if (range == NULL) | ||
2447 | return -ENOENT; | ||
2448 | |||
2449 | off = range - bus->range; | ||
2450 | |||
2451 | while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) | ||
2452 | off--; | ||
2453 | |||
2454 | return off; | ||
2455 | } | ||
2456 | |||
2401 | /* kvm_io_bus_write - called under kvm->slots_lock */ | 2457 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
2402 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2458 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2403 | int len, const void *val) | 2459 | int len, const void *val) |
2404 | { | 2460 | { |
2405 | int i; | 2461 | int idx; |
2406 | struct kvm_io_bus *bus; | 2462 | struct kvm_io_bus *bus; |
2463 | struct kvm_io_range range; | ||
2464 | |||
2465 | range = (struct kvm_io_range) { | ||
2466 | .addr = addr, | ||
2467 | .len = len, | ||
2468 | }; | ||
2407 | 2469 | ||
2408 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2470 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
2409 | for (i = 0; i < bus->dev_count; i++) | 2471 | idx = kvm_io_bus_get_first_dev(bus, addr, len); |
2410 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | 2472 | if (idx < 0) |
2473 | return -EOPNOTSUPP; | ||
2474 | |||
2475 | while (idx < bus->dev_count && | ||
2476 | kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | ||
2477 | if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val)) | ||
2411 | return 0; | 2478 | return 0; |
2479 | idx++; | ||
2480 | } | ||
2481 | |||
2412 | return -EOPNOTSUPP; | 2482 | return -EOPNOTSUPP; |
2413 | } | 2483 | } |
2414 | 2484 | ||
@@ -2416,19 +2486,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
2416 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2486 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2417 | int len, void *val) | 2487 | int len, void *val) |
2418 | { | 2488 | { |
2419 | int i; | 2489 | int idx; |
2420 | struct kvm_io_bus *bus; | 2490 | struct kvm_io_bus *bus; |
2491 | struct kvm_io_range range; | ||
2492 | |||
2493 | range = (struct kvm_io_range) { | ||
2494 | .addr = addr, | ||
2495 | .len = len, | ||
2496 | }; | ||
2421 | 2497 | ||
2422 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2498 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
2423 | for (i = 0; i < bus->dev_count; i++) | 2499 | idx = kvm_io_bus_get_first_dev(bus, addr, len); |
2424 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | 2500 | if (idx < 0) |
2501 | return -EOPNOTSUPP; | ||
2502 | |||
2503 | while (idx < bus->dev_count && | ||
2504 | kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | ||
2505 | if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val)) | ||
2425 | return 0; | 2506 | return 0; |
2507 | idx++; | ||
2508 | } | ||
2509 | |||
2426 | return -EOPNOTSUPP; | 2510 | return -EOPNOTSUPP; |
2427 | } | 2511 | } |
2428 | 2512 | ||
2429 | /* Caller must hold slots_lock. */ | 2513 | /* Caller must hold slots_lock. */ |
2430 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 2514 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2431 | struct kvm_io_device *dev) | 2515 | int len, struct kvm_io_device *dev) |
2432 | { | 2516 | { |
2433 | struct kvm_io_bus *new_bus, *bus; | 2517 | struct kvm_io_bus *new_bus, *bus; |
2434 | 2518 | ||
@@ -2440,7 +2524,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
2440 | if (!new_bus) | 2524 | if (!new_bus) |
2441 | return -ENOMEM; | 2525 | return -ENOMEM; |
2442 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | 2526 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); |
2443 | new_bus->devs[new_bus->dev_count++] = dev; | 2527 | kvm_io_bus_insert_dev(new_bus, dev, addr, len); |
2444 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 2528 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
2445 | synchronize_srcu_expedited(&kvm->srcu); | 2529 | synchronize_srcu_expedited(&kvm->srcu); |
2446 | kfree(bus); | 2530 | kfree(bus); |
@@ -2464,9 +2548,13 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
2464 | 2548 | ||
2465 | r = -ENOENT; | 2549 | r = -ENOENT; |
2466 | for (i = 0; i < new_bus->dev_count; i++) | 2550 | for (i = 0; i < new_bus->dev_count; i++) |
2467 | if (new_bus->devs[i] == dev) { | 2551 | if (new_bus->range[i].dev == dev) { |
2468 | r = 0; | 2552 | r = 0; |
2469 | new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; | 2553 | new_bus->dev_count--; |
2554 | new_bus->range[i] = new_bus->range[new_bus->dev_count]; | ||
2555 | sort(new_bus->range, new_bus->dev_count, | ||
2556 | sizeof(struct kvm_io_range), | ||
2557 | kvm_io_bus_sort_cmp, NULL); | ||
2470 | break; | 2558 | break; |
2471 | } | 2559 | } |
2472 | 2560 | ||
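
[Editor's note] The kvm_main.c hunks above replace the linear device scan with a sorted array of (addr, len) ranges looked up via bsearch(). A user-space sketch of the containment-based comparator this relies on, using the C library's qsort()/bsearch() in place of the kernel's sort()/bsearch(); the device table and IDs are made up for illustration.

	#include <stdio.h>
	#include <stdlib.h>

	/* simplified stand-in for struct kvm_io_range: a device covering [addr, addr+len) */
	struct io_range { unsigned long addr; int len; int dev_id; };

	/*
	 * Same ordering trick as kvm_io_bus_sort_cmp(): a lookup key compares
	 * "equal" to a registered range that fully contains it, so bsearch()
	 * lands on a matching device.
	 */
	static int range_cmp(const void *p1, const void *p2)
	{
		const struct io_range *r1 = p1, *r2 = p2;

		if (r1->addr < r2->addr)
			return -1;
		if (r1->addr + r1->len > r2->addr + r2->len)
			return 1;
		return 0;
	}

	int main(void)
	{
		/* three made-up windows registered on the bus */
		struct io_range bus[] = {
			{ 0xcf8,        8, 1 },
			{ 0x3f8,        8, 2 },
			{ 0xfee00000, 4096, 3 },
		};
		int n = sizeof(bus) / sizeof(bus[0]);

		qsort(bus, n, sizeof(bus[0]), range_cmp);

		struct io_range key = { .addr = 0x3fd, .len = 1 };
		struct io_range *hit = bsearch(&key, bus, n, sizeof(bus[0]), range_cmp);

		if (hit)
			printf("access at 0x%lx handled by device %d\n",
			       key.addr, hit->dev_id);
		return 0;
	}

The kernel additionally steps back from the bsearch() hit to the first entry with the same containment match (kvm_io_bus_get_first_dev()), because several devices may have been registered over overlapping ranges.
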