60 files changed, 2449 insertions, 1369 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 93413ce96883..27e0488d54d2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1201,6 +1201,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			[KVM,Intel] Disable FlexPriority feature (TPR shadow).
 			Default is 1 (enabled)
 
+	kvm-intel.nested=
+			[KVM,Intel] Enable VMX nesting (nVMX).
+			Default is 0 (disabled)
+
 	kvm-intel.unrestricted_guest=
 			[KVM,Intel] Disable unrestricted guest feature
 			(virtualized real and unpaged mode) on capable
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index b0e4b9cd6a66..7945b0bd35e2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -175,10 +175,30 @@ Parameters: vcpu id (apic id on x86)
 Returns: vcpu fd on success, -1 on error
 
 This API adds a vcpu to a virtual machine. The vcpu id is a small integer
-in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the
-KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
+in the range [0, max_vcpus).
+
+The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
+the KVM_CHECK_EXTENSION ioctl() at run-time.
+The maximum possible value for max_vcpus can be retrieved using the
+KVM_CAP_MAX_VCPUS of the KVM_CHECK_EXTENSION ioctl() at run-time.
+
 If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
 cpus max.
+If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
+the same as the value returned from KVM_CAP_NR_VCPUS.
+
+On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
+threads in one or more virtual CPU cores. (This is because the
+hardware requires all the hardware threads in a CPU core to be in the
+same partition.) The KVM_CAP_PPC_SMT capability indicates the number
+of vcpus per virtual core (vcore). The vcore id is obtained by
+dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
+given vcore will always be in the same physical core as each other
+(though that might be a different physical core from time to time).
+Userspace can control the threading (SMT) mode of the guest by its
+allocation of vcpu ids. For example, if userspace wants
+single-threaded guest vcpus, it should make all vcpu ids be a multiple
+of the number of vcpus per vcore.
 
 On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
 threads in one or more virtual CPU cores. (This is because the
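The capability checks and vcpu id allocation described in the new text can be
illustrated with a small userspace sketch. This is not part of the patch: it
assumes <sys/ioctl.h> and <linux/kvm.h> are included, and that kvm_fd (the
/dev/kvm fd), vm_fd (from KVM_CREATE_VM), n_guest_cpus and the vcpu_fd array
already exist.

	int nr_vcpus = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
	int max_vcpus = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

	if (nr_vcpus <= 0)
		nr_vcpus = 4;		/* capability absent: assume 4 */
	if (max_vcpus <= 0)
		max_vcpus = nr_vcpus;	/* absent: same as KVM_CAP_NR_VCPUS */

	/*
	 * On book3s_hv, vcore id = vcpu id / vcpus-per-vcore.  For a
	 * single-threaded guest, space the vcpu ids one vcore apart.
	 */
	int smt = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);
	int stride = (smt > 0) ? smt : 1;

	for (int i = 0; i < n_guest_cpus; i++)
		vcpu_fd[i] = ioctl(vm_fd, KVM_CREATE_VCPU, i * stride);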
@@ -1633,3 +1653,50 @@ developer registration required to access it).
 	char padding[256];
 };
 };
+
+6. Capabilities that can be enabled
+
+There are certain capabilities that change the behavior of the virtual CPU when
+enabled. To enable them, please see section 4.37. Below you can find a list of
+capabilities and what their effect on the vCPU is when enabling them.
+
+The following information is provided along with the description:
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Parameters: what parameters are accepted by the capability.
+
+  Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+6.1 KVM_CAP_PPC_OSI
+
+Architectures: ppc
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables interception of OSI hypercalls that otherwise would
+be treated as normal system calls to be injected into the guest. OSI hypercalls
+were invented by Mac-on-Linux to have a standardized communication mechanism
+between the guest and the host.
+
+When this capability is enabled, KVM_EXIT_OSI can occur.
+
+6.2 KVM_CAP_PPC_PAPR
+
+Architectures: ppc
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables interception of PAPR hypercalls. PAPR hypercalls are
+done using the hypercall instruction "sc 1".
+
+It also sets the guest privilege level to "supervisor" mode. Usually the guest
+runs in "hypervisor" privilege mode with a few missing features.
+
+In addition to the above, it changes the semantics of SDR1. In this mode, the
+HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
+HTAB invisible to the guest.
+
+When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
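For reference, enabling one of the section 6 capabilities is done with the
KVM_ENABLE_CAP vcpu ioctl mentioned in section 4.37. A minimal sketch, not
part of this patch, assuming vcpu_fd was returned by KVM_CREATE_VCPU:

	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_PPC_PAPR,	/* or KVM_CAP_PPC_OSI */
	};

	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
		perror("KVM_ENABLE_CAP");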
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index a4f6c85431f8..08fe69edcd10 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -149,6 +149,12 @@ struct kvm_regs {
 #define KVM_SREGS_E_UPDATE_DBSR (1 << 3)
 
 /*
+ * Book3S special bits to indicate contents in the struct by maintaining
+ * backwards compatibility with older structs. If adding a new field,
+ * please make sure to add a flag for that new field */
+#define KVM_SREGS_S_HIOR (1 << 0)
+
+/*
  * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
  * previous KVM_GET_REGS.
  *
@@ -173,6 +179,8 @@ struct kvm_sregs {
 				__u64 ibat[8];
 				__u64 dbat[8];
 			} ppc32;
+			__u64 flags; /* KVM_SREGS_S_ */
+			__u64 hior;
 		} s;
 		struct {
 			union {
@@ -276,6 +284,11 @@ struct kvm_guest_debug_arch {
 #define KVM_INTERRUPT_UNSET	-2U
 #define KVM_INTERRUPT_SET_LEVEL	-3U
 
+#define KVM_CPU_440		1
+#define KVM_CPU_E500V2		2
+#define KVM_CPU_3S_32		3
+#define KVM_CPU_3S_64		4
+
 /* for KVM_CAP_SPAPR_TCE */
 struct kvm_create_spapr_tce {
 	__u64 liobn;
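A possible userspace use of the KVM_SREGS_S_HIOR flag and hior field added
above, sketched only as an illustration (not code from this series): it
assumes the fields are reached through the book3s "s" member of the sregs
union and that vcpu_fd refers to an existing vcpu; the hior value 0 is just
an example.

	struct kvm_sregs sregs;

	ioctl(vcpu_fd, KVM_GET_SREGS, &sregs);
	sregs.u.s.flags |= KVM_SREGS_S_HIOR;	/* mark hior as valid */
	sregs.u.s.hior = 0;			/* e.g. vectors at address 0 */
	ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);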
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 98da010252a3..a384ffdf33de 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s {
 #endif
 	int context_id[SID_CONTEXTS];
 
+	bool hior_sregs;		/* HIOR is set by SREGS, not PVR */
+
 	struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
 	struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
 	struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
@@ -139,15 +141,14 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
-extern void kvmppc_handler_lowmem_trampoline(void);
-extern void kvmppc_handler_trampoline_enter(void);
-extern void kvmppc_rmcall(ulong srr0, ulong srr1);
+extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
 extern void kvmppc_load_up_fpu(void);
 extern void kvmppc_load_up_altivec(void);
 extern void kvmppc_load_up_vsx(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
 extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
+extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
@@ -382,6 +383,39 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 }
 #endif
 
+static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
+					     unsigned long pte_index)
+{
+	unsigned long rb, va_low;
+
+	rb = (v & ~0x7fUL) << 16;		/* AVA field */
+	va_low = pte_index >> 3;
+	if (v & HPTE_V_SECONDARY)
+		va_low = ~va_low;
+	/* xor vsid from AVA */
+	if (!(v & HPTE_V_1TB_SEG))
+		va_low ^= v >> 12;
+	else
+		va_low ^= v >> 24;
+	va_low &= 0x7ff;
+	if (v & HPTE_V_LARGE) {
+		rb |= 1;			/* L field */
+		if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+		    (r & 0xff000)) {
+			/* non-16MB large page, must be 64k */
+			/* (masks depend on page size) */
+			rb |= 0x1000;		/* page encoding in LP field */
+			rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
+			rb |= (va_low & 0xfe);	/* AVAL field (P7 doesn't seem to care) */
+		}
+	} else {
+		/* 4kB page */
+		rb |= (va_low & 0x7ff) << 12;	/* remaining 11b of VA */
+	}
+	rb |= (v >> 54) & 0x300;		/* B field */
+	return rb;
+}
+
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3 0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index ef7b3688c3b6..1f2f5b6156bd 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -75,6 +75,8 @@ struct kvmppc_host_state {
 	ulong scratch0;
 	ulong scratch1;
 	u8 in_guest;
+	u8 restore_hid5;
+	u8 napping;
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu *kvm_vcpu;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index cc22b282d755..bf8af5d5d5dc 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -198,21 +198,29 @@ struct kvm_arch {
  */
 struct kvmppc_vcore {
 	int n_runnable;
-	int n_blocked;
+	int n_busy;
 	int num_threads;
 	int entry_exit_count;
 	int n_woken;
 	int nap_count;
+	int napping_threads;
 	u16 pcpu;
-	u8 vcore_running;
+	u8 vcore_state;
 	u8 in_guest;
 	struct list_head runnable_threads;
 	spinlock_t lock;
+	wait_queue_head_t wq;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
 #define VCORE_EXIT_COUNT(vc)	((vc)->entry_exit_count >> 8)
 
+/* Values for vcore_state */
+#define VCORE_INACTIVE	0
+#define VCORE_RUNNING	1
+#define VCORE_EXITING	2
+#define VCORE_SLEEPING	3
+
 struct kvmppc_pte {
 	ulong eaddr;
 	u64 vpage;
@@ -258,14 +266,6 @@
 	ulong host_stack;
 	u32 host_pid;
 #ifdef CONFIG_PPC_BOOK3S
-	ulong host_msr;
-	ulong host_r2;
-	void *host_retip;
-	ulong trampoline_lowmem;
-	ulong trampoline_enter;
-	ulong highmem_handler;
-	ulong rmcall;
-	ulong host_paca_phys;
 	struct kvmppc_slb slb[64];
 	int slb_max;	/* 1 + index of last valid entry in slb[] */
 	int slb_nr;	/* total number of entries in SLB */
@@ -389,6 +389,9 @@ struct kvm_vcpu_arch {
 	u8 dcr_is_write;
 	u8 osi_needed;
 	u8 osi_enabled;
+	u8 papr_enabled;
+	u8 sane;
+	u8 cpu_type;
 	u8 hcall_needed;
 
 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -408,11 +411,13 @@ struct kvm_vcpu_arch {
 	struct dtl *dtl;
 	struct dtl *dtl_end;
 
+	wait_queue_head_t *wqp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
 	int state;
 	int ptid;
+	bool timer_running;
 	wait_queue_head_t cpu_run;
 
 	struct kvm_vcpu_arch_shared *shared;
@@ -428,8 +433,9 @@ struct kvm_vcpu_arch {
 #endif
 };
 
-#define KVMPPC_VCPU_BUSY_IN_HOST	0
-#define KVMPPC_VCPU_BLOCKED		1
+/* Values for vcpu->arch.state */
+#define KVMPPC_VCPU_STOPPED		0
+#define KVMPPC_VCPU_BUSY_IN_HOST	1
 #define KVMPPC_VCPU_RUNNABLE		2
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index d121f49d62b8..46efd1a265c9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
+extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 5f078bc2063e..69f7ffe7f674 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
| @@ -44,6 +44,7 @@ | |||
| 44 | #include <asm/compat.h> | 44 | #include <asm/compat.h> |
| 45 | #include <asm/mmu.h> | 45 | #include <asm/mmu.h> |
| 46 | #include <asm/hvcall.h> | 46 | #include <asm/hvcall.h> |
| 47 | #include <asm/xics.h> | ||
| 47 | #endif | 48 | #endif |
| 48 | #ifdef CONFIG_PPC_ISERIES | 49 | #ifdef CONFIG_PPC_ISERIES |
| 49 | #include <asm/iseries/alpaca.h> | 50 | #include <asm/iseries/alpaca.h> |
| @@ -449,8 +450,6 @@ int main(void) | |||
| 449 | #ifdef CONFIG_PPC_BOOK3S | 450 | #ifdef CONFIG_PPC_BOOK3S |
| 450 | DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); | 451 | DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); |
| 451 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); | 452 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); |
| 452 | DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); | ||
| 453 | DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); | ||
| 454 | DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); | 453 | DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); |
| 455 | DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); | 454 | DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); |
| 456 | DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); | 455 | DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); |
| @@ -458,14 +457,12 @@ int main(void) | |||
| 458 | DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); | 457 | DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); |
| 459 | DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); | 458 | DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); |
| 460 | DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); | 459 | DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); |
| 461 | DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); | ||
| 462 | DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); | ||
| 463 | DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); | ||
| 464 | DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); | ||
| 465 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); | 460 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); |
| 466 | DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); | 461 | DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); |
| 467 | DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); | 462 | DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); |
| 468 | DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); | 463 | DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); |
| 464 | DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); | ||
| 465 | DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); | ||
| 469 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); | 466 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); |
| 470 | DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); | 467 | DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); |
| 471 | DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); | 468 | DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); |
| @@ -481,6 +478,7 @@ int main(void) | |||
| 481 | DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); | 478 | DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); |
| 482 | DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); | 479 | DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); |
| 483 | DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); | 480 | DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); |
| 481 | DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); | ||
| 484 | DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - | 482 | DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - |
| 485 | offsetof(struct kvmppc_vcpu_book3s, vcpu)); | 483 | offsetof(struct kvmppc_vcpu_book3s, vcpu)); |
| 486 | DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); | 484 | DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); |
| @@ -537,6 +535,8 @@ int main(void) | |||
| 537 | HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); | 535 | HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); |
| 538 | HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); | 536 | HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); |
| 539 | HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); | 537 | HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); |
| 538 | HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); | ||
| 539 | HSTATE_FIELD(HSTATE_NAPPING, napping); | ||
| 540 | 540 | ||
| 541 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 541 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
| 542 | HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); | 542 | HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); |
| @@ -549,6 +549,7 @@ int main(void) | |||
| 549 | HSTATE_FIELD(HSTATE_DSCR, host_dscr); | 549 | HSTATE_FIELD(HSTATE_DSCR, host_dscr); |
| 550 | HSTATE_FIELD(HSTATE_DABR, dabr); | 550 | HSTATE_FIELD(HSTATE_DABR, dabr); |
| 551 | HSTATE_FIELD(HSTATE_DECEXP, dec_expires); | 551 | HSTATE_FIELD(HSTATE_DECEXP, dec_expires); |
| 552 | DEFINE(IPI_PRIORITY, IPI_PRIORITY); | ||
| 552 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | 553 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ |
| 553 | 554 | ||
| 554 | #else /* CONFIG_PPC_BOOK3S */ | 555 | #else /* CONFIG_PPC_BOOK3S */ |
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 41b02c792aa3..29ddd8b1c274 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S | |||
| @@ -427,16 +427,6 @@ slb_miss_user_pseries: | |||
| 427 | b . /* prevent spec. execution */ | 427 | b . /* prevent spec. execution */ |
| 428 | #endif /* __DISABLED__ */ | 428 | #endif /* __DISABLED__ */ |
| 429 | 429 | ||
| 430 | /* KVM's trampoline code needs to be close to the interrupt handlers */ | ||
| 431 | |||
| 432 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | ||
| 433 | #ifdef CONFIG_KVM_BOOK3S_PR | ||
| 434 | #include "../kvm/book3s_rmhandlers.S" | ||
| 435 | #else | ||
| 436 | #include "../kvm/book3s_hv_rmhandlers.S" | ||
| 437 | #endif | ||
| 438 | #endif | ||
| 439 | |||
| 440 | .align 7 | 430 | .align 7 |
| 441 | .globl __end_interrupts | 431 | .globl __end_interrupts |
| 442 | __end_interrupts: | 432 | __end_interrupts: |
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index da3a1225c0ac..ca1f88b3dc59 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c | |||
| @@ -78,6 +78,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 78 | for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) | 78 | for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) |
| 79 | vcpu_44x->shadow_refs[i].gtlb_index = -1; | 79 | vcpu_44x->shadow_refs[i].gtlb_index = -1; |
| 80 | 80 | ||
| 81 | vcpu->arch.cpu_type = KVM_CPU_440; | ||
| 82 | |||
| 81 | return 0; | 83 | return 0; |
| 82 | } | 84 | } |
| 83 | 85 | ||
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 08428e2c188d..3688aeecc4b2 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
| @@ -43,18 +43,22 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ | |||
| 43 | fpu.o \ | 43 | fpu.o \ |
| 44 | book3s_paired_singles.o \ | 44 | book3s_paired_singles.o \ |
| 45 | book3s_pr.o \ | 45 | book3s_pr.o \ |
| 46 | book3s_pr_papr.o \ | ||
| 46 | book3s_emulate.o \ | 47 | book3s_emulate.o \ |
| 47 | book3s_interrupts.o \ | 48 | book3s_interrupts.o \ |
| 48 | book3s_mmu_hpte.o \ | 49 | book3s_mmu_hpte.o \ |
| 49 | book3s_64_mmu_host.o \ | 50 | book3s_64_mmu_host.o \ |
| 50 | book3s_64_mmu.o \ | 51 | book3s_64_mmu.o \ |
| 51 | book3s_32_mmu.o | 52 | book3s_32_mmu.o |
| 53 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ | ||
| 54 | book3s_rmhandlers.o | ||
| 52 | 55 | ||
| 53 | kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | 56 | kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ |
| 54 | book3s_hv.o \ | 57 | book3s_hv.o \ |
| 55 | book3s_hv_interrupts.o \ | 58 | book3s_hv_interrupts.o \ |
| 56 | book3s_64_mmu_hv.o | 59 | book3s_64_mmu_hv.o |
| 57 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | 60 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ |
| 61 | book3s_hv_rmhandlers.o \ | ||
| 58 | book3s_hv_rm_mmu.o \ | 62 | book3s_hv_rm_mmu.o \ |
| 59 | book3s_64_vio_hv.o \ | 63 | book3s_64_vio_hv.o \ |
| 60 | book3s_hv_builtin.o | 64 | book3s_hv_builtin.o |
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S index 3608471ad2d8..7e06a6fc8d07 100644 --- a/arch/powerpc/kvm/book3s_32_sr.S +++ b/arch/powerpc/kvm/book3s_32_sr.S | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | * R1 = host R1 | 31 | * R1 = host R1 |
| 32 | * R2 = host R2 | 32 | * R2 = host R2 |
| 33 | * R3 = shadow vcpu | 33 | * R3 = shadow vcpu |
| 34 | * all other volatile GPRS = free | 34 | * all other volatile GPRS = free except R4, R6 |
| 35 | * SVCPU[CR] = guest CR | 35 | * SVCPU[CR] = guest CR |
| 36 | * SVCPU[XER] = guest XER | 36 | * SVCPU[XER] = guest XER |
| 37 | * SVCPU[CTR] = guest CTR | 37 | * SVCPU[CTR] = guest CTR |
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index c6d3e194b6b4..b871721c0050 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
| @@ -128,7 +128,13 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg( | |||
| 128 | dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n", | 128 | dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n", |
| 129 | page, vcpu_book3s->sdr1, pteg, slbe->vsid); | 129 | page, vcpu_book3s->sdr1, pteg, slbe->vsid); |
| 130 | 130 | ||
| 131 | r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); | 131 | /* When running a PAPR guest, SDR1 contains a HVA address instead |
| 132 | of a GPA */ | ||
| 133 | if (vcpu_book3s->vcpu.arch.papr_enabled) | ||
| 134 | r = pteg; | ||
| 135 | else | ||
| 136 | r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); | ||
| 137 | |||
| 132 | if (kvm_is_error_hva(r)) | 138 | if (kvm_is_error_hva(r)) |
| 133 | return r; | 139 | return r; |
| 134 | return r | (pteg & ~PAGE_MASK); | 140 | return r | (pteg & ~PAGE_MASK); |
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 04e7d3bbfe8b..f2e6e48ea463 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S | |||
| @@ -53,7 +53,7 @@ slb_exit_skip_ ## num: | |||
| 53 | * R1 = host R1 | 53 | * R1 = host R1 |
| 54 | * R2 = host R2 | 54 | * R2 = host R2 |
| 55 | * R3 = shadow vcpu | 55 | * R3 = shadow vcpu |
| 56 | * all other volatile GPRS = free | 56 | * all other volatile GPRS = free except R4, R6 |
| 57 | * SVCPU[CR] = guest CR | 57 | * SVCPU[CR] = guest CR |
| 58 | * SVCPU[XER] = guest XER | 58 | * SVCPU[XER] = guest XER |
| 59 | * SVCPU[CTR] = guest CTR | 59 | * SVCPU[CTR] = guest CTR |
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 466846557089..0c9dc62532d0 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
| @@ -63,6 +63,25 @@ | |||
| 63 | * function pointers, so let's just disable the define. */ | 63 | * function pointers, so let's just disable the define. */ |
| 64 | #undef mfsrin | 64 | #undef mfsrin |
| 65 | 65 | ||
| 66 | enum priv_level { | ||
| 67 | PRIV_PROBLEM = 0, | ||
| 68 | PRIV_SUPER = 1, | ||
| 69 | PRIV_HYPER = 2, | ||
| 70 | }; | ||
| 71 | |||
| 72 | static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level) | ||
| 73 | { | ||
| 74 | /* PAPR VMs only access supervisor SPRs */ | ||
| 75 | if (vcpu->arch.papr_enabled && (level > PRIV_SUPER)) | ||
| 76 | return false; | ||
| 77 | |||
| 78 | /* Limit user space to its own small SPR set */ | ||
| 79 | if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM) | ||
| 80 | return false; | ||
| 81 | |||
| 82 | return true; | ||
| 83 | } | ||
| 84 | |||
| 66 | int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 85 | int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
| 67 | unsigned int inst, int *advance) | 86 | unsigned int inst, int *advance) |
| 68 | { | 87 | { |
| @@ -296,6 +315,8 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
| 296 | 315 | ||
| 297 | switch (sprn) { | 316 | switch (sprn) { |
| 298 | case SPRN_SDR1: | 317 | case SPRN_SDR1: |
| 318 | if (!spr_allowed(vcpu, PRIV_HYPER)) | ||
| 319 | goto unprivileged; | ||
| 299 | to_book3s(vcpu)->sdr1 = spr_val; | 320 | to_book3s(vcpu)->sdr1 = spr_val; |
| 300 | break; | 321 | break; |
| 301 | case SPRN_DSISR: | 322 | case SPRN_DSISR: |
| @@ -390,6 +411,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
| 390 | case SPRN_PMC4_GEKKO: | 411 | case SPRN_PMC4_GEKKO: |
| 391 | case SPRN_WPAR_GEKKO: | 412 | case SPRN_WPAR_GEKKO: |
| 392 | break; | 413 | break; |
| 414 | unprivileged: | ||
| 393 | default: | 415 | default: |
| 394 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); | 416 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); |
| 395 | #ifndef DEBUG_SPR | 417 | #ifndef DEBUG_SPR |
| @@ -421,6 +443,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
| 421 | break; | 443 | break; |
| 422 | } | 444 | } |
| 423 | case SPRN_SDR1: | 445 | case SPRN_SDR1: |
| 446 | if (!spr_allowed(vcpu, PRIV_HYPER)) | ||
| 447 | goto unprivileged; | ||
| 424 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); | 448 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); |
| 425 | break; | 449 | break; |
| 426 | case SPRN_DSISR: | 450 | case SPRN_DSISR: |
| @@ -449,6 +473,10 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
| 449 | case SPRN_HID5: | 473 | case SPRN_HID5: |
| 450 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); | 474 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); |
| 451 | break; | 475 | break; |
| 476 | case SPRN_CFAR: | ||
| 477 | case SPRN_PURR: | ||
| 478 | kvmppc_set_gpr(vcpu, rt, 0); | ||
| 479 | break; | ||
| 452 | case SPRN_GQR0: | 480 | case SPRN_GQR0: |
| 453 | case SPRN_GQR1: | 481 | case SPRN_GQR1: |
| 454 | case SPRN_GQR2: | 482 | case SPRN_GQR2: |
| @@ -476,6 +504,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
| 476 | kvmppc_set_gpr(vcpu, rt, 0); | 504 | kvmppc_set_gpr(vcpu, rt, 0); |
| 477 | break; | 505 | break; |
| 478 | default: | 506 | default: |
| 507 | unprivileged: | ||
| 479 | printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); | 508 | printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); |
| 480 | #ifndef DEBUG_SPR | 509 | #ifndef DEBUG_SPR |
| 481 | emulated = EMULATE_FAIL; | 510 | emulated = EMULATE_FAIL; |
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index 88c8f26add02..f7f63a00ab1f 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c | |||
| @@ -23,9 +23,7 @@ | |||
| 23 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 23 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
| 24 | EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); | 24 | EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); |
| 25 | #else | 25 | #else |
| 26 | EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter); | 26 | EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); |
| 27 | EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline); | ||
| 28 | EXPORT_SYMBOL_GPL(kvmppc_rmcall); | ||
| 29 | EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); | 27 | EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); |
| 30 | #ifdef CONFIG_ALTIVEC | 28 | #ifdef CONFIG_ALTIVEC |
| 31 | EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); | 29 | EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cc0d7f1b19ab..4644c7986d80 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
| @@ -62,6 +62,8 @@ | |||
| 62 | /* #define EXIT_DEBUG_SIMPLE */ | 62 | /* #define EXIT_DEBUG_SIMPLE */ |
| 63 | /* #define EXIT_DEBUG_INT */ | 63 | /* #define EXIT_DEBUG_INT */ |
| 64 | 64 | ||
| 65 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); | ||
| 66 | |||
| 65 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 67 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
| 66 | { | 68 | { |
| 67 | local_paca->kvm_hstate.kvm_vcpu = vcpu; | 69 | local_paca->kvm_hstate.kvm_vcpu = vcpu; |
| @@ -72,40 +74,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 72 | { | 74 | { |
| 73 | } | 75 | } |
| 74 | 76 | ||
| 75 | static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu); | ||
| 76 | static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu); | ||
| 77 | |||
| 78 | void kvmppc_vcpu_block(struct kvm_vcpu *vcpu) | ||
| 79 | { | ||
| 80 | u64 now; | ||
| 81 | unsigned long dec_nsec; | ||
| 82 | |||
| 83 | now = get_tb(); | ||
| 84 | if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu)) | ||
| 85 | kvmppc_core_queue_dec(vcpu); | ||
| 86 | if (vcpu->arch.pending_exceptions) | ||
| 87 | return; | ||
| 88 | if (vcpu->arch.dec_expires != ~(u64)0) { | ||
| 89 | dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC / | ||
| 90 | tb_ticks_per_sec; | ||
| 91 | hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), | ||
| 92 | HRTIMER_MODE_REL); | ||
| 93 | } | ||
| 94 | |||
| 95 | kvmppc_vcpu_blocked(vcpu); | ||
| 96 | |||
| 97 | kvm_vcpu_block(vcpu); | ||
| 98 | vcpu->stat.halt_wakeup++; | ||
| 99 | |||
| 100 | if (vcpu->arch.dec_expires != ~(u64)0) | ||
| 101 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | ||
| 102 | |||
| 103 | kvmppc_vcpu_unblocked(vcpu); | ||
| 104 | } | ||
| 105 | |||
| 106 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) | 77 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) |
| 107 | { | 78 | { |
| 108 | vcpu->arch.shregs.msr = msr; | 79 | vcpu->arch.shregs.msr = msr; |
| 80 | kvmppc_end_cede(vcpu); | ||
| 109 | } | 81 | } |
| 110 | 82 | ||
| 111 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | 83 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) |
| @@ -257,15 +229,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
| 257 | 229 | ||
| 258 | switch (req) { | 230 | switch (req) { |
| 259 | case H_CEDE: | 231 | case H_CEDE: |
| 260 | vcpu->arch.shregs.msr |= MSR_EE; | ||
| 261 | vcpu->arch.ceded = 1; | ||
| 262 | smp_mb(); | ||
| 263 | if (!vcpu->arch.prodded) | ||
| 264 | kvmppc_vcpu_block(vcpu); | ||
| 265 | else | ||
| 266 | vcpu->arch.prodded = 0; | ||
| 267 | smp_mb(); | ||
| 268 | vcpu->arch.ceded = 0; | ||
| 269 | break; | 232 | break; |
| 270 | case H_PROD: | 233 | case H_PROD: |
| 271 | target = kvmppc_get_gpr(vcpu, 4); | 234 | target = kvmppc_get_gpr(vcpu, 4); |
| @@ -388,20 +351,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 388 | break; | 351 | break; |
| 389 | } | 352 | } |
| 390 | 353 | ||
| 391 | |||
| 392 | if (!(r & RESUME_HOST)) { | ||
| 393 | /* To avoid clobbering exit_reason, only check for signals if | ||
| 394 | * we aren't already exiting to userspace for some other | ||
| 395 | * reason. */ | ||
| 396 | if (signal_pending(tsk)) { | ||
| 397 | vcpu->stat.signal_exits++; | ||
| 398 | run->exit_reason = KVM_EXIT_INTR; | ||
| 399 | r = -EINTR; | ||
| 400 | } else { | ||
| 401 | kvmppc_core_deliver_interrupts(vcpu); | ||
| 402 | } | ||
| 403 | } | ||
| 404 | |||
| 405 | return r; | 354 | return r; |
| 406 | } | 355 | } |
| 407 | 356 | ||
| @@ -479,13 +428,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 479 | kvmppc_mmu_book3s_hv_init(vcpu); | 428 | kvmppc_mmu_book3s_hv_init(vcpu); |
| 480 | 429 | ||
| 481 | /* | 430 | /* |
| 482 | * Some vcpus may start out in stopped state. If we initialize | 431 | * We consider the vcpu stopped until we see the first run ioctl for it. |
| 483 | * them to busy-in-host state they will stop other vcpus in the | ||
| 484 | * vcore from running. Instead we initialize them to blocked | ||
| 485 | * state, effectively considering them to be stopped until we | ||
| 486 | * see the first run ioctl for them. | ||
| 487 | */ | 432 | */ |
| 488 | vcpu->arch.state = KVMPPC_VCPU_BLOCKED; | 433 | vcpu->arch.state = KVMPPC_VCPU_STOPPED; |
| 489 | 434 | ||
| 490 | init_waitqueue_head(&vcpu->arch.cpu_run); | 435 | init_waitqueue_head(&vcpu->arch.cpu_run); |
| 491 | 436 | ||
| @@ -496,6 +441,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 496 | if (vcore) { | 441 | if (vcore) { |
| 497 | INIT_LIST_HEAD(&vcore->runnable_threads); | 442 | INIT_LIST_HEAD(&vcore->runnable_threads); |
| 498 | spin_lock_init(&vcore->lock); | 443 | spin_lock_init(&vcore->lock); |
| 444 | init_waitqueue_head(&vcore->wq); | ||
| 499 | } | 445 | } |
| 500 | kvm->arch.vcores[core] = vcore; | 446 | kvm->arch.vcores[core] = vcore; |
| 501 | } | 447 | } |
| @@ -506,10 +452,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 506 | 452 | ||
| 507 | spin_lock(&vcore->lock); | 453 | spin_lock(&vcore->lock); |
| 508 | ++vcore->num_threads; | 454 | ++vcore->num_threads; |
| 509 | ++vcore->n_blocked; | ||
| 510 | spin_unlock(&vcore->lock); | 455 | spin_unlock(&vcore->lock); |
| 511 | vcpu->arch.vcore = vcore; | 456 | vcpu->arch.vcore = vcore; |
| 512 | 457 | ||
| 458 | vcpu->arch.cpu_type = KVM_CPU_3S_64; | ||
| 459 | kvmppc_sanity_check(vcpu); | ||
| 460 | |||
| 513 | return vcpu; | 461 | return vcpu; |
| 514 | 462 | ||
| 515 | free_vcpu: | 463 | free_vcpu: |
| @@ -524,30 +472,31 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) | |||
| 524 | kfree(vcpu); | 472 | kfree(vcpu); |
| 525 | } | 473 | } |
| 526 | 474 | ||
| 527 | static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu) | 475 | static void kvmppc_set_timer(struct kvm_vcpu *vcpu) |
| 528 | { | 476 | { |
| 529 | struct kvmppc_vcore *vc = vcpu->arch.vcore; | 477 | unsigned long dec_nsec, now; |
| 530 | 478 | ||
| 531 | spin_lock(&vc->lock); | 479 | now = get_tb(); |
| 532 | vcpu->arch.state = KVMPPC_VCPU_BLOCKED; | 480 | if (now > vcpu->arch.dec_expires) { |
| 533 | ++vc->n_blocked; | 481 | /* decrementer has already gone negative */ |
| 534 | if (vc->n_runnable > 0 && | 482 | kvmppc_core_queue_dec(vcpu); |
| 535 | vc->n_runnable + vc->n_blocked == vc->num_threads) { | 483 | kvmppc_core_deliver_interrupts(vcpu); |
| 536 | vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, | 484 | return; |
| 537 | arch.run_list); | ||
| 538 | wake_up(&vcpu->arch.cpu_run); | ||
| 539 | } | 485 | } |
| 540 | spin_unlock(&vc->lock); | 486 | dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC |
| 487 | / tb_ticks_per_sec; | ||
| 488 | hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), | ||
| 489 | HRTIMER_MODE_REL); | ||
| 490 | vcpu->arch.timer_running = 1; | ||
| 541 | } | 491 | } |
| 542 | 492 | ||
| 543 | static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu) | 493 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu) |
| 544 | { | 494 | { |
| 545 | struct kvmppc_vcore *vc = vcpu->arch.vcore; | 495 | vcpu->arch.ceded = 0; |
| 546 | 496 | if (vcpu->arch.timer_running) { | |
| 547 | spin_lock(&vc->lock); | 497 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); |
| 548 | vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; | 498 | vcpu->arch.timer_running = 0; |
| 549 | --vc->n_blocked; | 499 | } |
| 550 | spin_unlock(&vc->lock); | ||
| 551 | } | 500 | } |
| 552 | 501 | ||
| 553 | extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 502 | extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
| @@ -562,6 +511,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, | |||
| 562 | return; | 511 | return; |
| 563 | vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; | 512 | vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; |
| 564 | --vc->n_runnable; | 513 | --vc->n_runnable; |
| 514 | ++vc->n_busy; | ||
| 565 | /* decrement the physical thread id of each following vcpu */ | 515 | /* decrement the physical thread id of each following vcpu */ |
| 566 | v = vcpu; | 516 | v = vcpu; |
| 567 | list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) | 517 | list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) |
| @@ -575,15 +525,20 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) | |||
| 575 | struct paca_struct *tpaca; | 525 | struct paca_struct *tpaca; |
| 576 | struct kvmppc_vcore *vc = vcpu->arch.vcore; | 526 | struct kvmppc_vcore *vc = vcpu->arch.vcore; |
| 577 | 527 | ||
| 528 | if (vcpu->arch.timer_running) { | ||
| 529 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | ||
| 530 | vcpu->arch.timer_running = 0; | ||
| 531 | } | ||
| 578 | cpu = vc->pcpu + vcpu->arch.ptid; | 532 | cpu = vc->pcpu + vcpu->arch.ptid; |
| 579 | tpaca = &paca[cpu]; | 533 | tpaca = &paca[cpu]; |
| 580 | tpaca->kvm_hstate.kvm_vcpu = vcpu; | 534 | tpaca->kvm_hstate.kvm_vcpu = vcpu; |
| 581 | tpaca->kvm_hstate.kvm_vcore = vc; | 535 | tpaca->kvm_hstate.kvm_vcore = vc; |
| 536 | tpaca->kvm_hstate.napping = 0; | ||
| 537 | vcpu->cpu = vc->pcpu; | ||
| 582 | smp_wmb(); | 538 | smp_wmb(); |
| 583 | #ifdef CONFIG_PPC_ICP_NATIVE | 539 | #ifdef CONFIG_PPC_ICP_NATIVE |
| 584 | if (vcpu->arch.ptid) { | 540 | if (vcpu->arch.ptid) { |
| 585 | tpaca->cpu_start = 0x80; | 541 | tpaca->cpu_start = 0x80; |
| 586 | tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST; | ||
| 587 | wmb(); | 542 | wmb(); |
| 588 | xics_wake_cpu(cpu); | 543 | xics_wake_cpu(cpu); |
| 589 | ++vc->n_woken; | 544 | ++vc->n_woken; |
| @@ -631,9 +586,10 @@ static int on_primary_thread(void) | |||
| 631 | */ | 586 | */ |
| 632 | static int kvmppc_run_core(struct kvmppc_vcore *vc) | 587 | static int kvmppc_run_core(struct kvmppc_vcore *vc) |
| 633 | { | 588 | { |
| 634 | struct kvm_vcpu *vcpu, *vnext; | 589 | struct kvm_vcpu *vcpu, *vcpu0, *vnext; |
| 635 | long ret; | 590 | long ret; |
| 636 | u64 now; | 591 | u64 now; |
| 592 | int ptid; | ||
| 637 | 593 | ||
| 638 | /* don't start if any threads have a signal pending */ | 594 | /* don't start if any threads have a signal pending */ |
| 639 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) | 595 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) |
| @@ -652,29 +608,50 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) | |||
| 652 | goto out; | 608 | goto out; |
| 653 | } | 609 | } |
| 654 | 610 | ||
| 611 | /* | ||
| 612 | * Assign physical thread IDs, first to non-ceded vcpus | ||
| 613 | * and then to ceded ones. | ||
| 614 | */ | ||
| 615 | ptid = 0; | ||
| 616 | vcpu0 = NULL; | ||
| 617 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { | ||
| 618 | if (!vcpu->arch.ceded) { | ||
| 619 | if (!ptid) | ||
| 620 | vcpu0 = vcpu; | ||
| 621 | vcpu->arch.ptid = ptid++; | ||
| 622 | } | ||
| 623 | } | ||
| 624 | if (!vcpu0) | ||
| 625 | return 0; /* nothing to run */ | ||
| 626 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) | ||
| 627 | if (vcpu->arch.ceded) | ||
| 628 | vcpu->arch.ptid = ptid++; | ||
| 629 | |||
| 655 | vc->n_woken = 0; | 630 | vc->n_woken = 0; |
| 656 | vc->nap_count = 0; | 631 | vc->nap_count = 0; |
| 657 | vc->entry_exit_count = 0; | 632 | vc->entry_exit_count = 0; |
| 658 | vc->vcore_running = 1; | 633 | vc->vcore_state = VCORE_RUNNING; |
| 659 | vc->in_guest = 0; | 634 | vc->in_guest = 0; |
| 660 | vc->pcpu = smp_processor_id(); | 635 | vc->pcpu = smp_processor_id(); |
| 636 | vc->napping_threads = 0; | ||
| 661 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) | 637 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) |
| 662 | kvmppc_start_thread(vcpu); | 638 | kvmppc_start_thread(vcpu); |
| 663 | vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu, | ||
| 664 | arch.run_list); | ||
| 665 | 639 | ||
| 640 | preempt_disable(); | ||
| 666 | spin_unlock(&vc->lock); | 641 | spin_unlock(&vc->lock); |
| 667 | 642 | ||
| 668 | preempt_disable(); | ||
| 669 | kvm_guest_enter(); | 643 | kvm_guest_enter(); |
| 670 | __kvmppc_vcore_entry(NULL, vcpu); | 644 | __kvmppc_vcore_entry(NULL, vcpu0); |
| 671 | 645 | ||
| 672 | /* wait for secondary threads to finish writing their state to memory */ | ||
| 673 | spin_lock(&vc->lock); | 646 | spin_lock(&vc->lock); |
| 647 | /* disable sending of IPIs on virtual external irqs */ | ||
| 648 | list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) | ||
| 649 | vcpu->cpu = -1; | ||
| 650 | /* wait for secondary threads to finish writing their state to memory */ | ||
| 674 | if (vc->nap_count < vc->n_woken) | 651 | if (vc->nap_count < vc->n_woken) |
| 675 | kvmppc_wait_for_nap(vc); | 652 | kvmppc_wait_for_nap(vc); |
| 676 | /* prevent other vcpu threads from doing kvmppc_start_thread() now */ | 653 | /* prevent other vcpu threads from doing kvmppc_start_thread() now */ |
| 677 | vc->vcore_running = 2; | 654 | vc->vcore_state = VCORE_EXITING; |
| 678 | spin_unlock(&vc->lock); | 655 | spin_unlock(&vc->lock); |
| 679 | 656 | ||
| 680 | /* make sure updates to secondary vcpu structs are visible now */ | 657 | /* make sure updates to secondary vcpu structs are visible now */ |
| @@ -690,22 +667,26 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) | |||
| 690 | if (now < vcpu->arch.dec_expires && | 667 | if (now < vcpu->arch.dec_expires && |
| 691 | kvmppc_core_pending_dec(vcpu)) | 668 | kvmppc_core_pending_dec(vcpu)) |
| 692 | kvmppc_core_dequeue_dec(vcpu); | 669 | kvmppc_core_dequeue_dec(vcpu); |
| 693 | if (!vcpu->arch.trap) { | 670 | |
| 694 | if (signal_pending(vcpu->arch.run_task)) { | 671 | ret = RESUME_GUEST; |
| 695 | vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR; | 672 | if (vcpu->arch.trap) |
| 696 | vcpu->arch.ret = -EINTR; | 673 | ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, |
| 697 | } | 674 | vcpu->arch.run_task); |
| 698 | continue; /* didn't get to run */ | 675 | |
| 699 | } | ||
| 700 | ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, | ||
| 701 | vcpu->arch.run_task); | ||
| 702 | vcpu->arch.ret = ret; | 676 | vcpu->arch.ret = ret; |
| 703 | vcpu->arch.trap = 0; | 677 | vcpu->arch.trap = 0; |
| 678 | |||
| 679 | if (vcpu->arch.ceded) { | ||
| 680 | if (ret != RESUME_GUEST) | ||
| 681 | kvmppc_end_cede(vcpu); | ||
| 682 | else | ||
| 683 | kvmppc_set_timer(vcpu); | ||
| 684 | } | ||
| 704 | } | 685 | } |
| 705 | 686 | ||
| 706 | spin_lock(&vc->lock); | 687 | spin_lock(&vc->lock); |
| 707 | out: | 688 | out: |
| 708 | vc->vcore_running = 0; | 689 | vc->vcore_state = VCORE_INACTIVE; |
| 709 | list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, | 690 | list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, |
| 710 | arch.run_list) { | 691 | arch.run_list) { |
| 711 | if (vcpu->arch.ret != RESUME_GUEST) { | 692 | if (vcpu->arch.ret != RESUME_GUEST) { |
| @@ -717,82 +698,130 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) | |||
| 717 | return 1; | 698 | return 1; |
| 718 | } | 699 | } |
| 719 | 700 | ||
| 720 | static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 701 | /* |
| 702 | * Wait for some other vcpu thread to execute us, and | ||
| 703 | * wake us up when we need to handle something in the host. | ||
| 704 | */ | ||
| 705 | static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) | ||
| 721 | { | 706 | { |
| 722 | int ptid; | ||
| 723 | int wait_state; | ||
| 724 | struct kvmppc_vcore *vc; | ||
| 725 | DEFINE_WAIT(wait); | 707 | DEFINE_WAIT(wait); |
| 726 | 708 | ||
| 727 | /* No need to go into the guest when all we do is going out */ | 709 | prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); |
| 728 | if (signal_pending(current)) { | 710 | if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) |
| 729 | kvm_run->exit_reason = KVM_EXIT_INTR; | 711 | schedule(); |
| 730 | return -EINTR; | 712 | finish_wait(&vcpu->arch.cpu_run, &wait); |
| 713 | } | ||
| 714 | |||
| 715 | /* | ||
| 716 | * All the vcpus in this vcore are idle, so wait for a decrementer | ||
| 717 | * or external interrupt to one of the vcpus. vc->lock is held. | ||
| 718 | */ | ||
| 719 | static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) | ||
| 720 | { | ||
| 721 | DEFINE_WAIT(wait); | ||
| 722 | struct kvm_vcpu *v; | ||
| 723 | int all_idle = 1; | ||
| 724 | |||
| 725 | prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); | ||
| 726 | vc->vcore_state = VCORE_SLEEPING; | ||
| 727 | spin_unlock(&vc->lock); | ||
| 728 | list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { | ||
| 729 | if (!v->arch.ceded || v->arch.pending_exceptions) { | ||
| 730 | all_idle = 0; | ||
| 731 | break; | ||
| 732 | } | ||
| 731 | } | 733 | } |
| 734 | if (all_idle) | ||
| 735 | schedule(); | ||
| 736 | finish_wait(&vc->wq, &wait); | ||
| 737 | spin_lock(&vc->lock); | ||
| 738 | vc->vcore_state = VCORE_INACTIVE; | ||
| 739 | } | ||
| 732 | 740 | ||
| 733 | /* On PPC970, check that we have an RMA region */ | 741 | static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
| 734 | if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) | 742 | { |
| 735 | return -EPERM; | 743 | int n_ceded; |
| 744 | int prev_state; | ||
| 745 | struct kvmppc_vcore *vc; | ||
| 746 | struct kvm_vcpu *v, *vn; | ||
| 736 | 747 | ||
| 737 | kvm_run->exit_reason = 0; | 748 | kvm_run->exit_reason = 0; |
| 738 | vcpu->arch.ret = RESUME_GUEST; | 749 | vcpu->arch.ret = RESUME_GUEST; |
| 739 | vcpu->arch.trap = 0; | 750 | vcpu->arch.trap = 0; |
| 740 | 751 | ||
| 741 | flush_fp_to_thread(current); | ||
| 742 | flush_altivec_to_thread(current); | ||
| 743 | flush_vsx_to_thread(current); | ||
| 744 | |||
| 745 | /* | 752 | /* |
| 746 | * Synchronize with other threads in this virtual core | 753 | * Synchronize with other threads in this virtual core |
| 747 | */ | 754 | */ |
| 748 | vc = vcpu->arch.vcore; | 755 | vc = vcpu->arch.vcore; |
| 749 | spin_lock(&vc->lock); | 756 | spin_lock(&vc->lock); |
| 750 | /* This happens the first time this is called for a vcpu */ | 757 | vcpu->arch.ceded = 0; |
| 751 | if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED) | ||
| 752 | --vc->n_blocked; | ||
| 753 | vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; | ||
| 754 | ptid = vc->n_runnable; | ||
| 755 | vcpu->arch.run_task = current; | 758 | vcpu->arch.run_task = current; |
| 756 | vcpu->arch.kvm_run = kvm_run; | 759 | vcpu->arch.kvm_run = kvm_run; |
| 757 | vcpu->arch.ptid = ptid; | 760 | prev_state = vcpu->arch.state; |
| 761 | vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; | ||
| 758 | list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); | 762 | list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); |
| 759 | ++vc->n_runnable; | 763 | ++vc->n_runnable; |
| 760 | 764 | ||
| 761 | wait_state = TASK_INTERRUPTIBLE; | 765 | /* |
| 762 | while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { | 766 | * This happens the first time this is called for a vcpu. |
| 763 | if (signal_pending(current)) { | 767 | * If the vcore is already running, we may be able to start |
| 764 | if (!vc->vcore_running) { | 768 | * this thread straight away and have it join in. |
| 765 | kvm_run->exit_reason = KVM_EXIT_INTR; | 769 | */ |
| 766 | vcpu->arch.ret = -EINTR; | 770 | if (prev_state == KVMPPC_VCPU_STOPPED) { |
| 767 | break; | 771 | if (vc->vcore_state == VCORE_RUNNING && |
| 768 | } | 772 | VCORE_EXIT_COUNT(vc) == 0) { |
| 769 | /* have to wait for vcore to stop executing guest */ | 773 | vcpu->arch.ptid = vc->n_runnable - 1; |
| 770 | wait_state = TASK_UNINTERRUPTIBLE; | 774 | kvmppc_start_thread(vcpu); |
| 771 | smp_send_reschedule(vc->pcpu); | ||
| 772 | } | 775 | } |
| 773 | 776 | ||
| 774 | if (!vc->vcore_running && | 777 | } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST) |
| 775 | vc->n_runnable + vc->n_blocked == vc->num_threads) { | 778 | --vc->n_busy; |
| 776 | /* we can run now */ | ||
| 777 | if (kvmppc_run_core(vc)) | ||
| 778 | continue; | ||
| 779 | } | ||
| 780 | 779 | ||
| 781 | if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0) | 780 | while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && |
| 782 | kvmppc_start_thread(vcpu); | 781 | !signal_pending(current)) { |
| 782 | if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) { | ||
| 783 | spin_unlock(&vc->lock); | ||
| 784 | kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE); | ||
| 785 | spin_lock(&vc->lock); | ||
| 786 | continue; | ||
| 787 | } | ||
| 788 | n_ceded = 0; | ||
| 789 | list_for_each_entry(v, &vc->runnable_threads, arch.run_list) | ||
| 790 | n_ceded += v->arch.ceded; | ||
| 791 | if (n_ceded == vc->n_runnable) | ||
| 792 | kvmppc_vcore_blocked(vc); | ||
| 793 | else | ||
| 794 | kvmppc_run_core(vc); | ||
| 795 | |||
| 796 | list_for_each_entry_safe(v, vn, &vc->runnable_threads, | ||
| 797 | arch.run_list) { | ||
| 798 | kvmppc_core_deliver_interrupts(v); | ||
| 799 | if (signal_pending(v->arch.run_task)) { | ||
| 800 | kvmppc_remove_runnable(vc, v); | ||
| 801 | v->stat.signal_exits++; | ||
| 802 | v->arch.kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 803 | v->arch.ret = -EINTR; | ||
| 804 | wake_up(&v->arch.cpu_run); | ||
| 805 | } | ||
| 806 | } | ||
| 807 | } | ||
| 783 | 808 | ||
| 784 | /* wait for other threads to come in, or wait for vcore */ | 809 | if (signal_pending(current)) { |
| 785 | prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); | 810 | if (vc->vcore_state == VCORE_RUNNING || |
| 786 | spin_unlock(&vc->lock); | 811 | vc->vcore_state == VCORE_EXITING) { |
| 787 | schedule(); | 812 | spin_unlock(&vc->lock); |
| 788 | finish_wait(&vcpu->arch.cpu_run, &wait); | 813 | kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); |
| 789 | spin_lock(&vc->lock); | 814 | spin_lock(&vc->lock); |
| 815 | } | ||
| 816 | if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { | ||
| 817 | kvmppc_remove_runnable(vc, vcpu); | ||
| 818 | vcpu->stat.signal_exits++; | ||
| 819 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 820 | vcpu->arch.ret = -EINTR; | ||
| 821 | } | ||
| 790 | } | 822 | } |
| 791 | 823 | ||
| 792 | if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) | ||
| 793 | kvmppc_remove_runnable(vc, vcpu); | ||
| 794 | spin_unlock(&vc->lock); | 824 | spin_unlock(&vc->lock); |
| 795 | |||
| 796 | return vcpu->arch.ret; | 825 | return vcpu->arch.ret; |
| 797 | } | 826 | } |
| 798 | 827 | ||
| @@ -800,6 +829,26 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 800 | { | 829 | { |
| 801 | int r; | 830 | int r; |
| 802 | 831 | ||
| 832 | if (!vcpu->arch.sane) { | ||
| 833 | run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 834 | return -EINVAL; | ||
| 835 | } | ||
| 836 | |||
| 837 | /* No need to go into the guest when all we'll do is come back out */ | ||
| 838 | if (signal_pending(current)) { | ||
| 839 | run->exit_reason = KVM_EXIT_INTR; | ||
| 840 | return -EINTR; | ||
| 841 | } | ||
| 842 | |||
| 843 | /* On PPC970, check that we have an RMA region */ | ||
| 844 | if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) | ||
| 845 | return -EPERM; | ||
| 846 | |||
| 847 | flush_fp_to_thread(current); | ||
| 848 | flush_altivec_to_thread(current); | ||
| 849 | flush_vsx_to_thread(current); | ||
| 850 | vcpu->arch.wqp = &vcpu->arch.vcore->wq; | ||
| 851 | |||
| 803 | do { | 852 | do { |
| 804 | r = kvmppc_run_vcpu(run, vcpu); | 853 | r = kvmppc_run_vcpu(run, vcpu); |
| 805 | 854 | ||
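The hunk above adds three early-out paths to the HV kvmppc_vcpu_run(): an insane vcpu configuration is reported as KVM_EXIT_INTERNAL_ERROR, a pending signal returns -EINTR with KVM_EXIT_INTR before the guest is ever entered, and on PPC970 the run is refused with -EPERM until an RMA region exists. Below is a minimal userspace sketch of how a VMM's run loop might react to these exits; the vcpu_fd and the mmap'ed kvm_run area are assumed to have been set up with the usual KVM_CREATE_VCPU sequence, and the helper name is hypothetical.

    #include <errno.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical helper: run the vcpu once and classify the new exit paths. */
    static int run_vcpu_once(int vcpu_fd, struct kvm_run *run)
    {
            if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                    if (errno == EINTR)
                            return 0;       /* signal pending: handle it, then call again */
                    if (errno == EINVAL && run->exit_reason == KVM_EXIT_INTERNAL_ERROR) {
                            fprintf(stderr, "vcpu rejected: configuration not sane\n");
                            return -1;
                    }
                    if (errno == EPERM) {
                            fprintf(stderr, "PPC970 guest needs an RMA region first\n");
                            return -1;
                    }
                    return -1;
            }
            return 1;       /* normal exit: dispatch on run->exit_reason as usual */
    }

The -EINTR case simply retries after the signal has been delivered, matching the "no need to go into the guest when all we'll do is come back out" comment in the hunk.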
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fcfe6b055558..bacb0cfa3602 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
| @@ -110,39 +110,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | |||
| 110 | return H_SUCCESS; | 110 | return H_SUCCESS; |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, | ||
| 114 | unsigned long pte_index) | ||
| 115 | { | ||
| 116 | unsigned long rb, va_low; | ||
| 117 | |||
| 118 | rb = (v & ~0x7fUL) << 16; /* AVA field */ | ||
| 119 | va_low = pte_index >> 3; | ||
| 120 | if (v & HPTE_V_SECONDARY) | ||
| 121 | va_low = ~va_low; | ||
| 122 | /* xor vsid from AVA */ | ||
| 123 | if (!(v & HPTE_V_1TB_SEG)) | ||
| 124 | va_low ^= v >> 12; | ||
| 125 | else | ||
| 126 | va_low ^= v >> 24; | ||
| 127 | va_low &= 0x7ff; | ||
| 128 | if (v & HPTE_V_LARGE) { | ||
| 129 | rb |= 1; /* L field */ | ||
| 130 | if (cpu_has_feature(CPU_FTR_ARCH_206) && | ||
| 131 | (r & 0xff000)) { | ||
| 132 | /* non-16MB large page, must be 64k */ | ||
| 133 | /* (masks depend on page size) */ | ||
| 134 | rb |= 0x1000; /* page encoding in LP field */ | ||
| 135 | rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ | ||
| 136 | rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ | ||
| 137 | } | ||
| 138 | } else { | ||
| 139 | /* 4kB page */ | ||
| 140 | rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */ | ||
| 141 | } | ||
| 142 | rb |= (v >> 54) & 0x300; /* B field */ | ||
| 143 | return rb; | ||
| 144 | } | ||
| 145 | |||
| 146 | #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) | 113 | #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) |
| 147 | 114 | ||
| 148 | static inline int try_lock_tlbie(unsigned int *lock) | 115 | static inline int try_lock_tlbie(unsigned int *lock) |
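compute_tlbie_rb() is deleted here even though the new PR-mode PAPR code later in this series still calls it (see kvmppc_h_pr_remove()/kvmppc_h_pr_protect() below), so the helper has presumably been moved into a shared header such as asm/kvm_book3s_64.h rather than dropped; that destination is an assumption, not something shown in this diff. As a reminder of what it computes, here is a standalone restatement of the removed formula for the simple case only (4kB page, 256MB segment), with the HPTE_V_SECONDARY value assumed from the usual HPTE layout and a made-up sample HPTE.

    #include <stdio.h>

    #define HPTE_V_SECONDARY        (1UL << 1)      /* assumed HPTE "H" bit */

    /* RB operand for tlbie: the 4kB-page, 256MB-segment case of the removed helper. */
    static unsigned long tlbie_rb_4k(unsigned long v, unsigned long pte_index)
    {
            unsigned long rb = (v & ~0x7fUL) << 16;         /* AVA field */
            unsigned long va_low = pte_index >> 3;          /* hash bucket number */

            if (v & HPTE_V_SECONDARY)
                    va_low = ~va_low;
            va_low ^= v >> 12;                              /* xor the VSID back out of the AVA */
            va_low &= 0x7ff;
            rb |= va_low << 12;                             /* remaining 11 bits of VA */
            rb |= (v >> 54) & 0x300;                        /* B (segment size) field */
            return rb;
    }

    int main(void)
    {
            printf("rb = 0x%lx\n", tlbie_rb_4k(0x00001234560002c1UL, 8 * 5));
            return 0;
    }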
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index de2950135e6e..f422231d9235 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
| @@ -20,7 +20,10 @@ | |||
| 20 | #include <asm/ppc_asm.h> | 20 | #include <asm/ppc_asm.h> |
| 21 | #include <asm/kvm_asm.h> | 21 | #include <asm/kvm_asm.h> |
| 22 | #include <asm/reg.h> | 22 | #include <asm/reg.h> |
| 23 | #include <asm/mmu.h> | ||
| 23 | #include <asm/page.h> | 24 | #include <asm/page.h> |
| 25 | #include <asm/ptrace.h> | ||
| 26 | #include <asm/hvcall.h> | ||
| 24 | #include <asm/asm-offsets.h> | 27 | #include <asm/asm-offsets.h> |
| 25 | #include <asm/exception-64s.h> | 28 | #include <asm/exception-64s.h> |
| 26 | 29 | ||
| @@ -49,7 +52,7 @@ kvmppc_skip_Hinterrupt: | |||
| 49 | b . | 52 | b . |
| 50 | 53 | ||
| 51 | /* | 54 | /* |
| 52 | * Call kvmppc_handler_trampoline_enter in real mode. | 55 | * Call kvmppc_hv_entry in real mode. |
| 53 | * Must be called with interrupts hard-disabled. | 56 | * Must be called with interrupts hard-disabled. |
| 54 | * | 57 | * |
| 55 | * Input Registers: | 58 | * Input Registers: |
| @@ -89,6 +92,12 @@ _GLOBAL(kvmppc_hv_entry_trampoline) | |||
| 89 | kvm_start_guest: | 92 | kvm_start_guest: |
| 90 | ld r1,PACAEMERGSP(r13) | 93 | ld r1,PACAEMERGSP(r13) |
| 91 | subi r1,r1,STACK_FRAME_OVERHEAD | 94 | subi r1,r1,STACK_FRAME_OVERHEAD |
| 95 | ld r2,PACATOC(r13) | ||
| 96 | |||
| 97 | /* were we napping due to cede? */ | ||
| 98 | lbz r0,HSTATE_NAPPING(r13) | ||
| 99 | cmpwi r0,0 | ||
| 100 | bne kvm_end_cede | ||
| 92 | 101 | ||
| 93 | /* get vcpu pointer */ | 102 | /* get vcpu pointer */ |
| 94 | ld r4, HSTATE_KVM_VCPU(r13) | 103 | ld r4, HSTATE_KVM_VCPU(r13) |
| @@ -276,15 +285,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
| 276 | cmpwi r0,0 | 285 | cmpwi r0,0 |
| 277 | beq 20b | 286 | beq 20b |
| 278 | 287 | ||
| 279 | /* Set LPCR. Set the MER bit if there is a pending external irq. */ | 288 | /* Set LPCR and RMOR. */ |
| 280 | 10: ld r8,KVM_LPCR(r9) | 289 | 10: ld r8,KVM_LPCR(r9) |
| 281 | ld r0,VCPU_PENDING_EXC(r4) | 290 | mtspr SPRN_LPCR,r8 |
| 282 | li r7,(1 << BOOK3S_IRQPRIO_EXTERNAL) | ||
| 283 | oris r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h | ||
| 284 | and. r0,r0,r7 | ||
| 285 | beq 11f | ||
| 286 | ori r8,r8,LPCR_MER | ||
| 287 | 11: mtspr SPRN_LPCR,r8 | ||
| 288 | ld r8,KVM_RMOR(r9) | 291 | ld r8,KVM_RMOR(r9) |
| 289 | mtspr SPRN_RMOR,r8 | 292 | mtspr SPRN_RMOR,r8 |
| 290 | isync | 293 | isync |
| @@ -448,19 +451,50 @@ toc_tlbie_lock: | |||
| 448 | mtctr r6 | 451 | mtctr r6 |
| 449 | mtxer r7 | 452 | mtxer r7 |
| 450 | 453 | ||
| 451 | /* Move SRR0 and SRR1 into the respective regs */ | 454 | kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ |
| 452 | ld r6, VCPU_SRR0(r4) | 455 | ld r6, VCPU_SRR0(r4) |
| 453 | ld r7, VCPU_SRR1(r4) | 456 | ld r7, VCPU_SRR1(r4) |
| 454 | mtspr SPRN_SRR0, r6 | ||
| 455 | mtspr SPRN_SRR1, r7 | ||
| 456 | |||
| 457 | ld r10, VCPU_PC(r4) | 457 | ld r10, VCPU_PC(r4) |
| 458 | ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ | ||
| 458 | 459 | ||
| 459 | ld r11, VCPU_MSR(r4) /* r10 = vcpu->arch.msr & ~MSR_HV */ | ||
| 460 | rldicl r11, r11, 63 - MSR_HV_LG, 1 | 460 | rldicl r11, r11, 63 - MSR_HV_LG, 1 |
| 461 | rotldi r11, r11, 1 + MSR_HV_LG | 461 | rotldi r11, r11, 1 + MSR_HV_LG |
| 462 | ori r11, r11, MSR_ME | 462 | ori r11, r11, MSR_ME |
| 463 | 463 | ||
| 464 | /* Check if we can deliver an external or decrementer interrupt now */ | ||
| 465 | ld r0,VCPU_PENDING_EXC(r4) | ||
| 466 | li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) | ||
| 467 | oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h | ||
| 468 | and r0,r0,r8 | ||
| 469 | cmpdi cr1,r0,0 | ||
| 470 | andi. r0,r11,MSR_EE | ||
| 471 | beq cr1,11f | ||
| 472 | BEGIN_FTR_SECTION | ||
| 473 | mfspr r8,SPRN_LPCR | ||
| 474 | ori r8,r8,LPCR_MER | ||
| 475 | mtspr SPRN_LPCR,r8 | ||
| 476 | isync | ||
| 477 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | ||
| 478 | beq 5f | ||
| 479 | li r0,BOOK3S_INTERRUPT_EXTERNAL | ||
| 480 | 12: mr r6,r10 | ||
| 481 | mr r10,r0 | ||
| 482 | mr r7,r11 | ||
| 483 | li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ | ||
| 484 | rotldi r11,r11,63 | ||
| 485 | b 5f | ||
| 486 | 11: beq 5f | ||
| 487 | mfspr r0,SPRN_DEC | ||
| 488 | cmpwi r0,0 | ||
| 489 | li r0,BOOK3S_INTERRUPT_DECREMENTER | ||
| 490 | blt 12b | ||
| 491 | |||
| 492 | /* Move SRR0 and SRR1 into the respective regs */ | ||
| 493 | 5: mtspr SPRN_SRR0, r6 | ||
| 494 | mtspr SPRN_SRR1, r7 | ||
| 495 | li r0,0 | ||
| 496 | stb r0,VCPU_CEDED(r4) /* cancel cede */ | ||
| 497 | |||
| 464 | fast_guest_return: | 498 | fast_guest_return: |
| 465 | mtspr SPRN_HSRR0,r10 | 499 | mtspr SPRN_HSRR0,r10 |
| 466 | mtspr SPRN_HSRR1,r11 | 500 | mtspr SPRN_HSRR1,r11 |
| @@ -574,21 +608,20 @@ kvmppc_interrupt: | |||
| 574 | /* See if this is something we can handle in real mode */ | 608 | /* See if this is something we can handle in real mode */ |
| 575 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL | 609 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL |
| 576 | beq hcall_try_real_mode | 610 | beq hcall_try_real_mode |
| 577 | hcall_real_cont: | ||
| 578 | 611 | ||
| 579 | /* Check for mediated interrupts (could be done earlier really ...) */ | 612 | /* Check for mediated interrupts (could be done earlier really ...) */ |
| 580 | BEGIN_FTR_SECTION | 613 | BEGIN_FTR_SECTION |
| 581 | cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL | 614 | cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL |
| 582 | bne+ 1f | 615 | bne+ 1f |
| 583 | ld r5,VCPU_KVM(r9) | ||
| 584 | ld r5,KVM_LPCR(r5) | ||
| 585 | andi. r0,r11,MSR_EE | 616 | andi. r0,r11,MSR_EE |
| 586 | beq 1f | 617 | beq 1f |
| 618 | mfspr r5,SPRN_LPCR | ||
| 587 | andi. r0,r5,LPCR_MER | 619 | andi. r0,r5,LPCR_MER |
| 588 | bne bounce_ext_interrupt | 620 | bne bounce_ext_interrupt |
| 589 | 1: | 621 | 1: |
| 590 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | 622 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) |
| 591 | 623 | ||
| 624 | hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ | ||
| 592 | /* Save DEC */ | 625 | /* Save DEC */ |
| 593 | mfspr r5,SPRN_DEC | 626 | mfspr r5,SPRN_DEC |
| 594 | mftb r6 | 627 | mftb r6 |
| @@ -682,7 +715,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) | |||
| 682 | slbia | 715 | slbia |
| 683 | ptesync | 716 | ptesync |
| 684 | 717 | ||
| 685 | hdec_soon: | 718 | hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */ |
| 686 | BEGIN_FTR_SECTION | 719 | BEGIN_FTR_SECTION |
| 687 | b 32f | 720 | b 32f |
| 688 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | 721 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) |
| @@ -700,6 +733,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
| 700 | addi r0,r3,0x100 | 733 | addi r0,r3,0x100 |
| 701 | stwcx. r0,0,r6 | 734 | stwcx. r0,0,r6 |
| 702 | bne 41b | 735 | bne 41b |
| 736 | lwsync | ||
| 703 | 737 | ||
| 704 | /* | 738 | /* |
| 705 | * At this point we have an interrupt that we have to pass | 739 | * At this point we have an interrupt that we have to pass |
| @@ -713,18 +747,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
| 713 | * interrupt, since the other threads will already be on their | 747 | * interrupt, since the other threads will already be on their |
| 714 | * way here in that case. | 748 | * way here in that case. |
| 715 | */ | 749 | */ |
| 750 | cmpwi r3,0x100 /* Are we the first here? */ | ||
| 751 | bge 43f | ||
| 752 | cmpwi r3,1 /* Are any other threads in the guest? */ | ||
| 753 | ble 43f | ||
| 716 | cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER | 754 | cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER |
| 717 | beq 40f | 755 | beq 40f |
| 718 | cmpwi r3,0x100 /* Are we the first here? */ | ||
| 719 | bge 40f | ||
| 720 | cmpwi r3,1 | ||
| 721 | ble 40f | ||
| 722 | li r0,0 | 756 | li r0,0 |
| 723 | mtspr SPRN_HDEC,r0 | 757 | mtspr SPRN_HDEC,r0 |
| 724 | 40: | 758 | 40: |
| 759 | /* | ||
| 760 | * Send an IPI to any napping threads, since an HDEC interrupt | ||
| 761 | * doesn't wake CPUs up from nap. | ||
| 762 | */ | ||
| 763 | lwz r3,VCORE_NAPPING_THREADS(r5) | ||
| 764 | lwz r4,VCPU_PTID(r9) | ||
| 765 | li r0,1 | ||
| 766 | sldi r0,r0,r4 | ||
| 767 | andc. r3,r3,r0 /* no sense IPI'ing ourselves */ | ||
| 768 | beq 43f | ||
| 769 | mulli r4,r4,PACA_SIZE /* get paca for thread 0 */ | ||
| 770 | subf r6,r4,r13 | ||
| 771 | 42: andi. r0,r3,1 | ||
| 772 | beq 44f | ||
| 773 | ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ | ||
| 774 | li r0,IPI_PRIORITY | ||
| 775 | li r7,XICS_QIRR | ||
| 776 | stbcix r0,r7,r8 /* trigger the IPI */ | ||
| 777 | 44: srdi. r3,r3,1 | ||
| 778 | addi r6,r6,PACA_SIZE | ||
| 779 | bne 42b | ||
| 725 | 780 | ||
| 726 | /* Secondary threads wait for primary to do partition switch */ | 781 | /* Secondary threads wait for primary to do partition switch */ |
| 727 | ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ | 782 | 43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ |
| 728 | ld r5,HSTATE_KVM_VCORE(r13) | 783 | ld r5,HSTATE_KVM_VCORE(r13) |
| 729 | lwz r3,VCPU_PTID(r9) | 784 | lwz r3,VCPU_PTID(r9) |
| 730 | cmpwi r3,0 | 785 | cmpwi r3,0 |
| @@ -1077,7 +1132,6 @@ hcall_try_real_mode: | |||
| 1077 | hcall_real_fallback: | 1132 | hcall_real_fallback: |
| 1078 | li r12,BOOK3S_INTERRUPT_SYSCALL | 1133 | li r12,BOOK3S_INTERRUPT_SYSCALL |
| 1079 | ld r9, HSTATE_KVM_VCPU(r13) | 1134 | ld r9, HSTATE_KVM_VCPU(r13) |
| 1080 | ld r11, VCPU_MSR(r9) | ||
| 1081 | 1135 | ||
| 1082 | b hcall_real_cont | 1136 | b hcall_real_cont |
| 1083 | 1137 | ||
| @@ -1139,7 +1193,7 @@ hcall_real_table: | |||
| 1139 | .long 0 /* 0xd4 */ | 1193 | .long 0 /* 0xd4 */ |
| 1140 | .long 0 /* 0xd8 */ | 1194 | .long 0 /* 0xd8 */ |
| 1141 | .long 0 /* 0xdc */ | 1195 | .long 0 /* 0xdc */ |
| 1142 | .long 0 /* 0xe0 */ | 1196 | .long .kvmppc_h_cede - hcall_real_table |
| 1143 | .long 0 /* 0xe4 */ | 1197 | .long 0 /* 0xe4 */ |
| 1144 | .long 0 /* 0xe8 */ | 1198 | .long 0 /* 0xe8 */ |
| 1145 | .long 0 /* 0xec */ | 1199 | .long 0 /* 0xec */ |
| @@ -1168,7 +1222,8 @@ bounce_ext_interrupt: | |||
| 1168 | mtspr SPRN_SRR0,r10 | 1222 | mtspr SPRN_SRR0,r10 |
| 1169 | mtspr SPRN_SRR1,r11 | 1223 | mtspr SPRN_SRR1,r11 |
| 1170 | li r10,BOOK3S_INTERRUPT_EXTERNAL | 1224 | li r10,BOOK3S_INTERRUPT_EXTERNAL |
| 1171 | LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME); | 1225 | li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ |
| 1226 | rotldi r11,r11,63 | ||
| 1172 | b fast_guest_return | 1227 | b fast_guest_return |
| 1173 | 1228 | ||
| 1174 | _GLOBAL(kvmppc_h_set_dabr) | 1229 | _GLOBAL(kvmppc_h_set_dabr) |
| @@ -1177,6 +1232,178 @@ _GLOBAL(kvmppc_h_set_dabr) | |||
| 1177 | li r3,0 | 1232 | li r3,0 |
| 1178 | blr | 1233 | blr |
| 1179 | 1234 | ||
| 1235 | _GLOBAL(kvmppc_h_cede) | ||
| 1236 | ori r11,r11,MSR_EE | ||
| 1237 | std r11,VCPU_MSR(r3) | ||
| 1238 | li r0,1 | ||
| 1239 | stb r0,VCPU_CEDED(r3) | ||
| 1240 | sync /* order setting ceded vs. testing prodded */ | ||
| 1241 | lbz r5,VCPU_PRODDED(r3) | ||
| 1242 | cmpwi r5,0 | ||
| 1243 | bne 1f | ||
| 1244 | li r0,0 /* set trap to 0 to say hcall is handled */ | ||
| 1245 | stw r0,VCPU_TRAP(r3) | ||
| 1246 | li r0,H_SUCCESS | ||
| 1247 | std r0,VCPU_GPR(r3)(r3) | ||
| 1248 | BEGIN_FTR_SECTION | ||
| 1249 | b 2f /* just send it up to host on 970 */ | ||
| 1250 | END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) | ||
| 1251 | |||
| 1252 | /* | ||
| 1253 | * Set our bit in the bitmask of napping threads unless all the | ||
| 1254 | * other threads are already napping, in which case we send this | ||
| 1255 | * up to the host. | ||
| 1256 | */ | ||
| 1257 | ld r5,HSTATE_KVM_VCORE(r13) | ||
| 1258 | lwz r6,VCPU_PTID(r3) | ||
| 1259 | lwz r8,VCORE_ENTRY_EXIT(r5) | ||
| 1260 | clrldi r8,r8,56 | ||
| 1261 | li r0,1 | ||
| 1262 | sld r0,r0,r6 | ||
| 1263 | addi r6,r5,VCORE_NAPPING_THREADS | ||
| 1264 | 31: lwarx r4,0,r6 | ||
| 1265 | or r4,r4,r0 | ||
| 1266 | popcntw r7,r4 | ||
| 1267 | cmpw r7,r8 | ||
| 1268 | bge 2f | ||
| 1269 | stwcx. r4,0,r6 | ||
| 1270 | bne 31b | ||
| 1271 | li r0,1 | ||
| 1272 | stb r0,HSTATE_NAPPING(r13) | ||
| 1273 | /* order napping_threads update vs testing entry_exit_count */ | ||
| 1274 | lwsync | ||
| 1275 | mr r4,r3 | ||
| 1276 | lwz r7,VCORE_ENTRY_EXIT(r5) | ||
| 1277 | cmpwi r7,0x100 | ||
| 1278 | bge 33f /* another thread already exiting */ | ||
| 1279 | |||
| 1280 | /* | ||
| 1281 | * Although not specifically required by the architecture, POWER7 | ||
| 1282 | * preserves the following registers in nap mode, even if an SMT mode | ||
| 1283 | * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3, | ||
| 1284 | * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR. | ||
| 1285 | */ | ||
| 1286 | /* Save non-volatile GPRs */ | ||
| 1287 | std r14, VCPU_GPR(r14)(r3) | ||
| 1288 | std r15, VCPU_GPR(r15)(r3) | ||
| 1289 | std r16, VCPU_GPR(r16)(r3) | ||
| 1290 | std r17, VCPU_GPR(r17)(r3) | ||
| 1291 | std r18, VCPU_GPR(r18)(r3) | ||
| 1292 | std r19, VCPU_GPR(r19)(r3) | ||
| 1293 | std r20, VCPU_GPR(r20)(r3) | ||
| 1294 | std r21, VCPU_GPR(r21)(r3) | ||
| 1295 | std r22, VCPU_GPR(r22)(r3) | ||
| 1296 | std r23, VCPU_GPR(r23)(r3) | ||
| 1297 | std r24, VCPU_GPR(r24)(r3) | ||
| 1298 | std r25, VCPU_GPR(r25)(r3) | ||
| 1299 | std r26, VCPU_GPR(r26)(r3) | ||
| 1300 | std r27, VCPU_GPR(r27)(r3) | ||
| 1301 | std r28, VCPU_GPR(r28)(r3) | ||
| 1302 | std r29, VCPU_GPR(r29)(r3) | ||
| 1303 | std r30, VCPU_GPR(r30)(r3) | ||
| 1304 | std r31, VCPU_GPR(r31)(r3) | ||
| 1305 | |||
| 1306 | /* save FP state */ | ||
| 1307 | bl .kvmppc_save_fp | ||
| 1308 | |||
| 1309 | /* | ||
| 1310 | * Take a nap until a decrementer or external interrupt occurs, | ||
| 1311 | * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR | ||
| 1312 | */ | ||
| 1313 | li r0,0x80 | ||
| 1314 | stb r0,PACAPROCSTART(r13) | ||
| 1315 | mfspr r5,SPRN_LPCR | ||
| 1316 | ori r5,r5,LPCR_PECE0 | LPCR_PECE1 | ||
| 1317 | mtspr SPRN_LPCR,r5 | ||
| 1318 | isync | ||
| 1319 | li r0, 0 | ||
| 1320 | std r0, HSTATE_SCRATCH0(r13) | ||
| 1321 | ptesync | ||
| 1322 | ld r0, HSTATE_SCRATCH0(r13) | ||
| 1323 | 1: cmpd r0, r0 | ||
| 1324 | bne 1b | ||
| 1325 | nap | ||
| 1326 | b . | ||
| 1327 | |||
| 1328 | kvm_end_cede: | ||
| 1329 | /* Woken by external or decrementer interrupt */ | ||
| 1330 | ld r1, HSTATE_HOST_R1(r13) | ||
| 1331 | ld r2, PACATOC(r13) | ||
| 1332 | |||
| 1333 | /* If we're a secondary thread and we got here by an IPI, ack it */ | ||
| 1334 | ld r4,HSTATE_KVM_VCPU(r13) | ||
| 1335 | lwz r3,VCPU_PTID(r4) | ||
| 1336 | cmpwi r3,0 | ||
| 1337 | beq 27f | ||
| 1338 | mfspr r3,SPRN_SRR1 | ||
| 1339 | rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ | ||
| 1340 | cmpwi r3,4 /* was it an external interrupt? */ | ||
| 1341 | bne 27f | ||
| 1342 | ld r5, HSTATE_XICS_PHYS(r13) | ||
| 1343 | li r0,0xff | ||
| 1344 | li r6,XICS_QIRR | ||
| 1345 | li r7,XICS_XIRR | ||
| 1346 | lwzcix r8,r5,r7 /* ack the interrupt */ | ||
| 1347 | sync | ||
| 1348 | stbcix r0,r5,r6 /* clear it */ | ||
| 1349 | stwcix r8,r5,r7 /* EOI it */ | ||
| 1350 | 27: | ||
| 1351 | /* load up FP state */ | ||
| 1352 | bl kvmppc_load_fp | ||
| 1353 | |||
| 1354 | /* Load NV GPRS */ | ||
| 1355 | ld r14, VCPU_GPR(r14)(r4) | ||
| 1356 | ld r15, VCPU_GPR(r15)(r4) | ||
| 1357 | ld r16, VCPU_GPR(r16)(r4) | ||
| 1358 | ld r17, VCPU_GPR(r17)(r4) | ||
| 1359 | ld r18, VCPU_GPR(r18)(r4) | ||
| 1360 | ld r19, VCPU_GPR(r19)(r4) | ||
| 1361 | ld r20, VCPU_GPR(r20)(r4) | ||
| 1362 | ld r21, VCPU_GPR(r21)(r4) | ||
| 1363 | ld r22, VCPU_GPR(r22)(r4) | ||
| 1364 | ld r23, VCPU_GPR(r23)(r4) | ||
| 1365 | ld r24, VCPU_GPR(r24)(r4) | ||
| 1366 | ld r25, VCPU_GPR(r25)(r4) | ||
| 1367 | ld r26, VCPU_GPR(r26)(r4) | ||
| 1368 | ld r27, VCPU_GPR(r27)(r4) | ||
| 1369 | ld r28, VCPU_GPR(r28)(r4) | ||
| 1370 | ld r29, VCPU_GPR(r29)(r4) | ||
| 1371 | ld r30, VCPU_GPR(r30)(r4) | ||
| 1372 | ld r31, VCPU_GPR(r31)(r4) | ||
| 1373 | |||
| 1374 | /* clear our bit in vcore->napping_threads */ | ||
| 1375 | 33: ld r5,HSTATE_KVM_VCORE(r13) | ||
| 1376 | lwz r3,VCPU_PTID(r4) | ||
| 1377 | li r0,1 | ||
| 1378 | sld r0,r0,r3 | ||
| 1379 | addi r6,r5,VCORE_NAPPING_THREADS | ||
| 1380 | 32: lwarx r7,0,r6 | ||
| 1381 | andc r7,r7,r0 | ||
| 1382 | stwcx. r7,0,r6 | ||
| 1383 | bne 32b | ||
| 1384 | li r0,0 | ||
| 1385 | stb r0,HSTATE_NAPPING(r13) | ||
| 1386 | |||
| 1387 | /* see if any other thread is already exiting */ | ||
| 1388 | lwz r0,VCORE_ENTRY_EXIT(r5) | ||
| 1389 | cmpwi r0,0x100 | ||
| 1390 | blt kvmppc_cede_reentry /* if not go back to guest */ | ||
| 1391 | |||
| 1392 | /* some threads are exiting, so go to the guest exit path */ | ||
| 1393 | b hcall_real_fallback | ||
| 1394 | |||
| 1395 | /* cede when already previously prodded case */ | ||
| 1396 | 1: li r0,0 | ||
| 1397 | stb r0,VCPU_PRODDED(r3) | ||
| 1398 | sync /* order testing prodded vs. clearing ceded */ | ||
| 1399 | stb r0,VCPU_CEDED(r3) | ||
| 1400 | li r3,H_SUCCESS | ||
| 1401 | blr | ||
| 1402 | |||
| 1403 | /* we've ceded but we want to give control to the host */ | ||
| 1404 | 2: li r3,H_TOO_HARD | ||
| 1405 | blr | ||
| 1406 | |||
| 1180 | secondary_too_late: | 1407 | secondary_too_late: |
| 1181 | ld r5,HSTATE_KVM_VCORE(r13) | 1408 | ld r5,HSTATE_KVM_VCORE(r13) |
| 1182 | HMT_LOW | 1409 | HMT_LOW |
| @@ -1194,14 +1421,20 @@ secondary_too_late: | |||
| 1194 | slbmte r6,r5 | 1421 | slbmte r6,r5 |
| 1195 | 1: addi r11,r11,16 | 1422 | 1: addi r11,r11,16 |
| 1196 | .endr | 1423 | .endr |
| 1197 | b 50f | ||
| 1198 | 1424 | ||
| 1199 | secondary_nap: | 1425 | secondary_nap: |
| 1200 | /* Clear any pending IPI */ | 1426 | /* Clear any pending IPI - assume we're a secondary thread */ |
| 1201 | 50: ld r5, HSTATE_XICS_PHYS(r13) | 1427 | ld r5, HSTATE_XICS_PHYS(r13) |
| 1428 | li r7, XICS_XIRR | ||
| 1429 | lwzcix r3, r5, r7 /* ack any pending interrupt */ | ||
| 1430 | rlwinm. r0, r3, 0, 0xffffff /* any pending? */ | ||
| 1431 | beq 37f | ||
| 1432 | sync | ||
| 1202 | li r0, 0xff | 1433 | li r0, 0xff |
| 1203 | li r6, XICS_QIRR | 1434 | li r6, XICS_QIRR |
| 1204 | stbcix r0, r5, r6 | 1435 | stbcix r0, r5, r6 /* clear the IPI */ |
| 1436 | stwcix r3, r5, r7 /* EOI it */ | ||
| 1437 | 37: sync | ||
| 1205 | 1438 | ||
| 1206 | /* increment the nap count and then go to nap mode */ | 1439 | /* increment the nap count and then go to nap mode */ |
| 1207 | ld r4, HSTATE_KVM_VCORE(r13) | 1440 | ld r4, HSTATE_KVM_VCORE(r13) |
| @@ -1211,13 +1444,12 @@ secondary_nap: | |||
| 1211 | addi r3, r3, 1 | 1444 | addi r3, r3, 1 |
| 1212 | stwcx. r3, 0, r4 | 1445 | stwcx. r3, 0, r4 |
| 1213 | bne 51b | 1446 | bne 51b |
| 1214 | isync | ||
| 1215 | 1447 | ||
| 1448 | li r3, LPCR_PECE0 | ||
| 1216 | mfspr r4, SPRN_LPCR | 1449 | mfspr r4, SPRN_LPCR |
| 1217 | li r0, LPCR_PECE | 1450 | rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 |
| 1218 | andc r4, r4, r0 | ||
| 1219 | ori r4, r4, LPCR_PECE0 /* exit nap on interrupt */ | ||
| 1220 | mtspr SPRN_LPCR, r4 | 1451 | mtspr SPRN_LPCR, r4 |
| 1452 | isync | ||
| 1221 | li r0, 0 | 1453 | li r0, 0 |
| 1222 | std r0, HSTATE_SCRATCH0(r13) | 1454 | std r0, HSTATE_SCRATCH0(r13) |
| 1223 | ptesync | 1455 | ptesync |
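Most of the new assembly above implements the H_CEDE nap protocol: a ceding thread publishes itself in vcore->napping_threads with a lwarx/stwcx. loop unless that would leave no runnable thread (or an exit has already started, signalled by entry_exit_count >= 0x100), and the exit path later IPIs any napping threads because an HDEC interrupt does not wake a core out of nap. The following is an illustrative C model of that bitmask protocol, not kernel code; the field names mirror the vcore fields and the 0x100 bias is taken from the diff, everything else is simplified.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct vcore_model {
            _Atomic uint32_t napping_threads;       /* one bit per hardware thread */
            _Atomic uint32_t entry_exit_count;      /* low byte: threads in; +0x100 per exiter */
    };

    /* Returns true if the ceding thread may nap, false if it must go out to the host. */
    static bool try_start_nap(struct vcore_model *vc, int ptid)
    {
            uint32_t threads_in = atomic_load(&vc->entry_exit_count) & 0xff;
            uint32_t old = atomic_load(&vc->napping_threads), new;

            do {
                    new = old | (1u << ptid);
                    /* if every thread in the guest would now be napping, punt to the host */
                    if (__builtin_popcount(new) >= (int)threads_in)
                            return false;
            } while (!atomic_compare_exchange_weak(&vc->napping_threads, &old, new));

            /* the ordering vs. this re-check is what the lwsync in the diff provides */
            if (atomic_load(&vc->entry_exit_count) >= 0x100)
                    return false;                   /* another thread is already exiting */
            return true;                            /* safe to nap until woken or IPI'd */
    }

    static void end_nap(struct vcore_model *vc, int ptid)
    {
            atomic_fetch_and(&vc->napping_threads, ~(1u << ptid));
    }

On real hardware the update is done with lwarx/stwcx., and the wakeup side clears its bit in kvm_end_cede before re-entering the guest through kvmppc_cede_reentry.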
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index c54b0e30cf3f..0a8515a5c042 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
| @@ -29,27 +29,11 @@ | |||
| 29 | #define ULONG_SIZE 8 | 29 | #define ULONG_SIZE 8 |
| 30 | #define FUNC(name) GLUE(.,name) | 30 | #define FUNC(name) GLUE(.,name) |
| 31 | 31 | ||
| 32 | #define GET_SHADOW_VCPU_R13 | ||
| 33 | |||
| 34 | #define DISABLE_INTERRUPTS \ | ||
| 35 | mfmsr r0; \ | ||
| 36 | rldicl r0,r0,48,1; \ | ||
| 37 | rotldi r0,r0,16; \ | ||
| 38 | mtmsrd r0,1; \ | ||
| 39 | |||
| 40 | #elif defined(CONFIG_PPC_BOOK3S_32) | 32 | #elif defined(CONFIG_PPC_BOOK3S_32) |
| 41 | 33 | ||
| 42 | #define ULONG_SIZE 4 | 34 | #define ULONG_SIZE 4 |
| 43 | #define FUNC(name) name | 35 | #define FUNC(name) name |
| 44 | 36 | ||
| 45 | #define GET_SHADOW_VCPU_R13 \ | ||
| 46 | lwz r13, (THREAD + THREAD_KVM_SVCPU)(r2) | ||
| 47 | |||
| 48 | #define DISABLE_INTERRUPTS \ | ||
| 49 | mfmsr r0; \ | ||
| 50 | rlwinm r0,r0,0,17,15; \ | ||
| 51 | mtmsr r0; \ | ||
| 52 | |||
| 53 | #endif /* CONFIG_PPC_BOOK3S_XX */ | 37 | #endif /* CONFIG_PPC_BOOK3S_XX */ |
| 54 | 38 | ||
| 55 | 39 | ||
| @@ -108,44 +92,17 @@ kvm_start_entry: | |||
| 108 | 92 | ||
| 109 | kvm_start_lightweight: | 93 | kvm_start_lightweight: |
| 110 | 94 | ||
| 111 | GET_SHADOW_VCPU_R13 | ||
| 112 | PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4) | ||
| 113 | PPC_STL r3, HSTATE_VMHANDLER(r13) | ||
| 114 | |||
| 115 | PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ | ||
| 116 | |||
| 117 | DISABLE_INTERRUPTS | ||
| 118 | |||
| 119 | #ifdef CONFIG_PPC_BOOK3S_64 | 95 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 120 | /* Some guests may need to have dcbz set to 32 byte length. | ||
| 121 | * | ||
| 122 | * Usually we ensure that by patching the guest's instructions | ||
| 123 | * to trap on dcbz and emulate it in the hypervisor. | ||
| 124 | * | ||
| 125 | * If we can, we should tell the CPU to use 32 byte dcbz though, | ||
| 126 | * because that's a lot faster. | ||
| 127 | */ | ||
| 128 | |||
| 129 | PPC_LL r3, VCPU_HFLAGS(r4) | 96 | PPC_LL r3, VCPU_HFLAGS(r4) |
| 130 | rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */ | 97 | rldicl r3, r3, 0, 63 /* r3 &= 1 */ |
| 131 | beq no_dcbz32_on | 98 | stb r3, HSTATE_RESTORE_HID5(r13) |
| 132 | |||
| 133 | mfspr r3,SPRN_HID5 | ||
| 134 | ori r3, r3, 0x80 /* XXX HID5_dcbz32 = 0x80 */ | ||
| 135 | mtspr SPRN_HID5,r3 | ||
| 136 | |||
| 137 | no_dcbz32_on: | ||
| 138 | |||
| 139 | #endif /* CONFIG_PPC_BOOK3S_64 */ | 99 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
| 140 | 100 | ||
| 141 | PPC_LL r6, VCPU_RMCALL(r4) | 101 | PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ |
| 142 | mtctr r6 | ||
| 143 | |||
| 144 | PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4) | ||
| 145 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | ||
| 146 | 102 | ||
| 147 | /* Jump to segment patching handler and into our guest */ | 103 | /* Jump to segment patching handler and into our guest */ |
| 148 | bctr | 104 | bl FUNC(kvmppc_entry_trampoline) |
| 105 | nop | ||
| 149 | 106 | ||
| 150 | /* | 107 | /* |
| 151 | * This is the handler in module memory. It gets jumped at from the | 108 | * This is the handler in module memory. It gets jumped at from the |
| @@ -170,21 +127,6 @@ kvmppc_handler_highmem: | |||
| 170 | /* R7 = vcpu */ | 127 | /* R7 = vcpu */ |
| 171 | PPC_LL r7, GPR4(r1) | 128 | PPC_LL r7, GPR4(r1) |
| 172 | 129 | ||
| 173 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 174 | |||
| 175 | PPC_LL r5, VCPU_HFLAGS(r7) | ||
| 176 | rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ | ||
| 177 | beq no_dcbz32_off | ||
| 178 | |||
| 179 | li r4, 0 | ||
| 180 | mfspr r5,SPRN_HID5 | ||
| 181 | rldimi r5,r4,6,56 | ||
| 182 | mtspr SPRN_HID5,r5 | ||
| 183 | |||
| 184 | no_dcbz32_off: | ||
| 185 | |||
| 186 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
| 187 | |||
| 188 | PPC_STL r14, VCPU_GPR(r14)(r7) | 130 | PPC_STL r14, VCPU_GPR(r14)(r7) |
| 189 | PPC_STL r15, VCPU_GPR(r15)(r7) | 131 | PPC_STL r15, VCPU_GPR(r15)(r7) |
| 190 | PPC_STL r16, VCPU_GPR(r16)(r7) | 132 | PPC_STL r16, VCPU_GPR(r16)(r7) |
| @@ -204,67 +146,6 @@ no_dcbz32_off: | |||
| 204 | PPC_STL r30, VCPU_GPR(r30)(r7) | 146 | PPC_STL r30, VCPU_GPR(r30)(r7) |
| 205 | PPC_STL r31, VCPU_GPR(r31)(r7) | 147 | PPC_STL r31, VCPU_GPR(r31)(r7) |
| 206 | 148 | ||
| 207 | /* Restore host msr -> SRR1 */ | ||
| 208 | PPC_LL r6, VCPU_HOST_MSR(r7) | ||
| 209 | |||
| 210 | /* | ||
| 211 | * For some interrupts, we need to call the real Linux | ||
| 212 | * handler, so it can do work for us. This has to happen | ||
| 213 | * as if the interrupt arrived from the kernel though, | ||
| 214 | * so let's fake it here where most state is restored. | ||
| 215 | * | ||
| 216 | * Call Linux for hardware interrupts/decrementer | ||
| 217 | * r3 = address of interrupt handler (exit reason) | ||
| 218 | */ | ||
| 219 | |||
| 220 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | ||
| 221 | beq call_linux_handler | ||
| 222 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER | ||
| 223 | beq call_linux_handler | ||
| 224 | cmpwi r12, BOOK3S_INTERRUPT_PERFMON | ||
| 225 | beq call_linux_handler | ||
| 226 | |||
| 227 | /* Back to EE=1 */ | ||
| 228 | mtmsr r6 | ||
| 229 | sync | ||
| 230 | b kvm_return_point | ||
| 231 | |||
| 232 | call_linux_handler: | ||
| 233 | |||
| 234 | /* | ||
| 235 | * If we land here we need to jump back to the handler we | ||
| 236 | * came from. | ||
| 237 | * | ||
| 238 | * We have a page that we can access from real mode, so let's | ||
| 239 | * jump back to that and use it as a trampoline to get back into the | ||
| 240 | * interrupt handler! | ||
| 241 | * | ||
| 242 | * R3 still contains the exit code, | ||
| 243 | * R5 VCPU_HOST_RETIP and | ||
| 244 | * R6 VCPU_HOST_MSR | ||
| 245 | */ | ||
| 246 | |||
| 247 | /* Restore host IP -> SRR0 */ | ||
| 248 | PPC_LL r5, VCPU_HOST_RETIP(r7) | ||
| 249 | |||
| 250 | /* XXX Better move to a safe function? | ||
| 251 | * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ | ||
| 252 | |||
| 253 | mtlr r12 | ||
| 254 | |||
| 255 | PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7) | ||
| 256 | mtsrr0 r4 | ||
| 257 | LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | ||
| 258 | mtsrr1 r3 | ||
| 259 | |||
| 260 | RFI | ||
| 261 | |||
| 262 | .global kvm_return_point | ||
| 263 | kvm_return_point: | ||
| 264 | |||
| 265 | /* Jump back to lightweight entry if we're supposed to */ | ||
| 266 | /* go back into the guest */ | ||
| 267 | |||
| 268 | /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ | 149 | /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ |
| 269 | mr r5, r12 | 150 | mr r5, r12 |
| 270 | 151 | ||
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 0c0d3f274437..d417511abfb1 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
| @@ -150,16 +150,22 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | |||
| 150 | #ifdef CONFIG_PPC_BOOK3S_64 | 150 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 151 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { | 151 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { |
| 152 | kvmppc_mmu_book3s_64_init(vcpu); | 152 | kvmppc_mmu_book3s_64_init(vcpu); |
| 153 | to_book3s(vcpu)->hior = 0xfff00000; | 153 | if (!to_book3s(vcpu)->hior_sregs) |
| 154 | to_book3s(vcpu)->hior = 0xfff00000; | ||
| 154 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; | 155 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; |
| 156 | vcpu->arch.cpu_type = KVM_CPU_3S_64; | ||
| 155 | } else | 157 | } else |
| 156 | #endif | 158 | #endif |
| 157 | { | 159 | { |
| 158 | kvmppc_mmu_book3s_32_init(vcpu); | 160 | kvmppc_mmu_book3s_32_init(vcpu); |
| 159 | to_book3s(vcpu)->hior = 0; | 161 | if (!to_book3s(vcpu)->hior_sregs) |
| 162 | to_book3s(vcpu)->hior = 0; | ||
| 160 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; | 163 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; |
| 164 | vcpu->arch.cpu_type = KVM_CPU_3S_32; | ||
| 161 | } | 165 | } |
| 162 | 166 | ||
| 167 | kvmppc_sanity_check(vcpu); | ||
| 168 | |||
| 163 | /* If we are in hypervisor level on 970, we can tell the CPU to | 169 | /* If we are in hypervisor level on 970, we can tell the CPU to |
| 164 | * treat DCBZ as 32 bytes store */ | 170 | * treat DCBZ as 32 bytes store */ |
| 165 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; | 171 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; |
| @@ -646,7 +652,27 @@ program_interrupt: | |||
| 646 | break; | 652 | break; |
| 647 | } | 653 | } |
| 648 | case BOOK3S_INTERRUPT_SYSCALL: | 654 | case BOOK3S_INTERRUPT_SYSCALL: |
| 649 | if (vcpu->arch.osi_enabled && | 655 | if (vcpu->arch.papr_enabled && |
| 656 | (kvmppc_get_last_inst(vcpu) == 0x44000022) && | ||
| 657 | !(vcpu->arch.shared->msr & MSR_PR)) { | ||
| 658 | /* SC 1 papr hypercalls */ | ||
| 659 | ulong cmd = kvmppc_get_gpr(vcpu, 3); | ||
| 660 | int i; | ||
| 661 | |||
| 662 | if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { | ||
| 663 | r = RESUME_GUEST; | ||
| 664 | break; | ||
| 665 | } | ||
| 666 | |||
| 667 | run->papr_hcall.nr = cmd; | ||
| 668 | for (i = 0; i < 9; ++i) { | ||
| 669 | ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); | ||
| 670 | run->papr_hcall.args[i] = gpr; | ||
| 671 | } | ||
| 672 | run->exit_reason = KVM_EXIT_PAPR_HCALL; | ||
| 673 | vcpu->arch.hcall_needed = 1; | ||
| 674 | r = RESUME_HOST; | ||
| 675 | } else if (vcpu->arch.osi_enabled && | ||
| 650 | (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && | 676 | (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && |
| 651 | (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { | 677 | (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { |
| 652 | /* MOL hypercalls */ | 678 | /* MOL hypercalls */ |
| @@ -770,6 +796,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 770 | } | 796 | } |
| 771 | } | 797 | } |
| 772 | 798 | ||
| 799 | if (sregs->u.s.flags & KVM_SREGS_S_HIOR) | ||
| 800 | sregs->u.s.hior = to_book3s(vcpu)->hior; | ||
| 801 | |||
| 773 | return 0; | 802 | return 0; |
| 774 | } | 803 | } |
| 775 | 804 | ||
| @@ -806,6 +835,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 806 | /* Flush the MMU after messing with the segments */ | 835 | /* Flush the MMU after messing with the segments */ |
| 807 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 836 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
| 808 | 837 | ||
| 838 | if (sregs->u.s.flags & KVM_SREGS_S_HIOR) { | ||
| 839 | to_book3s(vcpu)->hior_sregs = true; | ||
| 840 | to_book3s(vcpu)->hior = sregs->u.s.hior; | ||
| 841 | } | ||
| 842 | |||
| 809 | return 0; | 843 | return 0; |
| 810 | } | 844 | } |
| 811 | 845 | ||
| @@ -841,8 +875,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 841 | if (!p) | 875 | if (!p) |
| 842 | goto uninit_vcpu; | 876 | goto uninit_vcpu; |
| 843 | 877 | ||
| 844 | vcpu->arch.host_retip = kvm_return_point; | ||
| 845 | vcpu->arch.host_msr = mfmsr(); | ||
| 846 | #ifdef CONFIG_PPC_BOOK3S_64 | 878 | #ifdef CONFIG_PPC_BOOK3S_64 |
| 847 | /* default to book3s_64 (970fx) */ | 879 | /* default to book3s_64 (970fx) */ |
| 848 | vcpu->arch.pvr = 0x3C0301; | 880 | vcpu->arch.pvr = 0x3C0301; |
| @@ -853,16 +885,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 853 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); | 885 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); |
| 854 | vcpu->arch.slb_nr = 64; | 886 | vcpu->arch.slb_nr = 64; |
| 855 | 887 | ||
| 856 | /* remember where some real-mode handlers are */ | ||
| 857 | vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline); | ||
| 858 | vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter); | ||
| 859 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; | ||
| 860 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 861 | vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; | ||
| 862 | #else | ||
| 863 | vcpu->arch.rmcall = (ulong)kvmppc_rmcall; | ||
| 864 | #endif | ||
| 865 | |||
| 866 | vcpu->arch.shadow_msr = MSR_USER64; | 888 | vcpu->arch.shadow_msr = MSR_USER64; |
| 867 | 889 | ||
| 868 | err = kvmppc_mmu_init(vcpu); | 890 | err = kvmppc_mmu_init(vcpu); |
| @@ -908,6 +930,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 908 | #endif | 930 | #endif |
| 909 | ulong ext_msr; | 931 | ulong ext_msr; |
| 910 | 932 | ||
| 933 | /* Check if we can run the vcpu at all */ | ||
| 934 | if (!vcpu->arch.sane) { | ||
| 935 | kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 936 | return -EINVAL; | ||
| 937 | } | ||
| 938 | |||
| 911 | /* No need to go into the guest when all we do is going out */ | 939 | /* No need to go into the guest when all we do is going out */ |
| 912 | if (signal_pending(current)) { | 940 | if (signal_pending(current)) { |
| 913 | kvm_run->exit_reason = KVM_EXIT_INTR; | 941 | kvm_run->exit_reason = KVM_EXIT_INTR; |
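With papr_enabled set, a privileged `sc 1` from the guest (0x44000022 is exactly that encoding: opcode 17 with LEV=1) is first offered to the in-kernel kvmppc_h_pr() handler; anything it does not handle is forwarded to userspace as KVM_EXIT_PAPR_HCALL, with the hcall number and the first nine arguments copied into kvm_run. Below is a hedged sketch of the userspace side; the H_FUNCTION value is the conventional PAPR "not implemented" return and is assumed here, and the write-back of papr_hcall.ret into the guest's r3 on the next KVM_RUN is presumed rather than shown in this hunk.

    #include <stdio.h>
    #include <linux/kvm.h>

    #define H_FUNCTION      (-2)    /* PAPR: hypercall not implemented (assumed value) */

    /* Called when ioctl(vcpu_fd, KVM_RUN) stops with exit_reason == KVM_EXIT_PAPR_HCALL. */
    static void handle_papr_hcall(struct kvm_run *run)
    {
            fprintf(stderr, "guest hcall 0x%llx, first arg 0x%llx\n",
                    (unsigned long long)run->papr_hcall.nr,
                    (unsigned long long)run->papr_hcall.args[0]);

            /* A real VMM would dispatch on run->papr_hcall.nr here. */
            run->papr_hcall.ret = H_FUNCTION;
    }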
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
new file mode 100644
index 000000000000..b9589324797b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
| @@ -0,0 +1,158 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2011. Freescale Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * Authors: | ||
| 5 | * Alexander Graf <agraf@suse.de> | ||
| 6 | * Paul Mackerras <paulus@samba.org> | ||
| 7 | * | ||
| 8 | * Description: | ||
| 9 | * | ||
| 10 | * Hypercall handling for running PAPR guests in PR KVM on Book 3S | ||
| 11 | * processors. | ||
| 12 | * | ||
| 13 | * This program is free software; you can redistribute it and/or modify | ||
| 14 | * it under the terms of the GNU General Public License, version 2, as | ||
| 15 | * published by the Free Software Foundation. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <asm/uaccess.h> | ||
| 19 | #include <asm/kvm_ppc.h> | ||
| 20 | #include <asm/kvm_book3s.h> | ||
| 21 | |||
| 22 | static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index) | ||
| 23 | { | ||
| 24 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | ||
| 25 | unsigned long pteg_addr; | ||
| 26 | |||
| 27 | pte_index <<= 4; | ||
| 28 | pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70; | ||
| 29 | pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL; | ||
| 30 | pteg_addr |= pte_index; | ||
| 31 | |||
| 32 | return pteg_addr; | ||
| 33 | } | ||
| 34 | |||
| 35 | static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) | ||
| 36 | { | ||
| 37 | long flags = kvmppc_get_gpr(vcpu, 4); | ||
| 38 | long pte_index = kvmppc_get_gpr(vcpu, 5); | ||
| 39 | unsigned long pteg[2 * 8]; | ||
| 40 | unsigned long pteg_addr, i, *hpte; | ||
| 41 | |||
| 42 | pte_index &= ~7UL; | ||
| 43 | pteg_addr = get_pteg_addr(vcpu, pte_index); | ||
| 44 | |||
| 45 | copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); | ||
| 46 | hpte = pteg; | ||
| 47 | |||
| 48 | if (likely((flags & H_EXACT) == 0)) { | ||
| 49 | pte_index &= ~7UL; | ||
| 50 | for (i = 0; ; ++i) { | ||
| 51 | if (i == 8) | ||
| 52 | return H_PTEG_FULL; | ||
| 53 | if ((*hpte & HPTE_V_VALID) == 0) | ||
| 54 | break; | ||
| 55 | hpte += 2; | ||
| 56 | } | ||
| 57 | } else { | ||
| 58 | i = kvmppc_get_gpr(vcpu, 5) & 7UL; | ||
| 59 | hpte += i * 2; | ||
| 60 | } | ||
| 61 | |||
| 62 | hpte[0] = kvmppc_get_gpr(vcpu, 6); | ||
| 63 | hpte[1] = kvmppc_get_gpr(vcpu, 7); | ||
| 64 | copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)); | ||
| 65 | kvmppc_set_gpr(vcpu, 3, H_SUCCESS); | ||
| 66 | kvmppc_set_gpr(vcpu, 4, pte_index | i); | ||
| 67 | |||
| 68 | return EMULATE_DONE; | ||
| 69 | } | ||
| 70 | |||
| 71 | static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) | ||
| 72 | { | ||
| 73 | unsigned long flags= kvmppc_get_gpr(vcpu, 4); | ||
| 74 | unsigned long pte_index = kvmppc_get_gpr(vcpu, 5); | ||
| 75 | unsigned long avpn = kvmppc_get_gpr(vcpu, 6); | ||
| 76 | unsigned long v = 0, pteg, rb; | ||
| 77 | unsigned long pte[2]; | ||
| 78 | |||
| 79 | pteg = get_pteg_addr(vcpu, pte_index); | ||
| 80 | copy_from_user(pte, (void __user *)pteg, sizeof(pte)); | ||
| 81 | |||
| 82 | if ((pte[0] & HPTE_V_VALID) == 0 || | ||
| 83 | ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) || | ||
| 84 | ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) { | ||
| 85 | kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); | ||
| 86 | return EMULATE_DONE; | ||
| 87 | } | ||
| 88 | |||
| 89 | copy_to_user((void __user *)pteg, &v, sizeof(v)); | ||
| 90 | |||
| 91 | rb = compute_tlbie_rb(pte[0], pte[1], pte_index); | ||
| 92 | vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); | ||
| 93 | |||
| 94 | kvmppc_set_gpr(vcpu, 3, H_SUCCESS); | ||
| 95 | kvmppc_set_gpr(vcpu, 4, pte[0]); | ||
| 96 | kvmppc_set_gpr(vcpu, 5, pte[1]); | ||
| 97 | |||
| 98 | return EMULATE_DONE; | ||
| 99 | } | ||
| 100 | |||
| 101 | static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) | ||
| 102 | { | ||
| 103 | unsigned long flags = kvmppc_get_gpr(vcpu, 4); | ||
| 104 | unsigned long pte_index = kvmppc_get_gpr(vcpu, 5); | ||
| 105 | unsigned long avpn = kvmppc_get_gpr(vcpu, 6); | ||
| 106 | unsigned long rb, pteg, r, v; | ||
| 107 | unsigned long pte[2]; | ||
| 108 | |||
| 109 | pteg = get_pteg_addr(vcpu, pte_index); | ||
| 110 | copy_from_user(pte, (void __user *)pteg, sizeof(pte)); | ||
| 111 | |||
| 112 | if ((pte[0] & HPTE_V_VALID) == 0 || | ||
| 113 | ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) { | ||
| 114 | kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); | ||
| 115 | return EMULATE_DONE; | ||
| 116 | } | ||
| 117 | |||
| 118 | v = pte[0]; | ||
| 119 | r = pte[1]; | ||
| 120 | r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI | | ||
| 121 | HPTE_R_KEY_LO); | ||
| 122 | r |= (flags << 55) & HPTE_R_PP0; | ||
| 123 | r |= (flags << 48) & HPTE_R_KEY_HI; | ||
| 124 | r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); | ||
| 125 | |||
| 126 | pte[1] = r; | ||
| 127 | |||
| 128 | rb = compute_tlbie_rb(v, r, pte_index); | ||
| 129 | vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); | ||
| 130 | copy_to_user((void __user *)pteg, pte, sizeof(pte)); | ||
| 131 | |||
| 132 | kvmppc_set_gpr(vcpu, 3, H_SUCCESS); | ||
| 133 | |||
| 134 | return EMULATE_DONE; | ||
| 135 | } | ||
| 136 | |||
| 137 | int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) | ||
| 138 | { | ||
| 139 | switch (cmd) { | ||
| 140 | case H_ENTER: | ||
| 141 | return kvmppc_h_pr_enter(vcpu); | ||
| 142 | case H_REMOVE: | ||
| 143 | return kvmppc_h_pr_remove(vcpu); | ||
| 144 | case H_PROTECT: | ||
| 145 | return kvmppc_h_pr_protect(vcpu); | ||
| 146 | case H_BULK_REMOVE: | ||
| 147 | /* We just flush all PTEs, so user space can | ||
| 148 | handle the HPT modifications */ | ||
| 149 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
| 150 | break; | ||
| 151 | case H_CEDE: | ||
| 152 | kvm_vcpu_block(vcpu); | ||
| 153 | vcpu->stat.halt_wakeup++; | ||
| 154 | return EMULATE_DONE; | ||
| 155 | } | ||
| 156 | |||
| 157 | return EMULATE_FAIL; | ||
| 158 | } | ||
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index c1f877c4a884..34187585c507 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <asm/ppc_asm.h> | 20 | #include <asm/ppc_asm.h> |
| 21 | #include <asm/kvm_asm.h> | 21 | #include <asm/kvm_asm.h> |
| 22 | #include <asm/reg.h> | 22 | #include <asm/reg.h> |
| 23 | #include <asm/mmu.h> | ||
| 23 | #include <asm/page.h> | 24 | #include <asm/page.h> |
| 24 | #include <asm/asm-offsets.h> | 25 | #include <asm/asm-offsets.h> |
| 25 | 26 | ||
| @@ -35,10 +36,10 @@ | |||
| 35 | 36 | ||
| 36 | #if defined(CONFIG_PPC_BOOK3S_64) | 37 | #if defined(CONFIG_PPC_BOOK3S_64) |
| 37 | 38 | ||
| 38 | #define LOAD_SHADOW_VCPU(reg) GET_PACA(reg) | ||
| 39 | #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) | ||
| 40 | #define FUNC(name) GLUE(.,name) | 39 | #define FUNC(name) GLUE(.,name) |
| 40 | #define MTMSR_EERI(reg) mtmsrd (reg),1 | ||
| 41 | 41 | ||
| 42 | .globl kvmppc_skip_interrupt | ||
| 42 | kvmppc_skip_interrupt: | 43 | kvmppc_skip_interrupt: |
| 43 | /* | 44 | /* |
| 44 | * Here all GPRs are unchanged from when the interrupt happened | 45 | * Here all GPRs are unchanged from when the interrupt happened |
| @@ -51,6 +52,7 @@ kvmppc_skip_interrupt: | |||
| 51 | rfid | 52 | rfid |
| 52 | b . | 53 | b . |
| 53 | 54 | ||
| 55 | .globl kvmppc_skip_Hinterrupt | ||
| 54 | kvmppc_skip_Hinterrupt: | 56 | kvmppc_skip_Hinterrupt: |
| 55 | /* | 57 | /* |
| 56 | * Here all GPRs are unchanged from when the interrupt happened | 58 | * Here all GPRs are unchanged from when the interrupt happened |
| @@ -65,8 +67,8 @@ kvmppc_skip_Hinterrupt: | |||
| 65 | 67 | ||
| 66 | #elif defined(CONFIG_PPC_BOOK3S_32) | 68 | #elif defined(CONFIG_PPC_BOOK3S_32) |
| 67 | 69 | ||
| 68 | #define MSR_NOIRQ MSR_KERNEL | ||
| 69 | #define FUNC(name) name | 70 | #define FUNC(name) name |
| 71 | #define MTMSR_EERI(reg) mtmsr (reg) | ||
| 70 | 72 | ||
| 71 | .macro INTERRUPT_TRAMPOLINE intno | 73 | .macro INTERRUPT_TRAMPOLINE intno |
| 72 | 74 | ||
| @@ -167,40 +169,24 @@ kvmppc_handler_skip_ins: | |||
| 167 | #endif | 169 | #endif |
| 168 | 170 | ||
| 169 | /* | 171 | /* |
| 170 | * This trampoline brings us back to a real mode handler | 172 | * Call kvmppc_handler_trampoline_enter in real mode |
| 171 | * | ||
| 172 | * Input Registers: | ||
| 173 | * | ||
| 174 | * R5 = SRR0 | ||
| 175 | * R6 = SRR1 | ||
| 176 | * LR = real-mode IP | ||
| 177 | * | 173 | * |
| 174 | * On entry, r4 contains the guest shadow MSR | ||
| 178 | */ | 175 | */ |
| 179 | .global kvmppc_handler_lowmem_trampoline | 176 | _GLOBAL(kvmppc_entry_trampoline) |
| 180 | kvmppc_handler_lowmem_trampoline: | 177 | mfmsr r5 |
| 181 | 178 | LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) | |
| 182 | mtsrr0 r5 | 179 | toreal(r7) |
| 180 | |||
| 181 | li r9, MSR_RI | ||
| 182 | ori r9, r9, MSR_EE | ||
| 183 | andc r9, r5, r9 /* Clear EE and RI in MSR value */ | ||
| 184 | li r6, MSR_IR | MSR_DR | ||
| 185 | ori r6, r6, MSR_EE | ||
| 186 | andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */ | ||
| 187 | MTMSR_EERI(r9) /* Clear EE and RI in MSR */ | ||
| 188 | mtsrr0 r7 /* before we set srr0/1 */ | ||
| 183 | mtsrr1 r6 | 189 | mtsrr1 r6 |
| 184 | blr | ||
| 185 | kvmppc_handler_lowmem_trampoline_end: | ||
| 186 | |||
| 187 | /* | ||
| 188 | * Call a function in real mode | ||
| 189 | * | ||
| 190 | * Input Registers: | ||
| 191 | * | ||
| 192 | * R3 = function | ||
| 193 | * R4 = MSR | ||
| 194 | * R5 = scratch register | ||
| 195 | * | ||
| 196 | */ | ||
| 197 | _GLOBAL(kvmppc_rmcall) | ||
| 198 | LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ) | ||
| 199 | mtmsr r5 /* Disable relocation and interrupts, so mtsrr | ||
| 200 | doesn't get interrupted */ | ||
| 201 | sync | ||
| 202 | mtsrr0 r3 | ||
| 203 | mtsrr1 r4 | ||
| 204 | RFI | 190 | RFI |
| 205 | 191 | ||
| 206 | #if defined(CONFIG_PPC_BOOK3S_32) | 192 | #if defined(CONFIG_PPC_BOOK3S_32) |
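kvmppc_entry_trampoline replaces the old lowmem-trampoline/rmcall pair: it derives two MSR images from the running MSR in r5, installs the EE/RI-cleared one immediately, and RFIs into kvmppc_handler_trampoline_enter with the EE/IR/DR-cleared one in SRR1 so the segment code runs in real mode. A small sketch of that bit manipulation with conventional 64-bit PowerPC MSR bit positions; the constants and the sample MSR are assumptions for illustration, not values taken from this diff.

    #include <stdio.h>

    /* Conventional 64-bit PowerPC MSR bits (assumed values, illustration only). */
    #define MSR_EE  0x8000UL        /* external interrupts enabled */
    #define MSR_RI  0x0002UL        /* recoverable interrupt */
    #define MSR_IR  0x0020UL        /* instruction relocation */
    #define MSR_DR  0x0010UL        /* data relocation */

    int main(void)
    {
            unsigned long host_msr     = 0x8000000000009032UL;  /* sample SF|EE|ME|IR|DR|RI */
            unsigned long msr_noirq    = host_msr & ~(MSR_EE | MSR_RI);            /* r9 */
            unsigned long msr_realmode = host_msr & ~(MSR_EE | MSR_IR | MSR_DR);   /* r6 -> SRR1 */

            printf("installed immediately : 0x%lx\n", msr_noirq);
            printf("in effect after RFI   : 0x%lx\n", msr_realmode);
            return 0;
    }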
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index aed32e517212..0676ae249b9f 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
| @@ -23,6 +23,7 @@ | |||
| 23 | 23 | ||
| 24 | #define GET_SHADOW_VCPU(reg) \ | 24 | #define GET_SHADOW_VCPU(reg) \ |
| 25 | mr reg, r13 | 25 | mr reg, r13 |
| 26 | #define MTMSR_EERI(reg) mtmsrd (reg),1 | ||
| 26 | 27 | ||
| 27 | #elif defined(CONFIG_PPC_BOOK3S_32) | 28 | #elif defined(CONFIG_PPC_BOOK3S_32) |
| 28 | 29 | ||
| @@ -30,6 +31,7 @@ | |||
| 30 | tophys(reg, r2); \ | 31 | tophys(reg, r2); \ |
| 31 | lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \ | 32 | lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \ |
| 32 | tophys(reg, reg) | 33 | tophys(reg, reg) |
| 34 | #define MTMSR_EERI(reg) mtmsr (reg) | ||
| 33 | 35 | ||
| 34 | #endif | 36 | #endif |
| 35 | 37 | ||
| @@ -57,10 +59,12 @@ kvmppc_handler_trampoline_enter: | |||
| 57 | /* Required state: | 59 | /* Required state: |
| 58 | * | 60 | * |
| 59 | * MSR = ~IR|DR | 61 | * MSR = ~IR|DR |
| 60 | * R13 = PACA | ||
| 61 | * R1 = host R1 | 62 | * R1 = host R1 |
| 62 | * R2 = host R2 | 63 | * R2 = host R2 |
| 63 | * R10 = guest MSR | 64 | * R4 = guest shadow MSR |
| 65 | * R5 = normal host MSR | ||
| 66 | * R6 = current host MSR (EE, IR, DR off) | ||
| 67 | * LR = highmem guest exit code | ||
| 64 | * all other volatile GPRS = free | 68 | * all other volatile GPRS = free |
| 65 | * SVCPU[CR] = guest CR | 69 | * SVCPU[CR] = guest CR |
| 66 | * SVCPU[XER] = guest XER | 70 | * SVCPU[XER] = guest XER |
| @@ -71,15 +75,15 @@ kvmppc_handler_trampoline_enter: | |||
| 71 | /* r3 = shadow vcpu */ | 75 | /* r3 = shadow vcpu */ |
| 72 | GET_SHADOW_VCPU(r3) | 76 | GET_SHADOW_VCPU(r3) |
| 73 | 77 | ||
| 78 | /* Save guest exit handler address and MSR */ | ||
| 79 | mflr r0 | ||
| 80 | PPC_STL r0, HSTATE_VMHANDLER(r3) | ||
| 81 | PPC_STL r5, HSTATE_HOST_MSR(r3) | ||
| 82 | |||
| 74 | /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */ | 83 | /* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */ |
| 75 | PPC_STL r1, HSTATE_HOST_R1(r3) | 84 | PPC_STL r1, HSTATE_HOST_R1(r3) |
| 76 | PPC_STL r2, HSTATE_HOST_R2(r3) | 85 | PPC_STL r2, HSTATE_HOST_R2(r3) |
| 77 | 86 | ||
| 78 | /* Move SRR0 and SRR1 into the respective regs */ | ||
| 79 | PPC_LL r9, SVCPU_PC(r3) | ||
| 80 | mtsrr0 r9 | ||
| 81 | mtsrr1 r10 | ||
| 82 | |||
| 83 | /* Activate guest mode, so faults get handled by KVM */ | 87 | /* Activate guest mode, so faults get handled by KVM */ |
| 84 | li r11, KVM_GUEST_MODE_GUEST | 88 | li r11, KVM_GUEST_MODE_GUEST |
| 85 | stb r11, HSTATE_IN_GUEST(r3) | 89 | stb r11, HSTATE_IN_GUEST(r3) |
| @@ -87,17 +91,46 @@ kvmppc_handler_trampoline_enter: | |||
| 87 | /* Switch to guest segment. This is subarch specific. */ | 91 | /* Switch to guest segment. This is subarch specific. */ |
| 88 | LOAD_GUEST_SEGMENTS | 92 | LOAD_GUEST_SEGMENTS |
| 89 | 93 | ||
| 94 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 95 | /* Some guests may need to have dcbz set to 32 byte length. | ||
| 96 | * | ||
| 97 | * Usually we ensure that by patching the guest's instructions | ||
| 98 | * to trap on dcbz and emulate it in the hypervisor. | ||
| 99 | * | ||
| 100 | * If we can, we should tell the CPU to use 32 byte dcbz though, | ||
| 101 | * because that's a lot faster. | ||
| 102 | */ | ||
| 103 | lbz r0, HSTATE_RESTORE_HID5(r3) | ||
| 104 | cmpwi r0, 0 | ||
| 105 | beq no_dcbz32_on | ||
| 106 | |||
| 107 | mfspr r0,SPRN_HID5 | ||
| 108 | ori r0, r0, 0x80 /* XXX HID5_dcbz32 = 0x80 */ | ||
| 109 | mtspr SPRN_HID5,r0 | ||
| 110 | no_dcbz32_on: | ||
| 111 | |||
| 112 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
| 113 | |||
| 90 | /* Enter guest */ | 114 | /* Enter guest */ |
| 91 | 115 | ||
| 92 | PPC_LL r4, SVCPU_CTR(r3) | 116 | PPC_LL r8, SVCPU_CTR(r3) |
| 93 | PPC_LL r5, SVCPU_LR(r3) | 117 | PPC_LL r9, SVCPU_LR(r3) |
| 94 | lwz r6, SVCPU_CR(r3) | 118 | lwz r10, SVCPU_CR(r3) |
| 95 | lwz r7, SVCPU_XER(r3) | 119 | lwz r11, SVCPU_XER(r3) |
| 120 | |||
| 121 | mtctr r8 | ||
| 122 | mtlr r9 | ||
| 123 | mtcr r10 | ||
| 124 | mtxer r11 | ||
| 96 | 125 | ||
| 97 | mtctr r4 | 126 | /* Move SRR0 and SRR1 into the respective regs */ |
| 98 | mtlr r5 | 127 | PPC_LL r9, SVCPU_PC(r3) |
| 99 | mtcr r6 | 128 | /* First clear RI in our current MSR value */ |
| 100 | mtxer r7 | 129 | li r0, MSR_RI |
| 130 | andc r6, r6, r0 | ||
| 131 | MTMSR_EERI(r6) | ||
| 132 | mtsrr0 r9 | ||
| 133 | mtsrr1 r4 | ||
| 101 | 134 | ||
| 102 | PPC_LL r0, SVCPU_R0(r3) | 135 | PPC_LL r0, SVCPU_R0(r3) |
| 103 | PPC_LL r1, SVCPU_R1(r3) | 136 | PPC_LL r1, SVCPU_R1(r3) |
| @@ -213,11 +246,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) | |||
| 213 | beq ld_last_inst | 246 | beq ld_last_inst |
| 214 | cmpwi r12, BOOK3S_INTERRUPT_PROGRAM | 247 | cmpwi r12, BOOK3S_INTERRUPT_PROGRAM |
| 215 | beq ld_last_inst | 248 | beq ld_last_inst |
| 249 | cmpwi r12, BOOK3S_INTERRUPT_SYSCALL | ||
| 250 | beq ld_last_prev_inst | ||
| 216 | cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT | 251 | cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT |
| 217 | beq- ld_last_inst | 252 | beq- ld_last_inst |
| 218 | 253 | ||
| 219 | b no_ld_last_inst | 254 | b no_ld_last_inst |
| 220 | 255 | ||
| 256 | ld_last_prev_inst: | ||
| 257 | addi r3, r3, -4 | ||
| 258 | |||
| 221 | ld_last_inst: | 259 | ld_last_inst: |
| 222 | /* Save off the guest instruction we're at */ | 260 | /* Save off the guest instruction we're at */ |
| 223 | 261 | ||
| @@ -254,6 +292,43 @@ no_ld_last_inst: | |||
| 254 | /* Switch back to host MMU */ | 292 | /* Switch back to host MMU */ |
| 255 | LOAD_HOST_SEGMENTS | 293 | LOAD_HOST_SEGMENTS |
| 256 | 294 | ||
| 295 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
| 296 | |||
| 297 | lbz r5, HSTATE_RESTORE_HID5(r13) | ||
| 298 | cmpwi r5, 0 | ||
| 299 | beq no_dcbz32_off | ||
| 300 | |||
| 301 | li r4, 0 | ||
| 302 | mfspr r5,SPRN_HID5 | ||
| 303 | rldimi r5,r4,6,56 | ||
| 304 | mtspr SPRN_HID5,r5 | ||
| 305 | |||
| 306 | no_dcbz32_off: | ||
| 307 | |||
| 308 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
| 309 | |||
| 310 | /* | ||
| 311 | * For some interrupts, we need to call the real Linux | ||
| 312 | * handler, so it can do work for us. This has to happen | ||
| 313 | * as if the interrupt arrived from the kernel though, | ||
| 314 | * so let's fake it here where most state is restored. | ||
| 315 | * | ||
| 316 | * Having set up SRR0/1 with the address where we want | ||
| 317 | * to continue with relocation on (potentially in module | ||
| 318 | * space), we either just go straight there with rfi[d], | ||
| 319 | * or we jump to an interrupt handler with bctr if there | ||
| 320 | * is an interrupt to be handled first. In the latter | ||
| 321 | * case, the rfi[d] at the end of the interrupt handler | ||
| 322 | * will get us back to where we want to continue. | ||
| 323 | */ | ||
| 324 | |||
| 325 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL | ||
| 326 | beq 1f | ||
| 327 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER | ||
| 328 | beq 1f | ||
| 329 | cmpwi r12, BOOK3S_INTERRUPT_PERFMON | ||
| 330 | 1: mtctr r12 | ||
| 331 | |||
| 257 | /* Register usage at this point: | 332 | /* Register usage at this point: |
| 258 | * | 333 | * |
| 259 | * R1 = host R1 | 334 | * R1 = host R1 |
| @@ -264,13 +339,15 @@ no_ld_last_inst: | |||
| 264 | * | 339 | * |
| 265 | */ | 340 | */ |
| 266 | 341 | ||
| 267 | /* RFI into the highmem handler */ | 342 | PPC_LL r6, HSTATE_HOST_MSR(r13) |
| 268 | mfmsr r7 | ||
| 269 | ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */ | ||
| 270 | mtsrr1 r7 | ||
| 271 | /* Load highmem handler address */ | ||
| 272 | PPC_LL r8, HSTATE_VMHANDLER(r13) | 343 | PPC_LL r8, HSTATE_VMHANDLER(r13) |
| 344 | |||
| 345 | /* Restore host msr -> SRR1 */ | ||
| 346 | mtsrr1 r6 | ||
| 347 | /* Load highmem handler address */ | ||
| 273 | mtsrr0 r8 | 348 | mtsrr0 r8 |
| 274 | 349 | ||
| 350 | /* RFI into the highmem handler, or jump to interrupt handler */ | ||
| 351 | beqctr | ||
| 275 | RFI | 352 | RFI |
| 276 | kvmppc_handler_trampoline_exit_end: | 353 | kvmppc_handler_trampoline_exit_end: |
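The new exit tail chooses between two continuations: external, decrementer and performance-monitor interrupts are steered straight into the host's first-level handler (the beqctr case, so Linux can do its work as if the interrupt had arrived from the kernel), while every other trap RFIs back to the module's highmem handler. A trivial C restatement of that dispatch, with the standard Book3S trap vector numbers assumed rather than taken from this diff:

    /* Assumed Book3S trap numbers (these match the usual exception vectors). */
    #define BOOK3S_INTERRUPT_EXTERNAL       0x500
    #define BOOK3S_INTERRUPT_DECREMENTER    0x900
    #define BOOK3S_INTERRUPT_PERFMON        0xf00

    /* 1: jump to the Linux interrupt handler first; 0: return to the highmem handler. */
    static int handled_by_linux_first(int trap)
    {
            switch (trap) {
            case BOOK3S_INTERRUPT_EXTERNAL:
            case BOOK3S_INTERRUPT_DECREMENTER:
            case BOOK3S_INTERRUPT_PERFMON:
                    return 1;
            default:
                    return 0;
            }
    }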
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ee45fa01220e..bb6c988f010a 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
| @@ -316,6 +316,11 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 316 | { | 316 | { |
| 317 | int ret; | 317 | int ret; |
| 318 | 318 | ||
| 319 | if (!vcpu->arch.sane) { | ||
| 320 | kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 321 | return -EINVAL; | ||
| 322 | } | ||
| 323 | |||
| 319 | local_irq_disable(); | 324 | local_irq_disable(); |
| 320 | kvm_guest_enter(); | 325 | kvm_guest_enter(); |
| 321 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); | 326 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); |
| @@ -618,6 +623,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 618 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | 623 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) |
| 619 | { | 624 | { |
| 620 | int i; | 625 | int i; |
| 626 | int r; | ||
| 621 | 627 | ||
| 622 | vcpu->arch.pc = 0; | 628 | vcpu->arch.pc = 0; |
| 623 | vcpu->arch.shared->msr = 0; | 629 | vcpu->arch.shared->msr = 0; |
| @@ -634,7 +640,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 634 | 640 | ||
| 635 | kvmppc_init_timing_stats(vcpu); | 641 | kvmppc_init_timing_stats(vcpu); |
| 636 | 642 | ||
| 637 | return kvmppc_core_vcpu_setup(vcpu); | 643 | r = kvmppc_core_vcpu_setup(vcpu); |
| 644 | kvmppc_sanity_check(vcpu); | ||
| 645 | return r; | ||
| 638 | } | 646 | } |
| 639 | 647 | ||
| 640 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 648 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 797a7447c268..26d20903f2bc 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
| @@ -73,6 +73,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 73 | /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ | 73 | /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ |
| 74 | vcpu->vcpu_id = 0; | 74 | vcpu->vcpu_id = 0; |
| 75 | 75 | ||
| 76 | vcpu->arch.cpu_type = KVM_CPU_E500V2; | ||
| 77 | |||
| 76 | return 0; | 78 | return 0; |
| 77 | } | 79 | } |
| 78 | 80 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a107c9be0fb1..0d843c6ba315 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
| @@ -39,12 +39,8 @@ | |||
| 39 | 39 | ||
| 40 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 40 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
| 41 | { | 41 | { |
| 42 | #ifndef CONFIG_KVM_BOOK3S_64_HV | ||
| 43 | return !(v->arch.shared->msr & MSR_WE) || | 42 | return !(v->arch.shared->msr & MSR_WE) || |
| 44 | !!(v->arch.pending_exceptions); | 43 | !!(v->arch.pending_exceptions); |
| 45 | #else | ||
| 46 | return !(v->arch.ceded) || !!(v->arch.pending_exceptions); | ||
| 47 | #endif | ||
| 48 | } | 44 | } |
| 49 | 45 | ||
| 50 | int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) | 46 | int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) |
| @@ -95,6 +91,31 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) | |||
| 95 | return r; | 91 | return r; |
| 96 | } | 92 | } |
| 97 | 93 | ||
| 94 | int kvmppc_sanity_check(struct kvm_vcpu *vcpu) | ||
| 95 | { | ||
| 96 | int r = false; | ||
| 97 | |||
| 98 | /* We have to know what CPU to virtualize */ | ||
| 99 | if (!vcpu->arch.pvr) | ||
| 100 | goto out; | ||
| 101 | |||
| 102 | /* PAPR only works with book3s_64 */ | ||
| 103 | if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled) | ||
| 104 | goto out; | ||
| 105 | |||
| 106 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
| 107 | /* HV KVM can only do PAPR mode for now */ | ||
| 108 | if (!vcpu->arch.papr_enabled) | ||
| 109 | goto out; | ||
| 110 | #endif | ||
| 111 | |||
| 112 | r = true; | ||
| 113 | |||
| 114 | out: | ||
| 115 | vcpu->arch.sane = r; | ||
| 116 | return r ? 0 : -EINVAL; | ||
| 117 | } | ||
| 118 | |||
| 98 | int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | 119 | int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) |
| 99 | { | 120 | { |
| 100 | enum emulation_result er; | 121 | enum emulation_result er; |
| @@ -188,6 +209,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 188 | case KVM_CAP_PPC_BOOKE_SREGS: | 209 | case KVM_CAP_PPC_BOOKE_SREGS: |
| 189 | #else | 210 | #else |
| 190 | case KVM_CAP_PPC_SEGSTATE: | 211 | case KVM_CAP_PPC_SEGSTATE: |
| 212 | case KVM_CAP_PPC_HIOR: | ||
| 213 | case KVM_CAP_PPC_PAPR: | ||
| 191 | #endif | 214 | #endif |
| 192 | case KVM_CAP_PPC_UNSET_IRQ: | 215 | case KVM_CAP_PPC_UNSET_IRQ: |
| 193 | case KVM_CAP_PPC_IRQ_LEVEL: | 216 | case KVM_CAP_PPC_IRQ_LEVEL: |
| @@ -258,6 +281,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
| 258 | { | 281 | { |
| 259 | struct kvm_vcpu *vcpu; | 282 | struct kvm_vcpu *vcpu; |
| 260 | vcpu = kvmppc_core_vcpu_create(kvm, id); | 283 | vcpu = kvmppc_core_vcpu_create(kvm, id); |
| 284 | vcpu->arch.wqp = &vcpu->wq; | ||
| 261 | if (!IS_ERR(vcpu)) | 285 | if (!IS_ERR(vcpu)) |
| 262 | kvmppc_create_vcpu_debugfs(vcpu, id); | 286 | kvmppc_create_vcpu_debugfs(vcpu, id); |
| 263 | return vcpu; | 287 | return vcpu; |
| @@ -289,8 +313,8 @@ static void kvmppc_decrementer_func(unsigned long data) | |||
| 289 | 313 | ||
| 290 | kvmppc_core_queue_dec(vcpu); | 314 | kvmppc_core_queue_dec(vcpu); |
| 291 | 315 | ||
| 292 | if (waitqueue_active(&vcpu->wq)) { | 316 | if (waitqueue_active(vcpu->arch.wqp)) { |
| 293 | wake_up_interruptible(&vcpu->wq); | 317 | wake_up_interruptible(vcpu->arch.wqp); |
| 294 | vcpu->stat.halt_wakeup++; | 318 | vcpu->stat.halt_wakeup++; |
| 295 | } | 319 | } |
| 296 | } | 320 | } |
| @@ -543,13 +567,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 543 | 567 | ||
| 544 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | 568 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) |
| 545 | { | 569 | { |
| 546 | if (irq->irq == KVM_INTERRUPT_UNSET) | 570 | if (irq->irq == KVM_INTERRUPT_UNSET) { |
| 547 | kvmppc_core_dequeue_external(vcpu, irq); | 571 | kvmppc_core_dequeue_external(vcpu, irq); |
| 548 | else | 572 | return 0; |
| 549 | kvmppc_core_queue_external(vcpu, irq); | 573 | } |
| 574 | |||
| 575 | kvmppc_core_queue_external(vcpu, irq); | ||
| 550 | 576 | ||
| 551 | if (waitqueue_active(&vcpu->wq)) { | 577 | if (waitqueue_active(vcpu->arch.wqp)) { |
| 552 | wake_up_interruptible(&vcpu->wq); | 578 | wake_up_interruptible(vcpu->arch.wqp); |
| 553 | vcpu->stat.halt_wakeup++; | 579 | vcpu->stat.halt_wakeup++; |
| 554 | } else if (vcpu->cpu != -1) { | 580 | } else if (vcpu->cpu != -1) { |
| 555 | smp_send_reschedule(vcpu->cpu); | 581 | smp_send_reschedule(vcpu->cpu); |
| @@ -571,11 +597,18 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
| 571 | r = 0; | 597 | r = 0; |
| 572 | vcpu->arch.osi_enabled = true; | 598 | vcpu->arch.osi_enabled = true; |
| 573 | break; | 599 | break; |
| 600 | case KVM_CAP_PPC_PAPR: | ||
| 601 | r = 0; | ||
| 602 | vcpu->arch.papr_enabled = true; | ||
| 603 | break; | ||
| 574 | default: | 604 | default: |
| 575 | r = -EINVAL; | 605 | r = -EINVAL; |
| 576 | break; | 606 | break; |
| 577 | } | 607 | } |
| 578 | 608 | ||
| 609 | if (!r) | ||
| 610 | r = kvmppc_sanity_check(vcpu); | ||
| 611 | |||
| 579 | return r; | 612 | return r; |
| 580 | } | 613 | } |
| 581 | 614 | ||
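The new KVM_CAP_PPC_PAPR capability is enabled per vcpu through KVM_ENABLE_CAP, and a successful enable now re-runs kvmppc_sanity_check(). Below is a minimal userspace sketch, assuming vcpu_fd was obtained from KVM_CREATE_VCPU elsewhere and the installed kernel headers already define KVM_CAP_PPC_PAPR:

        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Hedged sketch: switch one vcpu into PAPR mode. */
        static int enable_papr(int vcpu_fd)
        {
                struct kvm_enable_cap cap = {
                        .cap = KVM_CAP_PPC_PAPR,
                };

                /* Fails with EINVAL if kvmppc_sanity_check() rejects the
                 * resulting configuration, e.g. PAPR on a non-book3s_64 vcpu. */
                if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0) {
                        perror("KVM_ENABLE_CAP(KVM_CAP_PPC_PAPR)");
                        return -1;
                }
                return 0;
        }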
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 00ff00dfb24c..1ca5de07ac36 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
| @@ -119,6 +119,7 @@ struct kvm_vcpu_stat { | |||
| 119 | u32 instruction_lctlg; | 119 | u32 instruction_lctlg; |
| 120 | u32 exit_program_interruption; | 120 | u32 exit_program_interruption; |
| 121 | u32 exit_instr_and_program; | 121 | u32 exit_instr_and_program; |
| 122 | u32 deliver_external_call; | ||
| 122 | u32 deliver_emergency_signal; | 123 | u32 deliver_emergency_signal; |
| 123 | u32 deliver_service_signal; | 124 | u32 deliver_service_signal; |
| 124 | u32 deliver_virtio_interrupt; | 125 | u32 deliver_virtio_interrupt; |
| @@ -138,6 +139,7 @@ struct kvm_vcpu_stat { | |||
| 138 | u32 instruction_stfl; | 139 | u32 instruction_stfl; |
| 139 | u32 instruction_tprot; | 140 | u32 instruction_tprot; |
| 140 | u32 instruction_sigp_sense; | 141 | u32 instruction_sigp_sense; |
| 142 | u32 instruction_sigp_external_call; | ||
| 141 | u32 instruction_sigp_emergency; | 143 | u32 instruction_sigp_emergency; |
| 142 | u32 instruction_sigp_stop; | 144 | u32 instruction_sigp_stop; |
| 143 | u32 instruction_sigp_arch; | 145 | u32 instruction_sigp_arch; |
| @@ -174,6 +176,10 @@ struct kvm_s390_prefix_info { | |||
| 174 | __u32 address; | 176 | __u32 address; |
| 175 | }; | 177 | }; |
| 176 | 178 | ||
| 179 | struct kvm_s390_extcall_info { | ||
| 180 | __u16 code; | ||
| 181 | }; | ||
| 182 | |||
| 177 | struct kvm_s390_emerg_info { | 183 | struct kvm_s390_emerg_info { |
| 178 | __u16 code; | 184 | __u16 code; |
| 179 | }; | 185 | }; |
| @@ -186,6 +192,7 @@ struct kvm_s390_interrupt_info { | |||
| 186 | struct kvm_s390_ext_info ext; | 192 | struct kvm_s390_ext_info ext; |
| 187 | struct kvm_s390_pgm_info pgm; | 193 | struct kvm_s390_pgm_info pgm; |
| 188 | struct kvm_s390_emerg_info emerg; | 194 | struct kvm_s390_emerg_info emerg; |
| 195 | struct kvm_s390_extcall_info extcall; | ||
| 189 | struct kvm_s390_prefix_info prefix; | 196 | struct kvm_s390_prefix_info prefix; |
| 190 | }; | 197 | }; |
| 191 | }; | 198 | }; |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c9aeb4b4d0b8..87c16705b381 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
| @@ -38,6 +38,11 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, | |||
| 38 | struct kvm_s390_interrupt_info *inti) | 38 | struct kvm_s390_interrupt_info *inti) |
| 39 | { | 39 | { |
| 40 | switch (inti->type) { | 40 | switch (inti->type) { |
| 41 | case KVM_S390_INT_EXTERNAL_CALL: | ||
| 42 | if (psw_extint_disabled(vcpu)) | ||
| 43 | return 0; | ||
| 44 | if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) | ||
| 45 | return 1; | ||
| 41 | case KVM_S390_INT_EMERGENCY: | 46 | case KVM_S390_INT_EMERGENCY: |
| 42 | if (psw_extint_disabled(vcpu)) | 47 | if (psw_extint_disabled(vcpu)) |
| 43 | return 0; | 48 | return 0; |
| @@ -98,6 +103,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, | |||
| 98 | struct kvm_s390_interrupt_info *inti) | 103 | struct kvm_s390_interrupt_info *inti) |
| 99 | { | 104 | { |
| 100 | switch (inti->type) { | 105 | switch (inti->type) { |
| 106 | case KVM_S390_INT_EXTERNAL_CALL: | ||
| 101 | case KVM_S390_INT_EMERGENCY: | 107 | case KVM_S390_INT_EMERGENCY: |
| 102 | case KVM_S390_INT_SERVICE: | 108 | case KVM_S390_INT_SERVICE: |
| 103 | case KVM_S390_INT_VIRTIO: | 109 | case KVM_S390_INT_VIRTIO: |
| @@ -143,6 +149,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
| 143 | exception = 1; | 149 | exception = 1; |
| 144 | break; | 150 | break; |
| 145 | 151 | ||
| 152 | case KVM_S390_INT_EXTERNAL_CALL: | ||
| 153 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); | ||
| 154 | vcpu->stat.deliver_external_call++; | ||
| 155 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); | ||
| 156 | if (rc == -EFAULT) | ||
| 157 | exception = 1; | ||
| 158 | |||
| 159 | rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code); | ||
| 160 | if (rc == -EFAULT) | ||
| 161 | exception = 1; | ||
| 162 | |||
| 163 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
| 164 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
| 165 | if (rc == -EFAULT) | ||
| 166 | exception = 1; | ||
| 167 | |||
| 168 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
| 169 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
| 170 | if (rc == -EFAULT) | ||
| 171 | exception = 1; | ||
| 172 | break; | ||
| 173 | |||
| 146 | case KVM_S390_INT_SERVICE: | 174 | case KVM_S390_INT_SERVICE: |
| 147 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", | 175 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", |
| 148 | inti->ext.ext_params); | 176 | inti->ext.ext_params); |
| @@ -522,6 +550,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
| 522 | break; | 550 | break; |
| 523 | case KVM_S390_PROGRAM_INT: | 551 | case KVM_S390_PROGRAM_INT: |
| 524 | case KVM_S390_SIGP_STOP: | 552 | case KVM_S390_SIGP_STOP: |
| 553 | case KVM_S390_INT_EXTERNAL_CALL: | ||
| 525 | case KVM_S390_INT_EMERGENCY: | 554 | case KVM_S390_INT_EMERGENCY: |
| 526 | default: | 555 | default: |
| 527 | kfree(inti); | 556 | kfree(inti); |
| @@ -581,6 +610,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | |||
| 581 | break; | 610 | break; |
| 582 | case KVM_S390_SIGP_STOP: | 611 | case KVM_S390_SIGP_STOP: |
| 583 | case KVM_S390_RESTART: | 612 | case KVM_S390_RESTART: |
| 613 | case KVM_S390_INT_EXTERNAL_CALL: | ||
| 584 | case KVM_S390_INT_EMERGENCY: | 614 | case KVM_S390_INT_EMERGENCY: |
| 585 | VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); | 615 | VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); |
| 586 | inti->type = s390int->type; | 616 | inti->type = s390int->type; |
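Userspace can also inject the new interrupt type directly, since kvm_s390_inject_vcpu() now accepts it alongside the emergency signal. A hedged sketch, assuming vcpu_fd is an s390 KVM vcpu file descriptor; note that the source CPU address seen by the guest comes from extcall.code, which only the SIGP handler (see the sigp.c hunk below) fills in, so userspace injection is mostly useful for exercising the delivery path:

        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Hedged sketch: queue an external-call interrupt on a vcpu. */
        static int inject_extcall(int vcpu_fd)
        {
                struct kvm_s390_interrupt irq = {
                        .type = KVM_S390_INT_EXTERNAL_CALL,
                };

                return ioctl(vcpu_fd, KVM_S390_INTERRUPT, &irq);
        }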
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dc2b580e27bc..9610ba41b974 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -46,6 +46,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 46 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, | 46 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, |
| 47 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, | 47 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, |
| 48 | { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, | 48 | { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, |
| 49 | { "deliver_external_call", VCPU_STAT(deliver_external_call) }, | ||
| 49 | { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, | 50 | { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, |
| 50 | { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, | 51 | { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, |
| 51 | { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, | 52 | { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, |
| @@ -64,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 64 | { "instruction_stfl", VCPU_STAT(instruction_stfl) }, | 65 | { "instruction_stfl", VCPU_STAT(instruction_stfl) }, |
| 65 | { "instruction_tprot", VCPU_STAT(instruction_tprot) }, | 66 | { "instruction_tprot", VCPU_STAT(instruction_tprot) }, |
| 66 | { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, | 67 | { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, |
| 68 | { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, | ||
| 67 | { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, | 69 | { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, |
| 68 | { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, | 70 | { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, |
| 69 | { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, | 71 | { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, |
| @@ -175,6 +177,8 @@ int kvm_arch_init_vm(struct kvm *kvm) | |||
| 175 | if (rc) | 177 | if (rc) |
| 176 | goto out_err; | 178 | goto out_err; |
| 177 | 179 | ||
| 180 | rc = -ENOMEM; | ||
| 181 | |||
| 178 | kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); | 182 | kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); |
| 179 | if (!kvm->arch.sca) | 183 | if (!kvm->arch.sca) |
| 180 | goto out_err; | 184 | goto out_err; |
| @@ -312,11 +316,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 312 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | 316 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, |
| 313 | unsigned int id) | 317 | unsigned int id) |
| 314 | { | 318 | { |
| 315 | struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); | 319 | struct kvm_vcpu *vcpu; |
| 316 | int rc = -ENOMEM; | 320 | int rc = -EINVAL; |
| 321 | |||
| 322 | if (id >= KVM_MAX_VCPUS) | ||
| 323 | goto out; | ||
| 317 | 324 | ||
| 325 | rc = -ENOMEM; | ||
| 326 | |||
| 327 | vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); | ||
| 318 | if (!vcpu) | 328 | if (!vcpu) |
| 319 | goto out_nomem; | 329 | goto out; |
| 320 | 330 | ||
| 321 | vcpu->arch.sie_block = (struct kvm_s390_sie_block *) | 331 | vcpu->arch.sie_block = (struct kvm_s390_sie_block *) |
| 322 | get_zeroed_page(GFP_KERNEL); | 332 | get_zeroed_page(GFP_KERNEL); |
| @@ -352,7 +362,7 @@ out_free_sie_block: | |||
| 352 | free_page((unsigned long)(vcpu->arch.sie_block)); | 362 | free_page((unsigned long)(vcpu->arch.sie_block)); |
| 353 | out_free_cpu: | 363 | out_free_cpu: |
| 354 | kfree(vcpu); | 364 | kfree(vcpu); |
| 355 | out_nomem: | 365 | out: |
| 356 | return ERR_PTR(rc); | 366 | return ERR_PTR(rc); |
| 357 | } | 367 | } |
| 358 | 368 | ||
| @@ -386,6 +396,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 386 | { | 396 | { |
| 387 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); | 397 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); |
| 388 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); | 398 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); |
| 399 | restore_access_regs(vcpu->arch.guest_acrs); | ||
| 389 | return 0; | 400 | return 0; |
| 390 | } | 401 | } |
| 391 | 402 | ||
| @@ -401,6 +412,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
| 401 | { | 412 | { |
| 402 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); | 413 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); |
| 403 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; | 414 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; |
| 415 | restore_fp_regs(&vcpu->arch.guest_fpregs); | ||
| 404 | return 0; | 416 | return 0; |
| 405 | } | 417 | } |
| 406 | 418 | ||
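With restore_access_regs()/restore_fp_regs() called directly from the set_sregs/set_fpu paths, register state handed in from userspace is loaded into the CPU right away. A hedged userspace sketch of the corresponding ioctl, assuming vcpu_fd and the source values come from the caller:

        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Hedged sketch: hand a complete FPU state to an s390 vcpu. */
        static int set_guest_fpu(int vcpu_fd, const unsigned long long fprs[16],
                                 unsigned int fpc)
        {
                struct kvm_fpu fpu;

                memset(&fpu, 0, sizeof(fpu));
                memcpy(fpu.fprs, fprs, sizeof(fpu.fprs));
                fpu.fpc = fpc;

                return ioctl(vcpu_fd, KVM_SET_FPU, &fpu);
        }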
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index d6a50c1fb2e6..f815118835f3 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
| @@ -87,6 +87,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) | |||
| 87 | return -ENOMEM; | 87 | return -ENOMEM; |
| 88 | 88 | ||
| 89 | inti->type = KVM_S390_INT_EMERGENCY; | 89 | inti->type = KVM_S390_INT_EMERGENCY; |
| 90 | inti->emerg.code = vcpu->vcpu_id; | ||
| 90 | 91 | ||
| 91 | spin_lock(&fi->lock); | 92 | spin_lock(&fi->lock); |
| 92 | li = fi->local_int[cpu_addr]; | 93 | li = fi->local_int[cpu_addr]; |
| @@ -103,9 +104,47 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) | |||
| 103 | wake_up_interruptible(&li->wq); | 104 | wake_up_interruptible(&li->wq); |
| 104 | spin_unlock_bh(&li->lock); | 105 | spin_unlock_bh(&li->lock); |
| 105 | rc = 0; /* order accepted */ | 106 | rc = 0; /* order accepted */ |
| 107 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); | ||
| 108 | unlock: | ||
| 109 | spin_unlock(&fi->lock); | ||
| 110 | return rc; | ||
| 111 | } | ||
| 112 | |||
| 113 | static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) | ||
| 114 | { | ||
| 115 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; | ||
| 116 | struct kvm_s390_local_interrupt *li; | ||
| 117 | struct kvm_s390_interrupt_info *inti; | ||
| 118 | int rc; | ||
| 119 | |||
| 120 | if (cpu_addr >= KVM_MAX_VCPUS) | ||
| 121 | return 3; /* not operational */ | ||
| 122 | |||
| 123 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | ||
| 124 | if (!inti) | ||
| 125 | return -ENOMEM; | ||
| 126 | |||
| 127 | inti->type = KVM_S390_INT_EXTERNAL_CALL; | ||
| 128 | inti->extcall.code = vcpu->vcpu_id; | ||
| 129 | |||
| 130 | spin_lock(&fi->lock); | ||
| 131 | li = fi->local_int[cpu_addr]; | ||
| 132 | if (li == NULL) { | ||
| 133 | rc = 3; /* not operational */ | ||
| 134 | kfree(inti); | ||
| 135 | goto unlock; | ||
| 136 | } | ||
| 137 | spin_lock_bh(&li->lock); | ||
| 138 | list_add_tail(&inti->list, &li->list); | ||
| 139 | atomic_set(&li->active, 1); | ||
| 140 | atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); | ||
| 141 | if (waitqueue_active(&li->wq)) | ||
| 142 | wake_up_interruptible(&li->wq); | ||
| 143 | spin_unlock_bh(&li->lock); | ||
| 144 | rc = 0; /* order accepted */ | ||
| 145 | VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); | ||
| 106 | unlock: | 146 | unlock: |
| 107 | spin_unlock(&fi->lock); | 147 | spin_unlock(&fi->lock); |
| 108 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); | ||
| 109 | return rc; | 148 | return rc; |
| 110 | } | 149 | } |
| 111 | 150 | ||
| @@ -267,6 +306,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
| 267 | rc = __sigp_sense(vcpu, cpu_addr, | 306 | rc = __sigp_sense(vcpu, cpu_addr, |
| 268 | &vcpu->arch.guest_gprs[r1]); | 307 | &vcpu->arch.guest_gprs[r1]); |
| 269 | break; | 308 | break; |
| 309 | case SIGP_EXTERNAL_CALL: | ||
| 310 | vcpu->stat.instruction_sigp_external_call++; | ||
| 311 | rc = __sigp_external_call(vcpu, cpu_addr); | ||
| 312 | break; | ||
| 270 | case SIGP_EMERGENCY: | 313 | case SIGP_EMERGENCY: |
| 271 | vcpu->stat.instruction_sigp_emergency++; | 314 | vcpu->stat.instruction_sigp_emergency++; |
| 272 | rc = __sigp_emergency(vcpu, cpu_addr); | 315 | rc = __sigp_emergency(vcpu, cpu_addr); |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 34595d5e1038..3925d8007864 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
| @@ -100,7 +100,9 @@ | |||
| 100 | #define APIC_TIMER_BASE_CLKIN 0x0 | 100 | #define APIC_TIMER_BASE_CLKIN 0x0 |
| 101 | #define APIC_TIMER_BASE_TMBASE 0x1 | 101 | #define APIC_TIMER_BASE_TMBASE 0x1 |
| 102 | #define APIC_TIMER_BASE_DIV 0x2 | 102 | #define APIC_TIMER_BASE_DIV 0x2 |
| 103 | #define APIC_LVT_TIMER_ONESHOT (0 << 17) | ||
| 103 | #define APIC_LVT_TIMER_PERIODIC (1 << 17) | 104 | #define APIC_LVT_TIMER_PERIODIC (1 << 17) |
| 105 | #define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) | ||
| 104 | #define APIC_LVT_MASKED (1 << 16) | 106 | #define APIC_LVT_MASKED (1 << 16) |
| 105 | #define APIC_LVT_LEVEL_TRIGGER (1 << 15) | 107 | #define APIC_LVT_LEVEL_TRIGGER (1 << 15) |
| 106 | #define APIC_LVT_REMOTE_IRR (1 << 14) | 108 | #define APIC_LVT_REMOTE_IRR (1 << 14) |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index aa6a488cd075..2f84a433b6a0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
| @@ -121,6 +121,7 @@ | |||
| 121 | #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ | 121 | #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ |
| 122 | #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ | 122 | #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ |
| 123 | #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ | 123 | #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ |
| 124 | #define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ | ||
| 124 | #define X86_FEATURE_AES (4*32+25) /* AES instructions */ | 125 | #define X86_FEATURE_AES (4*32+25) /* AES instructions */ |
| 125 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | 126 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ |
| 126 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ | 127 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ |
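The new CPUID bit (leaf 1, ECX bit 24) gates use of the TSC-deadline LVT mode added to apicdef.h above, presumably so KVM's CPUID emulation can advertise the mode to guests. A hedged in-kernel sketch of the feature test a user of the mode would perform:

        /* Hedged sketch: check for the TSC-deadline timer before selecting
         * APIC_LVT_TIMER_TSCDEADLINE mode; fall back to one-shot otherwise. */
        static bool lapic_has_tsc_deadline(void)
        {
                return boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
        }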
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 6040d115ef51..a026507893e9 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
| @@ -262,7 +262,7 @@ struct x86_emulate_ctxt { | |||
| 262 | struct operand dst; | 262 | struct operand dst; |
| 263 | bool has_seg_override; | 263 | bool has_seg_override; |
| 264 | u8 seg_override; | 264 | u8 seg_override; |
| 265 | unsigned int d; | 265 | u64 d; |
| 266 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 266 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
| 267 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | 267 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); |
| 268 | /* modrm */ | 268 | /* modrm */ |
| @@ -275,6 +275,8 @@ struct x86_emulate_ctxt { | |||
| 275 | unsigned long _eip; | 275 | unsigned long _eip; |
| 276 | /* Fields above regs are cleared together. */ | 276 | /* Fields above regs are cleared together. */ |
| 277 | unsigned long regs[NR_VCPU_REGS]; | 277 | unsigned long regs[NR_VCPU_REGS]; |
| 278 | struct operand memop; | ||
| 279 | struct operand *memopp; | ||
| 278 | struct fetch_cache fetch; | 280 | struct fetch_cache fetch; |
| 279 | struct read_cache io_read; | 281 | struct read_cache io_read; |
| 280 | struct read_cache mem_read; | 282 | struct read_cache mem_read; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd51c83aa5de..b4973f4dab98 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -26,7 +26,8 @@ | |||
| 26 | #include <asm/mtrr.h> | 26 | #include <asm/mtrr.h> |
| 27 | #include <asm/msr-index.h> | 27 | #include <asm/msr-index.h> |
| 28 | 28 | ||
| 29 | #define KVM_MAX_VCPUS 64 | 29 | #define KVM_MAX_VCPUS 254 |
| 30 | #define KVM_SOFT_MAX_VCPUS 64 | ||
| 30 | #define KVM_MEMORY_SLOTS 32 | 31 | #define KVM_MEMORY_SLOTS 32 |
| 31 | /* memory slots that does not exposed to userspace */ | 32 | /* memory slots that does not exposed to userspace */ |
| 32 | #define KVM_PRIVATE_MEM_SLOTS 4 | 33 | #define KVM_PRIVATE_MEM_SLOTS 4 |
| @@ -264,6 +265,7 @@ struct kvm_mmu { | |||
| 264 | void (*new_cr3)(struct kvm_vcpu *vcpu); | 265 | void (*new_cr3)(struct kvm_vcpu *vcpu); |
| 265 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); | 266 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); |
| 266 | unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); | 267 | unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); |
| 268 | u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); | ||
| 267 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, | 269 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, |
| 268 | bool prefault); | 270 | bool prefault); |
| 269 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, | 271 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, |
| @@ -411,8 +413,9 @@ struct kvm_vcpu_arch { | |||
| 411 | u32 tsc_catchup_mult; | 413 | u32 tsc_catchup_mult; |
| 412 | s8 tsc_catchup_shift; | 414 | s8 tsc_catchup_shift; |
| 413 | 415 | ||
| 414 | bool nmi_pending; | 416 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
| 415 | bool nmi_injected; | 417 | unsigned nmi_pending; /* NMI queued after currently running handler */ |
| 418 | bool nmi_injected; /* Trying to inject an NMI this entry */ | ||
| 416 | 419 | ||
| 417 | struct mtrr_state_type mtrr_state; | 420 | struct mtrr_state_type mtrr_state; |
| 418 | u32 pat; | 421 | u32 pat; |
| @@ -628,14 +631,13 @@ struct kvm_x86_ops { | |||
| 628 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); | 631 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
| 629 | 632 | ||
| 630 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); | 633 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); |
| 634 | u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu); | ||
| 631 | 635 | ||
| 632 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); | 636 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); |
| 633 | 637 | ||
| 634 | int (*check_intercept)(struct kvm_vcpu *vcpu, | 638 | int (*check_intercept)(struct kvm_vcpu *vcpu, |
| 635 | struct x86_instruction_info *info, | 639 | struct x86_instruction_info *info, |
| 636 | enum x86_intercept_stage stage); | 640 | enum x86_intercept_stage stage); |
| 637 | |||
| 638 | const struct trace_print_flags *exit_reasons_str; | ||
| 639 | }; | 641 | }; |
| 640 | 642 | ||
| 641 | struct kvm_arch_async_pf { | 643 | struct kvm_arch_async_pf { |
| @@ -672,6 +674,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | |||
| 672 | 674 | ||
| 673 | extern bool tdp_enabled; | 675 | extern bool tdp_enabled; |
| 674 | 676 | ||
| 677 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); | ||
| 678 | |||
| 675 | /* control of guest tsc rate supported? */ | 679 | /* control of guest tsc rate supported? */ |
| 676 | extern bool kvm_has_tsc_control; | 680 | extern bool kvm_has_tsc_control; |
| 677 | /* minimum supported tsc_khz for guests */ | 681 | /* minimum supported tsc_khz for guests */ |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d52609aeeab8..a6962d9161a0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
| @@ -229,6 +229,8 @@ | |||
| 229 | #define MSR_IA32_APICBASE_ENABLE (1<<11) | 229 | #define MSR_IA32_APICBASE_ENABLE (1<<11) |
| 230 | #define MSR_IA32_APICBASE_BASE (0xfffff<<12) | 230 | #define MSR_IA32_APICBASE_BASE (0xfffff<<12) |
| 231 | 231 | ||
| 232 | #define MSR_IA32_TSCDEADLINE 0x000006e0 | ||
| 233 | |||
| 232 | #define MSR_IA32_UCODE_WRITE 0x00000079 | 234 | #define MSR_IA32_UCODE_WRITE 0x00000079 |
| 233 | #define MSR_IA32_UCODE_REV 0x0000008b | 235 | #define MSR_IA32_UCODE_REV 0x0000008b |
| 234 | 236 | ||
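Together with the LVTT mode bits, the new MSR number lets a guest kernel arm a one-shot timer directly in TSC units. A hedged guest-side sketch, assuming LOCAL_TIMER_VECTOR and the cycle delta are supplied by the surrounding code:

        /* Hedged sketch: put the local APIC timer into TSC-deadline mode and
         * arm it 'delta' TSC cycles from now. */
        static void arm_tsc_deadline(u64 delta)
        {
                u64 tsc;

                apic_write(APIC_LVTT, APIC_LVT_TIMER_TSCDEADLINE | LOCAL_TIMER_VECTOR);
                rdtscll(tsc);
                wrmsrl(MSR_IA32_TSCDEADLINE, tsc + delta);
        }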
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 2caf290e9895..31f180c21ce9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
| @@ -350,6 +350,18 @@ enum vmcs_field { | |||
| 350 | #define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */ | 350 | #define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */ |
| 351 | 351 | ||
| 352 | 352 | ||
| 353 | /* | ||
| 354 | * Exit Qualifications for APIC-Access | ||
| 355 | */ | ||
| 356 | #define APIC_ACCESS_OFFSET 0xfff /* 11:0, offset within the APIC page */ | ||
| 357 | #define APIC_ACCESS_TYPE 0xf000 /* 15:12, access type */ | ||
| 358 | #define TYPE_LINEAR_APIC_INST_READ (0 << 12) | ||
| 359 | #define TYPE_LINEAR_APIC_INST_WRITE (1 << 12) | ||
| 360 | #define TYPE_LINEAR_APIC_INST_FETCH (2 << 12) | ||
| 361 | #define TYPE_LINEAR_APIC_EVENT (3 << 12) | ||
| 362 | #define TYPE_PHYSICAL_APIC_EVENT (10 << 12) | ||
| 363 | #define TYPE_PHYSICAL_APIC_INST (15 << 12) | ||
| 364 | |||
| 353 | /* segment AR */ | 365 | /* segment AR */ |
| 354 | #define SEGMENT_AR_L_MASK (1 << 13) | 366 | #define SEGMENT_AR_L_MASK (1 << 13) |
| 355 | 367 | ||
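The new exit-qualification macros let an APIC-access VM-exit handler tell the APIC register offset and the access type apart without open-coded masks. A hedged sketch of the unpacking, assuming exit_qualification was read from the VMCS by the caller:

        /* Hedged sketch: split an APIC-access exit qualification into the
         * APIC register offset and the access type defined above. */
        static bool is_linear_apic_write(unsigned long exit_qualification,
                                         unsigned int *offset)
        {
                *offset = exit_qualification & APIC_ACCESS_OFFSET;
                return (exit_qualification & APIC_ACCESS_TYPE) ==
                        TYPE_LINEAR_APIC_INST_WRITE;
        }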
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8b4cc5f067de..f1e3be18a08f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -29,6 +29,39 @@ | |||
| 29 | #include "tss.h" | 29 | #include "tss.h" |
| 30 | 30 | ||
| 31 | /* | 31 | /* |
| 32 | * Operand types | ||
| 33 | */ | ||
| 34 | #define OpNone 0ull | ||
| 35 | #define OpImplicit 1ull /* No generic decode */ | ||
| 36 | #define OpReg 2ull /* Register */ | ||
| 37 | #define OpMem 3ull /* Memory */ | ||
| 38 | #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */ | ||
| 39 | #define OpDI 5ull /* ES:DI/EDI/RDI */ | ||
| 40 | #define OpMem64 6ull /* Memory, 64-bit */ | ||
| 41 | #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */ | ||
| 42 | #define OpDX 8ull /* DX register */ | ||
| 43 | #define OpCL 9ull /* CL register (for shifts) */ | ||
| 44 | #define OpImmByte 10ull /* 8-bit sign extended immediate */ | ||
| 45 | #define OpOne 11ull /* Implied 1 */ | ||
| 46 | #define OpImm 12ull /* Sign extended immediate */ | ||
| 47 | #define OpMem16 13ull /* Memory operand (16-bit). */ | ||
| 48 | #define OpMem32 14ull /* Memory operand (32-bit). */ | ||
| 49 | #define OpImmU 15ull /* Immediate operand, zero extended */ | ||
| 50 | #define OpSI 16ull /* SI/ESI/RSI */ | ||
| 51 | #define OpImmFAddr 17ull /* Immediate far address */ | ||
| 52 | #define OpMemFAddr 18ull /* Far address in memory */ | ||
| 53 | #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */ | ||
| 54 | #define OpES 20ull /* ES */ | ||
| 55 | #define OpCS 21ull /* CS */ | ||
| 56 | #define OpSS 22ull /* SS */ | ||
| 57 | #define OpDS 23ull /* DS */ | ||
| 58 | #define OpFS 24ull /* FS */ | ||
| 59 | #define OpGS 25ull /* GS */ | ||
| 60 | |||
| 61 | #define OpBits 5 /* Width of operand field */ | ||
| 62 | #define OpMask ((1ull << OpBits) - 1) | ||
| 63 | |||
| 64 | /* | ||
| 32 | * Opcode effective-address decode tables. | 65 | * Opcode effective-address decode tables. |
| 33 | * Note that we only emulate instructions that have at least one memory | 66 | * Note that we only emulate instructions that have at least one memory |
| 34 | * operand (excluding implicit stack references). We assume that stack | 67 | * operand (excluding implicit stack references). We assume that stack |
| @@ -40,37 +73,35 @@ | |||
| 40 | /* Operand sizes: 8-bit operands or specified/overridden size. */ | 73 | /* Operand sizes: 8-bit operands or specified/overridden size. */ |
| 41 | #define ByteOp (1<<0) /* 8-bit operands. */ | 74 | #define ByteOp (1<<0) /* 8-bit operands. */ |
| 42 | /* Destination operand type. */ | 75 | /* Destination operand type. */ |
| 43 | #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ | 76 | #define DstShift 1 |
| 44 | #define DstReg (2<<1) /* Register operand. */ | 77 | #define ImplicitOps (OpImplicit << DstShift) |
| 45 | #define DstMem (3<<1) /* Memory operand. */ | 78 | #define DstReg (OpReg << DstShift) |
| 46 | #define DstAcc (4<<1) /* Destination Accumulator */ | 79 | #define DstMem (OpMem << DstShift) |
| 47 | #define DstDI (5<<1) /* Destination is in ES:(E)DI */ | 80 | #define DstAcc (OpAcc << DstShift) |
| 48 | #define DstMem64 (6<<1) /* 64bit memory operand */ | 81 | #define DstDI (OpDI << DstShift) |
| 49 | #define DstImmUByte (7<<1) /* 8-bit unsigned immediate operand */ | 82 | #define DstMem64 (OpMem64 << DstShift) |
| 50 | #define DstDX (8<<1) /* Destination is in DX register */ | 83 | #define DstImmUByte (OpImmUByte << DstShift) |
| 51 | #define DstMask (0xf<<1) | 84 | #define DstDX (OpDX << DstShift) |
| 85 | #define DstMask (OpMask << DstShift) | ||
| 52 | /* Source operand type. */ | 86 | /* Source operand type. */ |
| 53 | #define SrcNone (0<<5) /* No source operand. */ | 87 | #define SrcShift 6 |
| 54 | #define SrcReg (1<<5) /* Register operand. */ | 88 | #define SrcNone (OpNone << SrcShift) |
| 55 | #define SrcMem (2<<5) /* Memory operand. */ | 89 | #define SrcReg (OpReg << SrcShift) |
| 56 | #define SrcMem16 (3<<5) /* Memory operand (16-bit). */ | 90 | #define SrcMem (OpMem << SrcShift) |
| 57 | #define SrcMem32 (4<<5) /* Memory operand (32-bit). */ | 91 | #define SrcMem16 (OpMem16 << SrcShift) |
| 58 | #define SrcImm (5<<5) /* Immediate operand. */ | 92 | #define SrcMem32 (OpMem32 << SrcShift) |
| 59 | #define SrcImmByte (6<<5) /* 8-bit sign-extended immediate operand. */ | 93 | #define SrcImm (OpImm << SrcShift) |
| 60 | #define SrcOne (7<<5) /* Implied '1' */ | 94 | #define SrcImmByte (OpImmByte << SrcShift) |
| 61 | #define SrcImmUByte (8<<5) /* 8-bit unsigned immediate operand. */ | 95 | #define SrcOne (OpOne << SrcShift) |
| 62 | #define SrcImmU (9<<5) /* Immediate operand, unsigned */ | 96 | #define SrcImmUByte (OpImmUByte << SrcShift) |
| 63 | #define SrcSI (0xa<<5) /* Source is in the DS:RSI */ | 97 | #define SrcImmU (OpImmU << SrcShift) |
| 64 | #define SrcImmFAddr (0xb<<5) /* Source is immediate far address */ | 98 | #define SrcSI (OpSI << SrcShift) |
| 65 | #define SrcMemFAddr (0xc<<5) /* Source is far address in memory */ | 99 | #define SrcImmFAddr (OpImmFAddr << SrcShift) |
| 66 | #define SrcAcc (0xd<<5) /* Source Accumulator */ | 100 | #define SrcMemFAddr (OpMemFAddr << SrcShift) |
| 67 | #define SrcImmU16 (0xe<<5) /* Immediate operand, unsigned, 16 bits */ | 101 | #define SrcAcc (OpAcc << SrcShift) |
| 68 | #define SrcDX (0xf<<5) /* Source is in DX register */ | 102 | #define SrcImmU16 (OpImmU16 << SrcShift) |
| 69 | #define SrcMask (0xf<<5) | 103 | #define SrcDX (OpDX << SrcShift) |
| 70 | /* Generic ModRM decode. */ | 104 | #define SrcMask (OpMask << SrcShift) |
| 71 | #define ModRM (1<<9) | ||
| 72 | /* Destination is only written; never read. */ | ||
| 73 | #define Mov (1<<10) | ||
| 74 | #define BitOp (1<<11) | 105 | #define BitOp (1<<11) |
| 75 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ | 106 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ |
| 76 | #define String (1<<13) /* String instruction (rep capable) */ | 107 | #define String (1<<13) /* String instruction (rep capable) */ |
| @@ -81,6 +112,10 @@ | |||
| 81 | #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ | 112 | #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ |
| 82 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ | 113 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ |
| 83 | #define Sse (1<<18) /* SSE Vector instruction */ | 114 | #define Sse (1<<18) /* SSE Vector instruction */ |
| 115 | /* Generic ModRM decode. */ | ||
| 116 | #define ModRM (1<<19) | ||
| 117 | /* Destination is only written; never read. */ | ||
| 118 | #define Mov (1<<20) | ||
| 84 | /* Misc flags */ | 119 | /* Misc flags */ |
| 85 | #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ | 120 | #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ |
| 86 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ | 121 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ |
| @@ -91,12 +126,19 @@ | |||
| 91 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | 126 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ |
| 92 | #define No64 (1<<28) | 127 | #define No64 (1<<28) |
| 93 | /* Source 2 operand type */ | 128 | /* Source 2 operand type */ |
| 94 | #define Src2None (0<<29) | 129 | #define Src2Shift (29) |
| 95 | #define Src2CL (1<<29) | 130 | #define Src2None (OpNone << Src2Shift) |
| 96 | #define Src2ImmByte (2<<29) | 131 | #define Src2CL (OpCL << Src2Shift) |
| 97 | #define Src2One (3<<29) | 132 | #define Src2ImmByte (OpImmByte << Src2Shift) |
| 98 | #define Src2Imm (4<<29) | 133 | #define Src2One (OpOne << Src2Shift) |
| 99 | #define Src2Mask (7<<29) | 134 | #define Src2Imm (OpImm << Src2Shift) |
| 135 | #define Src2ES (OpES << Src2Shift) | ||
| 136 | #define Src2CS (OpCS << Src2Shift) | ||
| 137 | #define Src2SS (OpSS << Src2Shift) | ||
| 138 | #define Src2DS (OpDS << Src2Shift) | ||
| 139 | #define Src2FS (OpFS << Src2Shift) | ||
| 140 | #define Src2GS (OpGS << Src2Shift) | ||
| 141 | #define Src2Mask (OpMask << Src2Shift) | ||
| 100 | 142 | ||
| 101 | #define X2(x...) x, x | 143 | #define X2(x...) x, x |
| 102 | #define X3(x...) X2(x), x | 144 | #define X3(x...) X2(x), x |
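With each operand kind now a small enumeration packed into shifted fields of the widened 56-bit flags word, decode can recover an operand's type with a shift and a mask rather than comparing against hand-numbered bit patterns. A hedged sketch of the unpacking this encoding allows:

        /* Hedged sketch: pull the three operand-type fields back out of an
         * opcode table entry's flags word. */
        static inline unsigned op_dst_type(u64 flags)  { return (flags >> DstShift)  & OpMask; }
        static inline unsigned op_src_type(u64 flags)  { return (flags >> SrcShift)  & OpMask; }
        static inline unsigned op_src2_type(u64 flags) { return (flags >> Src2Shift) & OpMask; }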
| @@ -108,8 +150,8 @@ | |||
| 108 | #define X16(x...) X8(x), X8(x) | 150 | #define X16(x...) X8(x), X8(x) |
| 109 | 151 | ||
| 110 | struct opcode { | 152 | struct opcode { |
| 111 | u32 flags; | 153 | u64 flags : 56; |
| 112 | u8 intercept; | 154 | u64 intercept : 8; |
| 113 | union { | 155 | union { |
| 114 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 156 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
| 115 | struct opcode *group; | 157 | struct opcode *group; |
| @@ -205,105 +247,100 @@ struct gprefix { | |||
| 205 | #define ON64(x) | 247 | #define ON64(x) |
| 206 | #endif | 248 | #endif |
| 207 | 249 | ||
| 208 | #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix, _dsttype) \ | 250 | #define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype) \ |
| 209 | do { \ | 251 | do { \ |
| 210 | __asm__ __volatile__ ( \ | 252 | __asm__ __volatile__ ( \ |
| 211 | _PRE_EFLAGS("0", "4", "2") \ | 253 | _PRE_EFLAGS("0", "4", "2") \ |
| 212 | _op _suffix " %"_x"3,%1; " \ | 254 | _op _suffix " %"_x"3,%1; " \ |
| 213 | _POST_EFLAGS("0", "4", "2") \ | 255 | _POST_EFLAGS("0", "4", "2") \ |
| 214 | : "=m" (_eflags), "+q" (*(_dsttype*)&(_dst).val),\ | 256 | : "=m" ((ctxt)->eflags), \ |
| 257 | "+q" (*(_dsttype*)&(ctxt)->dst.val), \ | ||
| 215 | "=&r" (_tmp) \ | 258 | "=&r" (_tmp) \ |
| 216 | : _y ((_src).val), "i" (EFLAGS_MASK)); \ | 259 | : _y ((ctxt)->src.val), "i" (EFLAGS_MASK)); \ |
| 217 | } while (0) | 260 | } while (0) |
| 218 | 261 | ||
| 219 | 262 | ||
| 220 | /* Raw emulation: instruction has two explicit operands. */ | 263 | /* Raw emulation: instruction has two explicit operands. */ |
| 221 | #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ | 264 | #define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy) \ |
| 222 | do { \ | 265 | do { \ |
| 223 | unsigned long _tmp; \ | 266 | unsigned long _tmp; \ |
| 224 | \ | 267 | \ |
| 225 | switch ((_dst).bytes) { \ | 268 | switch ((ctxt)->dst.bytes) { \ |
| 226 | case 2: \ | 269 | case 2: \ |
| 227 | ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w",u16);\ | 270 | ____emulate_2op(ctxt,_op,_wx,_wy,"w",u16); \ |
| 228 | break; \ | 271 | break; \ |
| 229 | case 4: \ | 272 | case 4: \ |
| 230 | ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l",u32);\ | 273 | ____emulate_2op(ctxt,_op,_lx,_ly,"l",u32); \ |
| 231 | break; \ | 274 | break; \ |
| 232 | case 8: \ | 275 | case 8: \ |
| 233 | ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q",u64)); \ | 276 | ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \ |
| 234 | break; \ | 277 | break; \ |
| 235 | } \ | 278 | } \ |
| 236 | } while (0) | 279 | } while (0) |
| 237 | 280 | ||
| 238 | #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ | 281 | #define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ |
| 239 | do { \ | 282 | do { \ |
| 240 | unsigned long _tmp; \ | 283 | unsigned long _tmp; \ |
| 241 | switch ((_dst).bytes) { \ | 284 | switch ((ctxt)->dst.bytes) { \ |
| 242 | case 1: \ | 285 | case 1: \ |
| 243 | ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b",u8); \ | 286 | ____emulate_2op(ctxt,_op,_bx,_by,"b",u8); \ |
| 244 | break; \ | 287 | break; \ |
| 245 | default: \ | 288 | default: \ |
| 246 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ | 289 | __emulate_2op_nobyte(ctxt, _op, \ |
| 247 | _wx, _wy, _lx, _ly, _qx, _qy); \ | 290 | _wx, _wy, _lx, _ly, _qx, _qy); \ |
| 248 | break; \ | 291 | break; \ |
| 249 | } \ | 292 | } \ |
| 250 | } while (0) | 293 | } while (0) |
| 251 | 294 | ||
| 252 | /* Source operand is byte-sized and may be restricted to just %cl. */ | 295 | /* Source operand is byte-sized and may be restricted to just %cl. */ |
| 253 | #define emulate_2op_SrcB(_op, _src, _dst, _eflags) \ | 296 | #define emulate_2op_SrcB(ctxt, _op) \ |
| 254 | __emulate_2op(_op, _src, _dst, _eflags, \ | 297 | __emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c") |
| 255 | "b", "c", "b", "c", "b", "c", "b", "c") | ||
| 256 | 298 | ||
| 257 | /* Source operand is byte, word, long or quad sized. */ | 299 | /* Source operand is byte, word, long or quad sized. */ |
| 258 | #define emulate_2op_SrcV(_op, _src, _dst, _eflags) \ | 300 | #define emulate_2op_SrcV(ctxt, _op) \ |
| 259 | __emulate_2op(_op, _src, _dst, _eflags, \ | 301 | __emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r") |
| 260 | "b", "q", "w", "r", _LO32, "r", "", "r") | ||
| 261 | 302 | ||
| 262 | /* Source operand is word, long or quad sized. */ | 303 | /* Source operand is word, long or quad sized. */ |
| 263 | #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \ | 304 | #define emulate_2op_SrcV_nobyte(ctxt, _op) \ |
| 264 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ | 305 | __emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r") |
| 265 | "w", "r", _LO32, "r", "", "r") | ||
| 266 | 306 | ||
| 267 | /* Instruction has three operands and one operand is stored in ECX register */ | 307 | /* Instruction has three operands and one operand is stored in ECX register */ |
| 268 | #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ | 308 | #define __emulate_2op_cl(ctxt, _op, _suffix, _type) \ |
| 269 | do { \ | 309 | do { \ |
| 270 | unsigned long _tmp; \ | 310 | unsigned long _tmp; \ |
| 271 | _type _clv = (_cl).val; \ | 311 | _type _clv = (ctxt)->src2.val; \ |
| 272 | _type _srcv = (_src).val; \ | 312 | _type _srcv = (ctxt)->src.val; \ |
| 273 | _type _dstv = (_dst).val; \ | 313 | _type _dstv = (ctxt)->dst.val; \ |
| 274 | \ | 314 | \ |
| 275 | __asm__ __volatile__ ( \ | 315 | __asm__ __volatile__ ( \ |
| 276 | _PRE_EFLAGS("0", "5", "2") \ | 316 | _PRE_EFLAGS("0", "5", "2") \ |
| 277 | _op _suffix " %4,%1 \n" \ | 317 | _op _suffix " %4,%1 \n" \ |
| 278 | _POST_EFLAGS("0", "5", "2") \ | 318 | _POST_EFLAGS("0", "5", "2") \ |
| 279 | : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ | 319 | : "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \ |
| 280 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ | 320 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ |
| 281 | ); \ | 321 | ); \ |
| 282 | \ | 322 | \ |
| 283 | (_cl).val = (unsigned long) _clv; \ | 323 | (ctxt)->src2.val = (unsigned long) _clv; \ |
| 284 | (_src).val = (unsigned long) _srcv; \ | 324 | (ctxt)->src.val = (unsigned long) _srcv; \ |
| 285 | (_dst).val = (unsigned long) _dstv; \ | 325 | (ctxt)->dst.val = (unsigned long) _dstv; \ |
| 286 | } while (0) | 326 | } while (0) |
| 287 | 327 | ||
| 288 | #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ | 328 | #define emulate_2op_cl(ctxt, _op) \ |
| 289 | do { \ | 329 | do { \ |
| 290 | switch ((_dst).bytes) { \ | 330 | switch ((ctxt)->dst.bytes) { \ |
| 291 | case 2: \ | 331 | case 2: \ |
| 292 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 332 | __emulate_2op_cl(ctxt, _op, "w", u16); \ |
| 293 | "w", unsigned short); \ | ||
| 294 | break; \ | 333 | break; \ |
| 295 | case 4: \ | 334 | case 4: \ |
| 296 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 335 | __emulate_2op_cl(ctxt, _op, "l", u32); \ |
| 297 | "l", unsigned int); \ | ||
| 298 | break; \ | 336 | break; \ |
| 299 | case 8: \ | 337 | case 8: \ |
| 300 | ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | 338 | ON64(__emulate_2op_cl(ctxt, _op, "q", ulong)); \ |
| 301 | "q", unsigned long)); \ | ||
| 302 | break; \ | 339 | break; \ |
| 303 | } \ | 340 | } \ |
| 304 | } while (0) | 341 | } while (0) |
| 305 | 342 | ||
| 306 | #define __emulate_1op(_op, _dst, _eflags, _suffix) \ | 343 | #define __emulate_1op(ctxt, _op, _suffix) \ |
| 307 | do { \ | 344 | do { \ |
| 308 | unsigned long _tmp; \ | 345 | unsigned long _tmp; \ |
| 309 | \ | 346 | \ |
| @@ -311,39 +348,27 @@ struct gprefix { | |||
| 311 | _PRE_EFLAGS("0", "3", "2") \ | 348 | _PRE_EFLAGS("0", "3", "2") \ |
| 312 | _op _suffix " %1; " \ | 349 | _op _suffix " %1; " \ |
| 313 | _POST_EFLAGS("0", "3", "2") \ | 350 | _POST_EFLAGS("0", "3", "2") \ |
| 314 | : "=m" (_eflags), "+m" ((_dst).val), \ | 351 | : "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \ |
| 315 | "=&r" (_tmp) \ | 352 | "=&r" (_tmp) \ |
| 316 | : "i" (EFLAGS_MASK)); \ | 353 | : "i" (EFLAGS_MASK)); \ |
| 317 | } while (0) | 354 | } while (0) |
| 318 | 355 | ||
| 319 | /* Instruction has only one explicit operand (no source operand). */ | 356 | /* Instruction has only one explicit operand (no source operand). */ |
| 320 | #define emulate_1op(_op, _dst, _eflags) \ | 357 | #define emulate_1op(ctxt, _op) \ |
| 321 | do { \ | 358 | do { \ |
| 322 | switch ((_dst).bytes) { \ | 359 | switch ((ctxt)->dst.bytes) { \ |
| 323 | case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ | 360 | case 1: __emulate_1op(ctxt, _op, "b"); break; \ |
| 324 | case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ | 361 | case 2: __emulate_1op(ctxt, _op, "w"); break; \ |
| 325 | case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ | 362 | case 4: __emulate_1op(ctxt, _op, "l"); break; \ |
| 326 | case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ | 363 | case 8: ON64(__emulate_1op(ctxt, _op, "q")); break; \ |
| 327 | } \ | 364 | } \ |
| 328 | } while (0) | 365 | } while (0) |
| 329 | 366 | ||
| 330 | #define __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, _suffix) \ | 367 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ |
| 331 | do { \ | ||
| 332 | unsigned long _tmp; \ | ||
| 333 | \ | ||
| 334 | __asm__ __volatile__ ( \ | ||
| 335 | _PRE_EFLAGS("0", "4", "1") \ | ||
| 336 | _op _suffix " %5; " \ | ||
| 337 | _POST_EFLAGS("0", "4", "1") \ | ||
| 338 | : "=m" (_eflags), "=&r" (_tmp), \ | ||
| 339 | "+a" (_rax), "+d" (_rdx) \ | ||
| 340 | : "i" (EFLAGS_MASK), "m" ((_src).val), \ | ||
| 341 | "a" (_rax), "d" (_rdx)); \ | ||
| 342 | } while (0) | ||
| 343 | |||
| 344 | #define __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _suffix, _ex) \ | ||
| 345 | do { \ | 368 | do { \ |
| 346 | unsigned long _tmp; \ | 369 | unsigned long _tmp; \ |
| 370 | ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX]; \ | ||
| 371 | ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX]; \ | ||
| 347 | \ | 372 | \ |
| 348 | __asm__ __volatile__ ( \ | 373 | __asm__ __volatile__ ( \ |
| 349 | _PRE_EFLAGS("0", "5", "1") \ | 374 | _PRE_EFLAGS("0", "5", "1") \ |
| @@ -356,53 +381,27 @@ struct gprefix { | |||
| 356 | "jmp 2b \n\t" \ | 381 | "jmp 2b \n\t" \ |
| 357 | ".popsection \n\t" \ | 382 | ".popsection \n\t" \ |
| 358 | _ASM_EXTABLE(1b, 3b) \ | 383 | _ASM_EXTABLE(1b, 3b) \ |
| 359 | : "=m" (_eflags), "=&r" (_tmp), \ | 384 | : "=m" ((ctxt)->eflags), "=&r" (_tmp), \ |
| 360 | "+a" (_rax), "+d" (_rdx), "+qm"(_ex) \ | 385 | "+a" (*rax), "+d" (*rdx), "+qm"(_ex) \ |
| 361 | : "i" (EFLAGS_MASK), "m" ((_src).val), \ | 386 | : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val), \ |
| 362 | "a" (_rax), "d" (_rdx)); \ | 387 | "a" (*rax), "d" (*rdx)); \ |
| 363 | } while (0) | 388 | } while (0) |
| 364 | 389 | ||
| 365 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ | 390 | /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ |
| 366 | #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ | 391 | #define emulate_1op_rax_rdx(ctxt, _op, _ex) \ |
| 367 | do { \ | 392 | do { \ |
| 368 | switch((_src).bytes) { \ | 393 | switch((ctxt)->src.bytes) { \ |
| 369 | case 1: \ | 394 | case 1: \ |
| 370 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | 395 | __emulate_1op_rax_rdx(ctxt, _op, "b", _ex); \ |
| 371 | _eflags, "b"); \ | ||
| 372 | break; \ | 396 | break; \ |
| 373 | case 2: \ | 397 | case 2: \ |
| 374 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | 398 | __emulate_1op_rax_rdx(ctxt, _op, "w", _ex); \ |
| 375 | _eflags, "w"); \ | ||
| 376 | break; \ | 399 | break; \ |
| 377 | case 4: \ | 400 | case 4: \ |
| 378 | __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | 401 | __emulate_1op_rax_rdx(ctxt, _op, "l", _ex); \ |
| 379 | _eflags, "l"); \ | ||
| 380 | break; \ | ||
| 381 | case 8: \ | ||
| 382 | ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ | ||
| 383 | _eflags, "q")); \ | ||
| 384 | break; \ | ||
| 385 | } \ | ||
| 386 | } while (0) | ||
| 387 | |||
| 388 | #define emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _ex) \ | ||
| 389 | do { \ | ||
| 390 | switch((_src).bytes) { \ | ||
| 391 | case 1: \ | ||
| 392 | __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \ | ||
| 393 | _eflags, "b", _ex); \ | ||
| 394 | break; \ | ||
| 395 | case 2: \ | ||
| 396 | __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \ | ||
| 397 | _eflags, "w", _ex); \ | ||
| 398 | break; \ | ||
| 399 | case 4: \ | ||
| 400 | __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \ | ||
| 401 | _eflags, "l", _ex); \ | ||
| 402 | break; \ | 402 | break; \ |
| 403 | case 8: ON64( \ | 403 | case 8: ON64( \ |
| 404 | __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \ | 404 | __emulate_1op_rax_rdx(ctxt, _op, "q", _ex)); \ |
| 405 | _eflags, "q", _ex)); \ | ||
| 406 | break; \ | 405 | break; \ |
| 407 | } \ | 406 | } \ |
| 408 | } while (0) | 407 | } while (0) |
| @@ -651,41 +650,50 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, | |||
| 651 | return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); | 650 | return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); |
| 652 | } | 651 | } |
| 653 | 652 | ||
| 654 | static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, | 653 | /* |
| 655 | unsigned long eip, u8 *dest) | 654 | * Fetch the next byte of the instruction being emulated which is pointed to |
| 655 | * by ctxt->_eip, then increment ctxt->_eip. | ||
| 656 | * | ||
| 657 | * Also prefetch the remaining bytes of the instruction without crossing page | ||
| 658 | * boundary if they are not in fetch_cache yet. | ||
| 659 | */ | ||
| 660 | static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest) | ||
| 656 | { | 661 | { |
| 657 | struct fetch_cache *fc = &ctxt->fetch; | 662 | struct fetch_cache *fc = &ctxt->fetch; |
| 658 | int rc; | 663 | int rc; |
| 659 | int size, cur_size; | 664 | int size, cur_size; |
| 660 | 665 | ||
| 661 | if (eip == fc->end) { | 666 | if (ctxt->_eip == fc->end) { |
| 662 | unsigned long linear; | 667 | unsigned long linear; |
| 663 | struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; | 668 | struct segmented_address addr = { .seg = VCPU_SREG_CS, |
| 669 | .ea = ctxt->_eip }; | ||
| 664 | cur_size = fc->end - fc->start; | 670 | cur_size = fc->end - fc->start; |
| 665 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); | 671 | size = min(15UL - cur_size, |
| 672 | PAGE_SIZE - offset_in_page(ctxt->_eip)); | ||
| 666 | rc = __linearize(ctxt, addr, size, false, true, &linear); | 673 | rc = __linearize(ctxt, addr, size, false, true, &linear); |
| 667 | if (rc != X86EMUL_CONTINUE) | 674 | if (unlikely(rc != X86EMUL_CONTINUE)) |
| 668 | return rc; | 675 | return rc; |
| 669 | rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, | 676 | rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, |
| 670 | size, &ctxt->exception); | 677 | size, &ctxt->exception); |
| 671 | if (rc != X86EMUL_CONTINUE) | 678 | if (unlikely(rc != X86EMUL_CONTINUE)) |
| 672 | return rc; | 679 | return rc; |
| 673 | fc->end += size; | 680 | fc->end += size; |
| 674 | } | 681 | } |
| 675 | *dest = fc->data[eip - fc->start]; | 682 | *dest = fc->data[ctxt->_eip - fc->start]; |
| 683 | ctxt->_eip++; | ||
| 676 | return X86EMUL_CONTINUE; | 684 | return X86EMUL_CONTINUE; |
| 677 | } | 685 | } |
| 678 | 686 | ||
| 679 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | 687 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, |
| 680 | unsigned long eip, void *dest, unsigned size) | 688 | void *dest, unsigned size) |
| 681 | { | 689 | { |
| 682 | int rc; | 690 | int rc; |
| 683 | 691 | ||
| 684 | /* x86 instructions are limited to 15 bytes. */ | 692 | /* x86 instructions are limited to 15 bytes. */ |
| 685 | if (eip + size - ctxt->eip > 15) | 693 | if (unlikely(ctxt->_eip + size - ctxt->eip > 15)) |
| 686 | return X86EMUL_UNHANDLEABLE; | 694 | return X86EMUL_UNHANDLEABLE; |
| 687 | while (size--) { | 695 | while (size--) { |
| 688 | rc = do_insn_fetch_byte(ctxt, eip++, dest++); | 696 | rc = do_insn_fetch_byte(ctxt, dest++); |
| 689 | if (rc != X86EMUL_CONTINUE) | 697 | if (rc != X86EMUL_CONTINUE) |
| 690 | return rc; | 698 | return rc; |
| 691 | } | 699 | } |
| @@ -693,20 +701,18 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | |||
| 693 | } | 701 | } |
| 694 | 702 | ||
| 695 | /* Fetch next part of the instruction being emulated. */ | 703 | /* Fetch next part of the instruction being emulated. */ |
| 696 | #define insn_fetch(_type, _size, _eip) \ | 704 | #define insn_fetch(_type, _ctxt) \ |
| 697 | ({ unsigned long _x; \ | 705 | ({ unsigned long _x; \ |
| 698 | rc = do_insn_fetch(ctxt, (_eip), &_x, (_size)); \ | 706 | rc = do_insn_fetch(_ctxt, &_x, sizeof(_type)); \ |
| 699 | if (rc != X86EMUL_CONTINUE) \ | 707 | if (rc != X86EMUL_CONTINUE) \ |
| 700 | goto done; \ | 708 | goto done; \ |
| 701 | (_eip) += (_size); \ | ||
| 702 | (_type)_x; \ | 709 | (_type)_x; \ |
| 703 | }) | 710 | }) |
| 704 | 711 | ||
| 705 | #define insn_fetch_arr(_arr, _size, _eip) \ | 712 | #define insn_fetch_arr(_arr, _size, _ctxt) \ |
| 706 | ({ rc = do_insn_fetch(ctxt, (_eip), _arr, (_size)); \ | 713 | ({ rc = do_insn_fetch(_ctxt, _arr, (_size)); \ |
| 707 | if (rc != X86EMUL_CONTINUE) \ | 714 | if (rc != X86EMUL_CONTINUE) \ |
| 708 | goto done; \ | 715 | goto done; \ |
| 709 | (_eip) += (_size); \ | ||
| 710 | }) | 716 | }) |
| 711 | 717 | ||
| 712 | /* | 718 | /* |
| @@ -894,7 +900,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
| 894 | ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ | 900 | ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ |
| 895 | } | 901 | } |
| 896 | 902 | ||
| 897 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); | 903 | ctxt->modrm = insn_fetch(u8, ctxt); |
| 898 | ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; | 904 | ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; |
| 899 | ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; | 905 | ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; |
| 900 | ctxt->modrm_rm |= (ctxt->modrm & 0x07); | 906 | ctxt->modrm_rm |= (ctxt->modrm & 0x07); |
| @@ -928,13 +934,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
| 928 | switch (ctxt->modrm_mod) { | 934 | switch (ctxt->modrm_mod) { |
| 929 | case 0: | 935 | case 0: |
| 930 | if (ctxt->modrm_rm == 6) | 936 | if (ctxt->modrm_rm == 6) |
| 931 | modrm_ea += insn_fetch(u16, 2, ctxt->_eip); | 937 | modrm_ea += insn_fetch(u16, ctxt); |
| 932 | break; | 938 | break; |
| 933 | case 1: | 939 | case 1: |
| 934 | modrm_ea += insn_fetch(s8, 1, ctxt->_eip); | 940 | modrm_ea += insn_fetch(s8, ctxt); |
| 935 | break; | 941 | break; |
| 936 | case 2: | 942 | case 2: |
| 937 | modrm_ea += insn_fetch(u16, 2, ctxt->_eip); | 943 | modrm_ea += insn_fetch(u16, ctxt); |
| 938 | break; | 944 | break; |
| 939 | } | 945 | } |
| 940 | switch (ctxt->modrm_rm) { | 946 | switch (ctxt->modrm_rm) { |
| @@ -971,13 +977,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
| 971 | } else { | 977 | } else { |
| 972 | /* 32/64-bit ModR/M decode. */ | 978 | /* 32/64-bit ModR/M decode. */ |
| 973 | if ((ctxt->modrm_rm & 7) == 4) { | 979 | if ((ctxt->modrm_rm & 7) == 4) { |
| 974 | sib = insn_fetch(u8, 1, ctxt->_eip); | 980 | sib = insn_fetch(u8, ctxt); |
| 975 | index_reg |= (sib >> 3) & 7; | 981 | index_reg |= (sib >> 3) & 7; |
| 976 | base_reg |= sib & 7; | 982 | base_reg |= sib & 7; |
| 977 | scale = sib >> 6; | 983 | scale = sib >> 6; |
| 978 | 984 | ||
| 979 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) | 985 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) |
| 980 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); | 986 | modrm_ea += insn_fetch(s32, ctxt); |
| 981 | else | 987 | else |
| 982 | modrm_ea += ctxt->regs[base_reg]; | 988 | modrm_ea += ctxt->regs[base_reg]; |
| 983 | if (index_reg != 4) | 989 | if (index_reg != 4) |
| @@ -990,13 +996,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
| 990 | switch (ctxt->modrm_mod) { | 996 | switch (ctxt->modrm_mod) { |
| 991 | case 0: | 997 | case 0: |
| 992 | if (ctxt->modrm_rm == 5) | 998 | if (ctxt->modrm_rm == 5) |
| 993 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); | 999 | modrm_ea += insn_fetch(s32, ctxt); |
| 994 | break; | 1000 | break; |
| 995 | case 1: | 1001 | case 1: |
| 996 | modrm_ea += insn_fetch(s8, 1, ctxt->_eip); | 1002 | modrm_ea += insn_fetch(s8, ctxt); |
| 997 | break; | 1003 | break; |
| 998 | case 2: | 1004 | case 2: |
| 999 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); | 1005 | modrm_ea += insn_fetch(s32, ctxt); |
| 1000 | break; | 1006 | break; |
| 1001 | } | 1007 | } |
| 1002 | } | 1008 | } |
| @@ -1013,13 +1019,13 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
| 1013 | op->type = OP_MEM; | 1019 | op->type = OP_MEM; |
| 1014 | switch (ctxt->ad_bytes) { | 1020 | switch (ctxt->ad_bytes) { |
| 1015 | case 2: | 1021 | case 2: |
| 1016 | op->addr.mem.ea = insn_fetch(u16, 2, ctxt->_eip); | 1022 | op->addr.mem.ea = insn_fetch(u16, ctxt); |
| 1017 | break; | 1023 | break; |
| 1018 | case 4: | 1024 | case 4: |
| 1019 | op->addr.mem.ea = insn_fetch(u32, 4, ctxt->_eip); | 1025 | op->addr.mem.ea = insn_fetch(u32, ctxt); |
| 1020 | break; | 1026 | break; |
| 1021 | case 8: | 1027 | case 8: |
| 1022 | op->addr.mem.ea = insn_fetch(u64, 8, ctxt->_eip); | 1028 | op->addr.mem.ea = insn_fetch(u64, ctxt); |
| 1023 | break; | 1029 | break; |
| 1024 | } | 1030 | } |
| 1025 | done: | 1031 | done: |
| @@ -1452,15 +1458,18 @@ static int em_popf(struct x86_emulate_ctxt *ctxt) | |||
| 1452 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); | 1458 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); |
| 1453 | } | 1459 | } |
| 1454 | 1460 | ||
| 1455 | static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1461 | static int em_push_sreg(struct x86_emulate_ctxt *ctxt) |
| 1456 | { | 1462 | { |
| 1463 | int seg = ctxt->src2.val; | ||
| 1464 | |||
| 1457 | ctxt->src.val = get_segment_selector(ctxt, seg); | 1465 | ctxt->src.val = get_segment_selector(ctxt, seg); |
| 1458 | 1466 | ||
| 1459 | return em_push(ctxt); | 1467 | return em_push(ctxt); |
| 1460 | } | 1468 | } |
| 1461 | 1469 | ||
| 1462 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1470 | static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) |
| 1463 | { | 1471 | { |
| 1472 | int seg = ctxt->src2.val; | ||
| 1464 | unsigned long selector; | 1473 | unsigned long selector; |
| 1465 | int rc; | 1474 | int rc; |
| 1466 | 1475 | ||
| @@ -1674,64 +1683,74 @@ static int em_grp2(struct x86_emulate_ctxt *ctxt) | |||
| 1674 | { | 1683 | { |
| 1675 | switch (ctxt->modrm_reg) { | 1684 | switch (ctxt->modrm_reg) { |
| 1676 | case 0: /* rol */ | 1685 | case 0: /* rol */ |
| 1677 | emulate_2op_SrcB("rol", ctxt->src, ctxt->dst, ctxt->eflags); | 1686 | emulate_2op_SrcB(ctxt, "rol"); |
| 1678 | break; | 1687 | break; |
| 1679 | case 1: /* ror */ | 1688 | case 1: /* ror */ |
| 1680 | emulate_2op_SrcB("ror", ctxt->src, ctxt->dst, ctxt->eflags); | 1689 | emulate_2op_SrcB(ctxt, "ror"); |
| 1681 | break; | 1690 | break; |
| 1682 | case 2: /* rcl */ | 1691 | case 2: /* rcl */ |
| 1683 | emulate_2op_SrcB("rcl", ctxt->src, ctxt->dst, ctxt->eflags); | 1692 | emulate_2op_SrcB(ctxt, "rcl"); |
| 1684 | break; | 1693 | break; |
| 1685 | case 3: /* rcr */ | 1694 | case 3: /* rcr */ |
| 1686 | emulate_2op_SrcB("rcr", ctxt->src, ctxt->dst, ctxt->eflags); | 1695 | emulate_2op_SrcB(ctxt, "rcr"); |
| 1687 | break; | 1696 | break; |
| 1688 | case 4: /* sal/shl */ | 1697 | case 4: /* sal/shl */ |
| 1689 | case 6: /* sal/shl */ | 1698 | case 6: /* sal/shl */ |
| 1690 | emulate_2op_SrcB("sal", ctxt->src, ctxt->dst, ctxt->eflags); | 1699 | emulate_2op_SrcB(ctxt, "sal"); |
| 1691 | break; | 1700 | break; |
| 1692 | case 5: /* shr */ | 1701 | case 5: /* shr */ |
| 1693 | emulate_2op_SrcB("shr", ctxt->src, ctxt->dst, ctxt->eflags); | 1702 | emulate_2op_SrcB(ctxt, "shr"); |
| 1694 | break; | 1703 | break; |
| 1695 | case 7: /* sar */ | 1704 | case 7: /* sar */ |
| 1696 | emulate_2op_SrcB("sar", ctxt->src, ctxt->dst, ctxt->eflags); | 1705 | emulate_2op_SrcB(ctxt, "sar"); |
| 1697 | break; | 1706 | break; |
| 1698 | } | 1707 | } |
| 1699 | return X86EMUL_CONTINUE; | 1708 | return X86EMUL_CONTINUE; |
| 1700 | } | 1709 | } |
| 1701 | 1710 | ||
| 1702 | static int em_grp3(struct x86_emulate_ctxt *ctxt) | 1711 | static int em_not(struct x86_emulate_ctxt *ctxt) |
| 1712 | { | ||
| 1713 | ctxt->dst.val = ~ctxt->dst.val; | ||
| 1714 | return X86EMUL_CONTINUE; | ||
| 1715 | } | ||
| 1716 | |||
| 1717 | static int em_neg(struct x86_emulate_ctxt *ctxt) | ||
| 1718 | { | ||
| 1719 | emulate_1op(ctxt, "neg"); | ||
| 1720 | return X86EMUL_CONTINUE; | ||
| 1721 | } | ||
| 1722 | |||
| 1723 | static int em_mul_ex(struct x86_emulate_ctxt *ctxt) | ||
| 1724 | { | ||
| 1725 | u8 ex = 0; | ||
| 1726 | |||
| 1727 | emulate_1op_rax_rdx(ctxt, "mul", ex); | ||
| 1728 | return X86EMUL_CONTINUE; | ||
| 1729 | } | ||
| 1730 | |||
| 1731 | static int em_imul_ex(struct x86_emulate_ctxt *ctxt) | ||
| 1732 | { | ||
| 1733 | u8 ex = 0; | ||
| 1734 | |||
| 1735 | emulate_1op_rax_rdx(ctxt, "imul", ex); | ||
| 1736 | return X86EMUL_CONTINUE; | ||
| 1737 | } | ||
| 1738 | |||
| 1739 | static int em_div_ex(struct x86_emulate_ctxt *ctxt) | ||
| 1703 | { | 1740 | { |
| 1704 | unsigned long *rax = &ctxt->regs[VCPU_REGS_RAX]; | ||
| 1705 | unsigned long *rdx = &ctxt->regs[VCPU_REGS_RDX]; | ||
| 1706 | u8 de = 0; | 1741 | u8 de = 0; |
| 1707 | 1742 | ||
| 1708 | switch (ctxt->modrm_reg) { | 1743 | emulate_1op_rax_rdx(ctxt, "div", de); |
| 1709 | case 0 ... 1: /* test */ | 1744 | if (de) |
| 1710 | emulate_2op_SrcV("test", ctxt->src, ctxt->dst, ctxt->eflags); | 1745 | return emulate_de(ctxt); |
| 1711 | break; | 1746 | return X86EMUL_CONTINUE; |
| 1712 | case 2: /* not */ | 1747 | } |
| 1713 | ctxt->dst.val = ~ctxt->dst.val; | 1748 | |
| 1714 | break; | 1749 | static int em_idiv_ex(struct x86_emulate_ctxt *ctxt) |
| 1715 | case 3: /* neg */ | 1750 | { |
| 1716 | emulate_1op("neg", ctxt->dst, ctxt->eflags); | 1751 | u8 de = 0; |
| 1717 | break; | 1752 | |
| 1718 | case 4: /* mul */ | 1753 | emulate_1op_rax_rdx(ctxt, "idiv", de); |
| 1719 | emulate_1op_rax_rdx("mul", ctxt->src, *rax, *rdx, ctxt->eflags); | ||
| 1720 | break; | ||
| 1721 | case 5: /* imul */ | ||
| 1722 | emulate_1op_rax_rdx("imul", ctxt->src, *rax, *rdx, ctxt->eflags); | ||
| 1723 | break; | ||
| 1724 | case 6: /* div */ | ||
| 1725 | emulate_1op_rax_rdx_ex("div", ctxt->src, *rax, *rdx, | ||
| 1726 | ctxt->eflags, de); | ||
| 1727 | break; | ||
| 1728 | case 7: /* idiv */ | ||
| 1729 | emulate_1op_rax_rdx_ex("idiv", ctxt->src, *rax, *rdx, | ||
| 1730 | ctxt->eflags, de); | ||
| 1731 | break; | ||
| 1732 | default: | ||
| 1733 | return X86EMUL_UNHANDLEABLE; | ||
| 1734 | } | ||
| 1735 | if (de) | 1754 | if (de) |
| 1736 | return emulate_de(ctxt); | 1755 | return emulate_de(ctxt); |
| 1737 | return X86EMUL_CONTINUE; | 1756 | return X86EMUL_CONTINUE; |
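The em_grp3 switch over ctxt->modrm_reg dissolves into one em_* handler per encoding; as the group3 table further down shows, selection moves into a table indexed by the ModRM /reg field. A stand-alone sketch of that table-driven dispatch, with invented names and only two of the eight slots filled in:

#include <stdio.h>

struct ctxt { unsigned long dst; };

static int em_not(struct ctxt *c)   { c->dst = ~c->dst; return 0; }
static int em_neg(struct ctxt *c)   { c->dst = -c->dst; return 0; }
static int em_undef(struct ctxt *c) { (void)c; return -1; }

/* One handler per /reg encoding of the group, in encoding order. */
static int (*const group[8])(struct ctxt *) = {
        em_undef, em_undef,     /* /0, /1: test (elided here)  */
        em_not, em_neg,         /* /2, /3                      */
        em_undef, em_undef,     /* /4, /5: mul, imul (elided)  */
        em_undef, em_undef,     /* /6, /7: div, idiv (elided)  */
};

int main(void)
{
        struct ctxt c = { .dst = 5 };
        unsigned char modrm = 0xd8;             /* mod=11, reg=3 (neg), rm=0 */
        int reg = (modrm >> 3) & 7;

        group[reg](&c);                         /* table lookup instead of a switch */
        printf("%ld\n", (long)c.dst);           /* -5 */
        return 0;
}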
| @@ -1743,10 +1762,10 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) | |||
| 1743 | 1762 | ||
| 1744 | switch (ctxt->modrm_reg) { | 1763 | switch (ctxt->modrm_reg) { |
| 1745 | case 0: /* inc */ | 1764 | case 0: /* inc */ |
| 1746 | emulate_1op("inc", ctxt->dst, ctxt->eflags); | 1765 | emulate_1op(ctxt, "inc"); |
| 1747 | break; | 1766 | break; |
| 1748 | case 1: /* dec */ | 1767 | case 1: /* dec */ |
| 1749 | emulate_1op("dec", ctxt->dst, ctxt->eflags); | 1768 | emulate_1op(ctxt, "dec"); |
| 1750 | break; | 1769 | break; |
| 1751 | case 2: /* call near abs */ { | 1770 | case 2: /* call near abs */ { |
| 1752 | long int old_eip; | 1771 | long int old_eip; |
| @@ -1812,8 +1831,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) | |||
| 1812 | return rc; | 1831 | return rc; |
| 1813 | } | 1832 | } |
| 1814 | 1833 | ||
| 1815 | static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, int seg) | 1834 | static int em_lseg(struct x86_emulate_ctxt *ctxt) |
| 1816 | { | 1835 | { |
| 1836 | int seg = ctxt->src2.val; | ||
| 1817 | unsigned short sel; | 1837 | unsigned short sel; |
| 1818 | int rc; | 1838 | int rc; |
| 1819 | 1839 | ||
| @@ -2452,7 +2472,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
| 2452 | ctxt->src.type = OP_IMM; | 2472 | ctxt->src.type = OP_IMM; |
| 2453 | ctxt->src.val = 0; | 2473 | ctxt->src.val = 0; |
| 2454 | ctxt->src.bytes = 1; | 2474 | ctxt->src.bytes = 1; |
| 2455 | emulate_2op_SrcV("or", ctxt->src, ctxt->dst, ctxt->eflags); | 2475 | emulate_2op_SrcV(ctxt, "or"); |
| 2456 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); | 2476 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); |
| 2457 | if (cf) | 2477 | if (cf) |
| 2458 | ctxt->eflags |= X86_EFLAGS_CF; | 2478 | ctxt->eflags |= X86_EFLAGS_CF; |
| @@ -2502,49 +2522,49 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | |||
| 2502 | 2522 | ||
| 2503 | static int em_add(struct x86_emulate_ctxt *ctxt) | 2523 | static int em_add(struct x86_emulate_ctxt *ctxt) |
| 2504 | { | 2524 | { |
| 2505 | emulate_2op_SrcV("add", ctxt->src, ctxt->dst, ctxt->eflags); | 2525 | emulate_2op_SrcV(ctxt, "add"); |
| 2506 | return X86EMUL_CONTINUE; | 2526 | return X86EMUL_CONTINUE; |
| 2507 | } | 2527 | } |
| 2508 | 2528 | ||
| 2509 | static int em_or(struct x86_emulate_ctxt *ctxt) | 2529 | static int em_or(struct x86_emulate_ctxt *ctxt) |
| 2510 | { | 2530 | { |
| 2511 | emulate_2op_SrcV("or", ctxt->src, ctxt->dst, ctxt->eflags); | 2531 | emulate_2op_SrcV(ctxt, "or"); |
| 2512 | return X86EMUL_CONTINUE; | 2532 | return X86EMUL_CONTINUE; |
| 2513 | } | 2533 | } |
| 2514 | 2534 | ||
| 2515 | static int em_adc(struct x86_emulate_ctxt *ctxt) | 2535 | static int em_adc(struct x86_emulate_ctxt *ctxt) |
| 2516 | { | 2536 | { |
| 2517 | emulate_2op_SrcV("adc", ctxt->src, ctxt->dst, ctxt->eflags); | 2537 | emulate_2op_SrcV(ctxt, "adc"); |
| 2518 | return X86EMUL_CONTINUE; | 2538 | return X86EMUL_CONTINUE; |
| 2519 | } | 2539 | } |
| 2520 | 2540 | ||
| 2521 | static int em_sbb(struct x86_emulate_ctxt *ctxt) | 2541 | static int em_sbb(struct x86_emulate_ctxt *ctxt) |
| 2522 | { | 2542 | { |
| 2523 | emulate_2op_SrcV("sbb", ctxt->src, ctxt->dst, ctxt->eflags); | 2543 | emulate_2op_SrcV(ctxt, "sbb"); |
| 2524 | return X86EMUL_CONTINUE; | 2544 | return X86EMUL_CONTINUE; |
| 2525 | } | 2545 | } |
| 2526 | 2546 | ||
| 2527 | static int em_and(struct x86_emulate_ctxt *ctxt) | 2547 | static int em_and(struct x86_emulate_ctxt *ctxt) |
| 2528 | { | 2548 | { |
| 2529 | emulate_2op_SrcV("and", ctxt->src, ctxt->dst, ctxt->eflags); | 2549 | emulate_2op_SrcV(ctxt, "and"); |
| 2530 | return X86EMUL_CONTINUE; | 2550 | return X86EMUL_CONTINUE; |
| 2531 | } | 2551 | } |
| 2532 | 2552 | ||
| 2533 | static int em_sub(struct x86_emulate_ctxt *ctxt) | 2553 | static int em_sub(struct x86_emulate_ctxt *ctxt) |
| 2534 | { | 2554 | { |
| 2535 | emulate_2op_SrcV("sub", ctxt->src, ctxt->dst, ctxt->eflags); | 2555 | emulate_2op_SrcV(ctxt, "sub"); |
| 2536 | return X86EMUL_CONTINUE; | 2556 | return X86EMUL_CONTINUE; |
| 2537 | } | 2557 | } |
| 2538 | 2558 | ||
| 2539 | static int em_xor(struct x86_emulate_ctxt *ctxt) | 2559 | static int em_xor(struct x86_emulate_ctxt *ctxt) |
| 2540 | { | 2560 | { |
| 2541 | emulate_2op_SrcV("xor", ctxt->src, ctxt->dst, ctxt->eflags); | 2561 | emulate_2op_SrcV(ctxt, "xor"); |
| 2542 | return X86EMUL_CONTINUE; | 2562 | return X86EMUL_CONTINUE; |
| 2543 | } | 2563 | } |
| 2544 | 2564 | ||
| 2545 | static int em_cmp(struct x86_emulate_ctxt *ctxt) | 2565 | static int em_cmp(struct x86_emulate_ctxt *ctxt) |
| 2546 | { | 2566 | { |
| 2547 | emulate_2op_SrcV("cmp", ctxt->src, ctxt->dst, ctxt->eflags); | 2567 | emulate_2op_SrcV(ctxt, "cmp"); |
| 2548 | /* Disable writeback. */ | 2568 | /* Disable writeback. */ |
| 2549 | ctxt->dst.type = OP_NONE; | 2569 | ctxt->dst.type = OP_NONE; |
| 2550 | return X86EMUL_CONTINUE; | 2570 | return X86EMUL_CONTINUE; |
| @@ -2552,7 +2572,9 @@ static int em_cmp(struct x86_emulate_ctxt *ctxt) | |||
| 2552 | 2572 | ||
| 2553 | static int em_test(struct x86_emulate_ctxt *ctxt) | 2573 | static int em_test(struct x86_emulate_ctxt *ctxt) |
| 2554 | { | 2574 | { |
| 2555 | emulate_2op_SrcV("test", ctxt->src, ctxt->dst, ctxt->eflags); | 2575 | emulate_2op_SrcV(ctxt, "test"); |
| 2576 | /* Disable writeback. */ | ||
| 2577 | ctxt->dst.type = OP_NONE; | ||
| 2556 | return X86EMUL_CONTINUE; | 2578 | return X86EMUL_CONTINUE; |
| 2557 | } | 2579 | } |
| 2558 | 2580 | ||
| @@ -2570,7 +2592,7 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) | |||
| 2570 | 2592 | ||
| 2571 | static int em_imul(struct x86_emulate_ctxt *ctxt) | 2593 | static int em_imul(struct x86_emulate_ctxt *ctxt) |
| 2572 | { | 2594 | { |
| 2573 | emulate_2op_SrcV_nobyte("imul", ctxt->src, ctxt->dst, ctxt->eflags); | 2595 | emulate_2op_SrcV_nobyte(ctxt, "imul"); |
| 2574 | return X86EMUL_CONTINUE; | 2596 | return X86EMUL_CONTINUE; |
| 2575 | } | 2597 | } |
| 2576 | 2598 | ||
| @@ -3025,9 +3047,14 @@ static struct opcode group1A[] = { | |||
| 3025 | }; | 3047 | }; |
| 3026 | 3048 | ||
| 3027 | static struct opcode group3[] = { | 3049 | static struct opcode group3[] = { |
| 3028 | D(DstMem | SrcImm | ModRM), D(DstMem | SrcImm | ModRM), | 3050 | I(DstMem | SrcImm | ModRM, em_test), |
| 3029 | D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock), | 3051 | I(DstMem | SrcImm | ModRM, em_test), |
| 3030 | X4(D(SrcMem | ModRM)), | 3052 | I(DstMem | SrcNone | ModRM | Lock, em_not), |
| 3053 | I(DstMem | SrcNone | ModRM | Lock, em_neg), | ||
| 3054 | I(SrcMem | ModRM, em_mul_ex), | ||
| 3055 | I(SrcMem | ModRM, em_imul_ex), | ||
| 3056 | I(SrcMem | ModRM, em_div_ex), | ||
| 3057 | I(SrcMem | ModRM, em_idiv_ex), | ||
| 3031 | }; | 3058 | }; |
| 3032 | 3059 | ||
| 3033 | static struct opcode group4[] = { | 3060 | static struct opcode group4[] = { |
| @@ -3090,16 +3117,20 @@ static struct gprefix pfx_0f_6f_0f_7f = { | |||
| 3090 | static struct opcode opcode_table[256] = { | 3117 | static struct opcode opcode_table[256] = { |
| 3091 | /* 0x00 - 0x07 */ | 3118 | /* 0x00 - 0x07 */ |
| 3092 | I6ALU(Lock, em_add), | 3119 | I6ALU(Lock, em_add), |
| 3093 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3120 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), |
| 3121 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), | ||
| 3094 | /* 0x08 - 0x0F */ | 3122 | /* 0x08 - 0x0F */ |
| 3095 | I6ALU(Lock, em_or), | 3123 | I6ALU(Lock, em_or), |
| 3096 | D(ImplicitOps | Stack | No64), N, | 3124 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), |
| 3125 | N, | ||
| 3097 | /* 0x10 - 0x17 */ | 3126 | /* 0x10 - 0x17 */ |
| 3098 | I6ALU(Lock, em_adc), | 3127 | I6ALU(Lock, em_adc), |
| 3099 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3128 | I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), |
| 3129 | I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), | ||
| 3100 | /* 0x18 - 0x1F */ | 3130 | /* 0x18 - 0x1F */ |
| 3101 | I6ALU(Lock, em_sbb), | 3131 | I6ALU(Lock, em_sbb), |
| 3102 | D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), | 3132 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), |
| 3133 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), | ||
| 3103 | /* 0x20 - 0x27 */ | 3134 | /* 0x20 - 0x27 */ |
| 3104 | I6ALU(Lock, em_and), N, N, | 3135 | I6ALU(Lock, em_and), N, N, |
| 3105 | /* 0x28 - 0x2F */ | 3136 | /* 0x28 - 0x2F */ |
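The entries above attach the segment as a second-source flag (Src2ES, Src2CS, ...), so a single em_push_sreg/em_pop_sreg reads the segment number from ctxt->src2.val instead of taking it as a parameter. A compact sketch of feeding a constant operand to a shared handler through the decode flags; the field position, width and OP_* names below are invented:

#include <stdio.h>

/* Invented operand kinds for the second-source field. */
enum { OP_NONE, OP_ES, OP_CS, OP_SS, OP_DS };

#define SRC2_SHIFT      8
#define OP_MASK         0xf

struct ctxt { int src2_val; };

/* Decode step: turn the flag field into a concrete src2 value. */
static void decode_src2(struct ctxt *c, unsigned long flags)
{
        switch ((flags >> SRC2_SHIFT) & OP_MASK) {
        case OP_ES: c->src2_val = 0; break;     /* segment indices follow KVM's VCPU_SREG_* order */
        case OP_CS: c->src2_val = 1; break;
        case OP_SS: c->src2_val = 2; break;
        case OP_DS: c->src2_val = 3; break;
        default: break;
        }
}

/* One handler covers every "push sreg" opcode; it just reads src2. */
static void em_push_sreg(struct ctxt *c)
{
        printf("push segment selector %d\n", c->src2_val);
}

int main(void)
{
        unsigned long opcode_0x16_flags = (unsigned long)OP_SS << SRC2_SHIFT;
        struct ctxt c = { 0 };

        decode_src2(&c, opcode_0x16_flags);
        em_push_sreg(&c);               /* "push segment selector 2" */
        return 0;
}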
| @@ -3167,7 +3198,8 @@ static struct opcode opcode_table[256] = { | |||
| 3167 | D2bv(DstMem | SrcImmByte | ModRM), | 3198 | D2bv(DstMem | SrcImmByte | ModRM), |
| 3168 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), | 3199 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), |
| 3169 | I(ImplicitOps | Stack, em_ret), | 3200 | I(ImplicitOps | Stack, em_ret), |
| 3170 | D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64), | 3201 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), |
| 3202 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), | ||
| 3171 | G(ByteOp, group11), G(0, group11), | 3203 | G(ByteOp, group11), G(0, group11), |
| 3172 | /* 0xC8 - 0xCF */ | 3204 | /* 0xC8 - 0xCF */ |
| 3173 | N, N, N, I(ImplicitOps | Stack, em_ret_far), | 3205 | N, N, N, I(ImplicitOps | Stack, em_ret_far), |
| @@ -3242,20 +3274,22 @@ static struct opcode twobyte_table[256] = { | |||
| 3242 | /* 0x90 - 0x9F */ | 3274 | /* 0x90 - 0x9F */ |
| 3243 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3275 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
| 3244 | /* 0xA0 - 0xA7 */ | 3276 | /* 0xA0 - 0xA7 */ |
| 3245 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), | 3277 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
| 3246 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), | 3278 | DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), |
| 3247 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3279 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
| 3248 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3280 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
| 3249 | /* 0xA8 - 0xAF */ | 3281 | /* 0xA8 - 0xAF */ |
| 3250 | D(ImplicitOps | Stack), D(ImplicitOps | Stack), | 3282 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
| 3251 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3283 | DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), |
| 3252 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3284 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
| 3253 | D(DstMem | SrcReg | Src2CL | ModRM), | 3285 | D(DstMem | SrcReg | Src2CL | ModRM), |
| 3254 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 3286 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), |
| 3255 | /* 0xB0 - 0xB7 */ | 3287 | /* 0xB0 - 0xB7 */ |
| 3256 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3288 | D2bv(DstMem | SrcReg | ModRM | Lock), |
| 3257 | D(DstReg | SrcMemFAddr | ModRM), D(DstMem | SrcReg | ModRM | BitOp | Lock), | 3289 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), |
| 3258 | D(DstReg | SrcMemFAddr | ModRM), D(DstReg | SrcMemFAddr | ModRM), | 3290 | D(DstMem | SrcReg | ModRM | BitOp | Lock), |
| 3291 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | ||
| 3292 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | ||
| 3259 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3293 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
| 3260 | /* 0xB8 - 0xBF */ | 3294 | /* 0xB8 - 0xBF */ |
| 3261 | N, N, | 3295 | N, N, |
| @@ -3309,13 +3343,13 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
| 3309 | /* NB. Immediates are sign-extended as necessary. */ | 3343 | /* NB. Immediates are sign-extended as necessary. */ |
| 3310 | switch (op->bytes) { | 3344 | switch (op->bytes) { |
| 3311 | case 1: | 3345 | case 1: |
| 3312 | op->val = insn_fetch(s8, 1, ctxt->_eip); | 3346 | op->val = insn_fetch(s8, ctxt); |
| 3313 | break; | 3347 | break; |
| 3314 | case 2: | 3348 | case 2: |
| 3315 | op->val = insn_fetch(s16, 2, ctxt->_eip); | 3349 | op->val = insn_fetch(s16, ctxt); |
| 3316 | break; | 3350 | break; |
| 3317 | case 4: | 3351 | case 4: |
| 3318 | op->val = insn_fetch(s32, 4, ctxt->_eip); | 3352 | op->val = insn_fetch(s32, ctxt); |
| 3319 | break; | 3353 | break; |
| 3320 | } | 3354 | } |
| 3321 | if (!sign_extension) { | 3355 | if (!sign_extension) { |
| @@ -3335,6 +3369,125 @@ done: | |||
| 3335 | return rc; | 3369 | return rc; |
| 3336 | } | 3370 | } |
| 3337 | 3371 | ||
| 3372 | static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | ||
| 3373 | unsigned d) | ||
| 3374 | { | ||
| 3375 | int rc = X86EMUL_CONTINUE; | ||
| 3376 | |||
| 3377 | switch (d) { | ||
| 3378 | case OpReg: | ||
| 3379 | decode_register_operand(ctxt, op, | ||
| 3380 | op == &ctxt->dst && | ||
| 3381 | ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7)); | ||
| 3382 | break; | ||
| 3383 | case OpImmUByte: | ||
| 3384 | rc = decode_imm(ctxt, op, 1, false); | ||
| 3385 | break; | ||
| 3386 | case OpMem: | ||
| 3387 | ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3388 | mem_common: | ||
| 3389 | *op = ctxt->memop; | ||
| 3390 | ctxt->memopp = op; | ||
| 3391 | if ((ctxt->d & BitOp) && op == &ctxt->dst) | ||
| 3392 | fetch_bit_operand(ctxt); | ||
| 3393 | op->orig_val = op->val; | ||
| 3394 | break; | ||
| 3395 | case OpMem64: | ||
| 3396 | ctxt->memop.bytes = 8; | ||
| 3397 | goto mem_common; | ||
| 3398 | case OpAcc: | ||
| 3399 | op->type = OP_REG; | ||
| 3400 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3401 | op->addr.reg = &ctxt->regs[VCPU_REGS_RAX]; | ||
| 3402 | fetch_register_operand(op); | ||
| 3403 | op->orig_val = op->val; | ||
| 3404 | break; | ||
| 3405 | case OpDI: | ||
| 3406 | op->type = OP_MEM; | ||
| 3407 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3408 | op->addr.mem.ea = | ||
| 3409 | register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); | ||
| 3410 | op->addr.mem.seg = VCPU_SREG_ES; | ||
| 3411 | op->val = 0; | ||
| 3412 | break; | ||
| 3413 | case OpDX: | ||
| 3414 | op->type = OP_REG; | ||
| 3415 | op->bytes = 2; | ||
| 3416 | op->addr.reg = &ctxt->regs[VCPU_REGS_RDX]; | ||
| 3417 | fetch_register_operand(op); | ||
| 3418 | break; | ||
| 3419 | case OpCL: | ||
| 3420 | op->bytes = 1; | ||
| 3421 | op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff; | ||
| 3422 | break; | ||
| 3423 | case OpImmByte: | ||
| 3424 | rc = decode_imm(ctxt, op, 1, true); | ||
| 3425 | break; | ||
| 3426 | case OpOne: | ||
| 3427 | op->bytes = 1; | ||
| 3428 | op->val = 1; | ||
| 3429 | break; | ||
| 3430 | case OpImm: | ||
| 3431 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); | ||
| 3432 | break; | ||
| 3433 | case OpMem16: | ||
| 3434 | ctxt->memop.bytes = 2; | ||
| 3435 | goto mem_common; | ||
| 3436 | case OpMem32: | ||
| 3437 | ctxt->memop.bytes = 4; | ||
| 3438 | goto mem_common; | ||
| 3439 | case OpImmU16: | ||
| 3440 | rc = decode_imm(ctxt, op, 2, false); | ||
| 3441 | break; | ||
| 3442 | case OpImmU: | ||
| 3443 | rc = decode_imm(ctxt, op, imm_size(ctxt), false); | ||
| 3444 | break; | ||
| 3445 | case OpSI: | ||
| 3446 | op->type = OP_MEM; | ||
| 3447 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3448 | op->addr.mem.ea = | ||
| 3449 | register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); | ||
| 3450 | op->addr.mem.seg = seg_override(ctxt); | ||
| 3451 | op->val = 0; | ||
| 3452 | break; | ||
| 3453 | case OpImmFAddr: | ||
| 3454 | op->type = OP_IMM; | ||
| 3455 | op->addr.mem.ea = ctxt->_eip; | ||
| 3456 | op->bytes = ctxt->op_bytes + 2; | ||
| 3457 | insn_fetch_arr(op->valptr, op->bytes, ctxt); | ||
| 3458 | break; | ||
| 3459 | case OpMemFAddr: | ||
| 3460 | ctxt->memop.bytes = ctxt->op_bytes + 2; | ||
| 3461 | goto mem_common; | ||
| 3462 | case OpES: | ||
| 3463 | op->val = VCPU_SREG_ES; | ||
| 3464 | break; | ||
| 3465 | case OpCS: | ||
| 3466 | op->val = VCPU_SREG_CS; | ||
| 3467 | break; | ||
| 3468 | case OpSS: | ||
| 3469 | op->val = VCPU_SREG_SS; | ||
| 3470 | break; | ||
| 3471 | case OpDS: | ||
| 3472 | op->val = VCPU_SREG_DS; | ||
| 3473 | break; | ||
| 3474 | case OpFS: | ||
| 3475 | op->val = VCPU_SREG_FS; | ||
| 3476 | break; | ||
| 3477 | case OpGS: | ||
| 3478 | op->val = VCPU_SREG_GS; | ||
| 3479 | break; | ||
| 3480 | case OpImplicit: | ||
| 3481 | /* Special instructions do their own operand decoding. */ | ||
| 3482 | default: | ||
| 3483 | op->type = OP_NONE; /* Disable writeback. */ | ||
| 3484 | break; | ||
| 3485 | } | ||
| 3486 | |||
| 3487 | done: | ||
| 3488 | return rc; | ||
| 3489 | } | ||
| 3490 | |||
| 3338 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | 3491 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) |
| 3339 | { | 3492 | { |
| 3340 | int rc = X86EMUL_CONTINUE; | 3493 | int rc = X86EMUL_CONTINUE; |
| @@ -3342,8 +3495,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
| 3342 | int def_op_bytes, def_ad_bytes, goffset, simd_prefix; | 3495 | int def_op_bytes, def_ad_bytes, goffset, simd_prefix; |
| 3343 | bool op_prefix = false; | 3496 | bool op_prefix = false; |
| 3344 | struct opcode opcode; | 3497 | struct opcode opcode; |
| 3345 | struct operand memop = { .type = OP_NONE }, *memopp = NULL; | ||
| 3346 | 3498 | ||
| 3499 | ctxt->memop.type = OP_NONE; | ||
| 3500 | ctxt->memopp = NULL; | ||
| 3347 | ctxt->_eip = ctxt->eip; | 3501 | ctxt->_eip = ctxt->eip; |
| 3348 | ctxt->fetch.start = ctxt->_eip; | 3502 | ctxt->fetch.start = ctxt->_eip; |
| 3349 | ctxt->fetch.end = ctxt->fetch.start + insn_len; | 3503 | ctxt->fetch.end = ctxt->fetch.start + insn_len; |
| @@ -3366,7 +3520,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
| 3366 | break; | 3520 | break; |
| 3367 | #endif | 3521 | #endif |
| 3368 | default: | 3522 | default: |
| 3369 | return -1; | 3523 | return EMULATION_FAILED; |
| 3370 | } | 3524 | } |
| 3371 | 3525 | ||
| 3372 | ctxt->op_bytes = def_op_bytes; | 3526 | ctxt->op_bytes = def_op_bytes; |
| @@ -3374,7 +3528,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
| 3374 | 3528 | ||
| 3375 | /* Legacy prefixes. */ | 3529 | /* Legacy prefixes. */ |
| 3376 | for (;;) { | 3530 | for (;;) { |
| 3377 | switch (ctxt->b = insn_fetch(u8, 1, ctxt->_eip)) { | 3531 | switch (ctxt->b = insn_fetch(u8, ctxt)) { |
| 3378 | case 0x66: /* operand-size override */ | 3532 | case 0x66: /* operand-size override */ |
| 3379 | op_prefix = true; | 3533 | op_prefix = true; |
| 3380 | /* switch between 2/4 bytes */ | 3534 | /* switch between 2/4 bytes */ |
| @@ -3430,7 +3584,7 @@ done_prefixes: | |||
| 3430 | /* Two-byte opcode? */ | 3584 | /* Two-byte opcode? */ |
| 3431 | if (ctxt->b == 0x0f) { | 3585 | if (ctxt->b == 0x0f) { |
| 3432 | ctxt->twobyte = 1; | 3586 | ctxt->twobyte = 1; |
| 3433 | ctxt->b = insn_fetch(u8, 1, ctxt->_eip); | 3587 | ctxt->b = insn_fetch(u8, ctxt); |
| 3434 | opcode = twobyte_table[ctxt->b]; | 3588 | opcode = twobyte_table[ctxt->b]; |
| 3435 | } | 3589 | } |
| 3436 | ctxt->d = opcode.flags; | 3590 | ctxt->d = opcode.flags; |
| @@ -3438,13 +3592,13 @@ done_prefixes: | |||
| 3438 | while (ctxt->d & GroupMask) { | 3592 | while (ctxt->d & GroupMask) { |
| 3439 | switch (ctxt->d & GroupMask) { | 3593 | switch (ctxt->d & GroupMask) { |
| 3440 | case Group: | 3594 | case Group: |
| 3441 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); | 3595 | ctxt->modrm = insn_fetch(u8, ctxt); |
| 3442 | --ctxt->_eip; | 3596 | --ctxt->_eip; |
| 3443 | goffset = (ctxt->modrm >> 3) & 7; | 3597 | goffset = (ctxt->modrm >> 3) & 7; |
| 3444 | opcode = opcode.u.group[goffset]; | 3598 | opcode = opcode.u.group[goffset]; |
| 3445 | break; | 3599 | break; |
| 3446 | case GroupDual: | 3600 | case GroupDual: |
| 3447 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); | 3601 | ctxt->modrm = insn_fetch(u8, ctxt); |
| 3448 | --ctxt->_eip; | 3602 | --ctxt->_eip; |
| 3449 | goffset = (ctxt->modrm >> 3) & 7; | 3603 | goffset = (ctxt->modrm >> 3) & 7; |
| 3450 | if ((ctxt->modrm >> 6) == 3) | 3604 | if ((ctxt->modrm >> 6) == 3) |
| @@ -3458,7 +3612,7 @@ done_prefixes: | |||
| 3458 | break; | 3612 | break; |
| 3459 | case Prefix: | 3613 | case Prefix: |
| 3460 | if (ctxt->rep_prefix && op_prefix) | 3614 | if (ctxt->rep_prefix && op_prefix) |
| 3461 | return X86EMUL_UNHANDLEABLE; | 3615 | return EMULATION_FAILED; |
| 3462 | simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix; | 3616 | simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix; |
| 3463 | switch (simd_prefix) { | 3617 | switch (simd_prefix) { |
| 3464 | case 0x00: opcode = opcode.u.gprefix->pfx_no; break; | 3618 | case 0x00: opcode = opcode.u.gprefix->pfx_no; break; |
| @@ -3468,10 +3622,10 @@ done_prefixes: | |||
| 3468 | } | 3622 | } |
| 3469 | break; | 3623 | break; |
| 3470 | default: | 3624 | default: |
| 3471 | return X86EMUL_UNHANDLEABLE; | 3625 | return EMULATION_FAILED; |
| 3472 | } | 3626 | } |
| 3473 | 3627 | ||
| 3474 | ctxt->d &= ~GroupMask; | 3628 | ctxt->d &= ~(u64)GroupMask; |
| 3475 | ctxt->d |= opcode.flags; | 3629 | ctxt->d |= opcode.flags; |
| 3476 | } | 3630 | } |
| 3477 | 3631 | ||
| @@ -3481,10 +3635,10 @@ done_prefixes: | |||
| 3481 | 3635 | ||
| 3482 | /* Unrecognised? */ | 3636 | /* Unrecognised? */ |
| 3483 | if (ctxt->d == 0 || (ctxt->d & Undefined)) | 3637 | if (ctxt->d == 0 || (ctxt->d & Undefined)) |
| 3484 | return -1; | 3638 | return EMULATION_FAILED; |
| 3485 | 3639 | ||
| 3486 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) | 3640 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) |
| 3487 | return -1; | 3641 | return EMULATION_FAILED; |
| 3488 | 3642 | ||
| 3489 | if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) | 3643 | if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) |
| 3490 | ctxt->op_bytes = 8; | 3644 | ctxt->op_bytes = 8; |
| @@ -3501,96 +3655,27 @@ done_prefixes: | |||
| 3501 | 3655 | ||
| 3502 | /* ModRM and SIB bytes. */ | 3656 | /* ModRM and SIB bytes. */ |
| 3503 | if (ctxt->d & ModRM) { | 3657 | if (ctxt->d & ModRM) { |
| 3504 | rc = decode_modrm(ctxt, &memop); | 3658 | rc = decode_modrm(ctxt, &ctxt->memop); |
| 3505 | if (!ctxt->has_seg_override) | 3659 | if (!ctxt->has_seg_override) |
| 3506 | set_seg_override(ctxt, ctxt->modrm_seg); | 3660 | set_seg_override(ctxt, ctxt->modrm_seg); |
| 3507 | } else if (ctxt->d & MemAbs) | 3661 | } else if (ctxt->d & MemAbs) |
| 3508 | rc = decode_abs(ctxt, &memop); | 3662 | rc = decode_abs(ctxt, &ctxt->memop); |
| 3509 | if (rc != X86EMUL_CONTINUE) | 3663 | if (rc != X86EMUL_CONTINUE) |
| 3510 | goto done; | 3664 | goto done; |
| 3511 | 3665 | ||
| 3512 | if (!ctxt->has_seg_override) | 3666 | if (!ctxt->has_seg_override) |
| 3513 | set_seg_override(ctxt, VCPU_SREG_DS); | 3667 | set_seg_override(ctxt, VCPU_SREG_DS); |
| 3514 | 3668 | ||
| 3515 | memop.addr.mem.seg = seg_override(ctxt); | 3669 | ctxt->memop.addr.mem.seg = seg_override(ctxt); |
| 3516 | 3670 | ||
| 3517 | if (memop.type == OP_MEM && ctxt->ad_bytes != 8) | 3671 | if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8) |
| 3518 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; | 3672 | ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea; |
| 3519 | 3673 | ||
| 3520 | /* | 3674 | /* |
| 3521 | * Decode and fetch the source operand: register, memory | 3675 | * Decode and fetch the source operand: register, memory |
| 3522 | * or immediate. | 3676 | * or immediate. |
| 3523 | */ | 3677 | */ |
| 3524 | switch (ctxt->d & SrcMask) { | 3678 | rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask); |
| 3525 | case SrcNone: | ||
| 3526 | break; | ||
| 3527 | case SrcReg: | ||
| 3528 | decode_register_operand(ctxt, &ctxt->src, 0); | ||
| 3529 | break; | ||
| 3530 | case SrcMem16: | ||
| 3531 | memop.bytes = 2; | ||
| 3532 | goto srcmem_common; | ||
| 3533 | case SrcMem32: | ||
| 3534 | memop.bytes = 4; | ||
| 3535 | goto srcmem_common; | ||
| 3536 | case SrcMem: | ||
| 3537 | memop.bytes = (ctxt->d & ByteOp) ? 1 : | ||
| 3538 | ctxt->op_bytes; | ||
| 3539 | srcmem_common: | ||
| 3540 | ctxt->src = memop; | ||
| 3541 | memopp = &ctxt->src; | ||
| 3542 | break; | ||
| 3543 | case SrcImmU16: | ||
| 3544 | rc = decode_imm(ctxt, &ctxt->src, 2, false); | ||
| 3545 | break; | ||
| 3546 | case SrcImm: | ||
| 3547 | rc = decode_imm(ctxt, &ctxt->src, imm_size(ctxt), true); | ||
| 3548 | break; | ||
| 3549 | case SrcImmU: | ||
| 3550 | rc = decode_imm(ctxt, &ctxt->src, imm_size(ctxt), false); | ||
| 3551 | break; | ||
| 3552 | case SrcImmByte: | ||
| 3553 | rc = decode_imm(ctxt, &ctxt->src, 1, true); | ||
| 3554 | break; | ||
| 3555 | case SrcImmUByte: | ||
| 3556 | rc = decode_imm(ctxt, &ctxt->src, 1, false); | ||
| 3557 | break; | ||
| 3558 | case SrcAcc: | ||
| 3559 | ctxt->src.type = OP_REG; | ||
| 3560 | ctxt->src.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3561 | ctxt->src.addr.reg = &ctxt->regs[VCPU_REGS_RAX]; | ||
| 3562 | fetch_register_operand(&ctxt->src); | ||
| 3563 | break; | ||
| 3564 | case SrcOne: | ||
| 3565 | ctxt->src.bytes = 1; | ||
| 3566 | ctxt->src.val = 1; | ||
| 3567 | break; | ||
| 3568 | case SrcSI: | ||
| 3569 | ctxt->src.type = OP_MEM; | ||
| 3570 | ctxt->src.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3571 | ctxt->src.addr.mem.ea = | ||
| 3572 | register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); | ||
| 3573 | ctxt->src.addr.mem.seg = seg_override(ctxt); | ||
| 3574 | ctxt->src.val = 0; | ||
| 3575 | break; | ||
| 3576 | case SrcImmFAddr: | ||
| 3577 | ctxt->src.type = OP_IMM; | ||
| 3578 | ctxt->src.addr.mem.ea = ctxt->_eip; | ||
| 3579 | ctxt->src.bytes = ctxt->op_bytes + 2; | ||
| 3580 | insn_fetch_arr(ctxt->src.valptr, ctxt->src.bytes, ctxt->_eip); | ||
| 3581 | break; | ||
| 3582 | case SrcMemFAddr: | ||
| 3583 | memop.bytes = ctxt->op_bytes + 2; | ||
| 3584 | goto srcmem_common; | ||
| 3585 | break; | ||
| 3586 | case SrcDX: | ||
| 3587 | ctxt->src.type = OP_REG; | ||
| 3588 | ctxt->src.bytes = 2; | ||
| 3589 | ctxt->src.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; | ||
| 3590 | fetch_register_operand(&ctxt->src); | ||
| 3591 | break; | ||
| 3592 | } | ||
| 3593 | |||
| 3594 | if (rc != X86EMUL_CONTINUE) | 3679 | if (rc != X86EMUL_CONTINUE) |
| 3595 | goto done; | 3680 | goto done; |
| 3596 | 3681 | ||
| @@ -3598,85 +3683,18 @@ done_prefixes: | |||
| 3598 | * Decode and fetch the second source operand: register, memory | 3683 | * Decode and fetch the second source operand: register, memory |
| 3599 | * or immediate. | 3684 | * or immediate. |
| 3600 | */ | 3685 | */ |
| 3601 | switch (ctxt->d & Src2Mask) { | 3686 | rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask); |
| 3602 | case Src2None: | ||
| 3603 | break; | ||
| 3604 | case Src2CL: | ||
| 3605 | ctxt->src2.bytes = 1; | ||
| 3606 | ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0xff; | ||
| 3607 | break; | ||
| 3608 | case Src2ImmByte: | ||
| 3609 | rc = decode_imm(ctxt, &ctxt->src2, 1, true); | ||
| 3610 | break; | ||
| 3611 | case Src2One: | ||
| 3612 | ctxt->src2.bytes = 1; | ||
| 3613 | ctxt->src2.val = 1; | ||
| 3614 | break; | ||
| 3615 | case Src2Imm: | ||
| 3616 | rc = decode_imm(ctxt, &ctxt->src2, imm_size(ctxt), true); | ||
| 3617 | break; | ||
| 3618 | } | ||
| 3619 | |||
| 3620 | if (rc != X86EMUL_CONTINUE) | 3687 | if (rc != X86EMUL_CONTINUE) |
| 3621 | goto done; | 3688 | goto done; |
| 3622 | 3689 | ||
| 3623 | /* Decode and fetch the destination operand: register or memory. */ | 3690 | /* Decode and fetch the destination operand: register or memory. */ |
| 3624 | switch (ctxt->d & DstMask) { | 3691 | rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); |
| 3625 | case DstReg: | ||
| 3626 | decode_register_operand(ctxt, &ctxt->dst, | ||
| 3627 | ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7)); | ||
| 3628 | break; | ||
| 3629 | case DstImmUByte: | ||
| 3630 | ctxt->dst.type = OP_IMM; | ||
| 3631 | ctxt->dst.addr.mem.ea = ctxt->_eip; | ||
| 3632 | ctxt->dst.bytes = 1; | ||
| 3633 | ctxt->dst.val = insn_fetch(u8, 1, ctxt->_eip); | ||
| 3634 | break; | ||
| 3635 | case DstMem: | ||
| 3636 | case DstMem64: | ||
| 3637 | ctxt->dst = memop; | ||
| 3638 | memopp = &ctxt->dst; | ||
| 3639 | if ((ctxt->d & DstMask) == DstMem64) | ||
| 3640 | ctxt->dst.bytes = 8; | ||
| 3641 | else | ||
| 3642 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3643 | if (ctxt->d & BitOp) | ||
| 3644 | fetch_bit_operand(ctxt); | ||
| 3645 | ctxt->dst.orig_val = ctxt->dst.val; | ||
| 3646 | break; | ||
| 3647 | case DstAcc: | ||
| 3648 | ctxt->dst.type = OP_REG; | ||
| 3649 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3650 | ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RAX]; | ||
| 3651 | fetch_register_operand(&ctxt->dst); | ||
| 3652 | ctxt->dst.orig_val = ctxt->dst.val; | ||
| 3653 | break; | ||
| 3654 | case DstDI: | ||
| 3655 | ctxt->dst.type = OP_MEM; | ||
| 3656 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | ||
| 3657 | ctxt->dst.addr.mem.ea = | ||
| 3658 | register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); | ||
| 3659 | ctxt->dst.addr.mem.seg = VCPU_SREG_ES; | ||
| 3660 | ctxt->dst.val = 0; | ||
| 3661 | break; | ||
| 3662 | case DstDX: | ||
| 3663 | ctxt->dst.type = OP_REG; | ||
| 3664 | ctxt->dst.bytes = 2; | ||
| 3665 | ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; | ||
| 3666 | fetch_register_operand(&ctxt->dst); | ||
| 3667 | break; | ||
| 3668 | case ImplicitOps: | ||
| 3669 | /* Special instructions do their own operand decoding. */ | ||
| 3670 | default: | ||
| 3671 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ | ||
| 3672 | break; | ||
| 3673 | } | ||
| 3674 | 3692 | ||
| 3675 | done: | 3693 | done: |
| 3676 | if (memopp && memopp->type == OP_MEM && ctxt->rip_relative) | 3694 | if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative) |
| 3677 | memopp->addr.mem.ea += ctxt->_eip; | 3695 | ctxt->memopp->addr.mem.ea += ctxt->_eip; |
| 3678 | 3696 | ||
| 3679 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 3697 | return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; |
| 3680 | } | 3698 | } |
| 3681 | 3699 | ||
| 3682 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | 3700 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) |
| @@ -3825,32 +3843,11 @@ special_insn: | |||
| 3825 | goto twobyte_insn; | 3843 | goto twobyte_insn; |
| 3826 | 3844 | ||
| 3827 | switch (ctxt->b) { | 3845 | switch (ctxt->b) { |
| 3828 | case 0x06: /* push es */ | ||
| 3829 | rc = emulate_push_sreg(ctxt, VCPU_SREG_ES); | ||
| 3830 | break; | ||
| 3831 | case 0x07: /* pop es */ | ||
| 3832 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_ES); | ||
| 3833 | break; | ||
| 3834 | case 0x0e: /* push cs */ | ||
| 3835 | rc = emulate_push_sreg(ctxt, VCPU_SREG_CS); | ||
| 3836 | break; | ||
| 3837 | case 0x16: /* push ss */ | ||
| 3838 | rc = emulate_push_sreg(ctxt, VCPU_SREG_SS); | ||
| 3839 | break; | ||
| 3840 | case 0x17: /* pop ss */ | ||
| 3841 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_SS); | ||
| 3842 | break; | ||
| 3843 | case 0x1e: /* push ds */ | ||
| 3844 | rc = emulate_push_sreg(ctxt, VCPU_SREG_DS); | ||
| 3845 | break; | ||
| 3846 | case 0x1f: /* pop ds */ | ||
| 3847 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_DS); | ||
| 3848 | break; | ||
| 3849 | case 0x40 ... 0x47: /* inc r16/r32 */ | 3846 | case 0x40 ... 0x47: /* inc r16/r32 */ |
| 3850 | emulate_1op("inc", ctxt->dst, ctxt->eflags); | 3847 | emulate_1op(ctxt, "inc"); |
| 3851 | break; | 3848 | break; |
| 3852 | case 0x48 ... 0x4f: /* dec r16/r32 */ | 3849 | case 0x48 ... 0x4f: /* dec r16/r32 */ |
| 3853 | emulate_1op("dec", ctxt->dst, ctxt->eflags); | 3850 | emulate_1op(ctxt, "dec"); |
| 3854 | break; | 3851 | break; |
| 3855 | case 0x63: /* movsxd */ | 3852 | case 0x63: /* movsxd */ |
| 3856 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 3853 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
| @@ -3891,12 +3888,6 @@ special_insn: | |||
| 3891 | case 0xc0 ... 0xc1: | 3888 | case 0xc0 ... 0xc1: |
| 3892 | rc = em_grp2(ctxt); | 3889 | rc = em_grp2(ctxt); |
| 3893 | break; | 3890 | break; |
| 3894 | case 0xc4: /* les */ | ||
| 3895 | rc = emulate_load_segment(ctxt, VCPU_SREG_ES); | ||
| 3896 | break; | ||
| 3897 | case 0xc5: /* lds */ | ||
| 3898 | rc = emulate_load_segment(ctxt, VCPU_SREG_DS); | ||
| 3899 | break; | ||
| 3900 | case 0xcc: /* int3 */ | 3891 | case 0xcc: /* int3 */ |
| 3901 | rc = emulate_int(ctxt, 3); | 3892 | rc = emulate_int(ctxt, 3); |
| 3902 | break; | 3893 | break; |
| @@ -3953,9 +3944,6 @@ special_insn: | |||
| 3953 | /* complement carry flag from eflags reg */ | 3944 | /* complement carry flag from eflags reg */ |
| 3954 | ctxt->eflags ^= EFLG_CF; | 3945 | ctxt->eflags ^= EFLG_CF; |
| 3955 | break; | 3946 | break; |
| 3956 | case 0xf6 ... 0xf7: /* Grp3 */ | ||
| 3957 | rc = em_grp3(ctxt); | ||
| 3958 | break; | ||
| 3959 | case 0xf8: /* clc */ | 3947 | case 0xf8: /* clc */ |
| 3960 | ctxt->eflags &= ~EFLG_CF; | 3948 | ctxt->eflags &= ~EFLG_CF; |
| 3961 | break; | 3949 | break; |
| @@ -4103,36 +4091,24 @@ twobyte_insn: | |||
| 4103 | case 0x90 ... 0x9f: /* setcc r/m8 */ | 4091 | case 0x90 ... 0x9f: /* setcc r/m8 */ |
| 4104 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); | 4092 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); |
| 4105 | break; | 4093 | break; |
| 4106 | case 0xa0: /* push fs */ | ||
| 4107 | rc = emulate_push_sreg(ctxt, VCPU_SREG_FS); | ||
| 4108 | break; | ||
| 4109 | case 0xa1: /* pop fs */ | ||
| 4110 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_FS); | ||
| 4111 | break; | ||
| 4112 | case 0xa3: | 4094 | case 0xa3: |
| 4113 | bt: /* bt */ | 4095 | bt: /* bt */ |
| 4114 | ctxt->dst.type = OP_NONE; | 4096 | ctxt->dst.type = OP_NONE; |
| 4115 | /* only subword offset */ | 4097 | /* only subword offset */ |
| 4116 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | 4098 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; |
| 4117 | emulate_2op_SrcV_nobyte("bt", ctxt->src, ctxt->dst, ctxt->eflags); | 4099 | emulate_2op_SrcV_nobyte(ctxt, "bt"); |
| 4118 | break; | 4100 | break; |
| 4119 | case 0xa4: /* shld imm8, r, r/m */ | 4101 | case 0xa4: /* shld imm8, r, r/m */ |
| 4120 | case 0xa5: /* shld cl, r, r/m */ | 4102 | case 0xa5: /* shld cl, r, r/m */ |
| 4121 | emulate_2op_cl("shld", ctxt->src2, ctxt->src, ctxt->dst, ctxt->eflags); | 4103 | emulate_2op_cl(ctxt, "shld"); |
| 4122 | break; | ||
| 4123 | case 0xa8: /* push gs */ | ||
| 4124 | rc = emulate_push_sreg(ctxt, VCPU_SREG_GS); | ||
| 4125 | break; | ||
| 4126 | case 0xa9: /* pop gs */ | ||
| 4127 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_GS); | ||
| 4128 | break; | 4104 | break; |
| 4129 | case 0xab: | 4105 | case 0xab: |
| 4130 | bts: /* bts */ | 4106 | bts: /* bts */ |
| 4131 | emulate_2op_SrcV_nobyte("bts", ctxt->src, ctxt->dst, ctxt->eflags); | 4107 | emulate_2op_SrcV_nobyte(ctxt, "bts"); |
| 4132 | break; | 4108 | break; |
| 4133 | case 0xac: /* shrd imm8, r, r/m */ | 4109 | case 0xac: /* shrd imm8, r, r/m */ |
| 4134 | case 0xad: /* shrd cl, r, r/m */ | 4110 | case 0xad: /* shrd cl, r, r/m */ |
| 4135 | emulate_2op_cl("shrd", ctxt->src2, ctxt->src, ctxt->dst, ctxt->eflags); | 4111 | emulate_2op_cl(ctxt, "shrd"); |
| 4136 | break; | 4112 | break; |
| 4137 | case 0xae: /* clflush */ | 4113 | case 0xae: /* clflush */ |
| 4138 | break; | 4114 | break; |
| @@ -4143,7 +4119,7 @@ twobyte_insn: | |||
| 4143 | */ | 4119 | */ |
| 4144 | ctxt->src.orig_val = ctxt->src.val; | 4120 | ctxt->src.orig_val = ctxt->src.val; |
| 4145 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; | 4121 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; |
| 4146 | emulate_2op_SrcV("cmp", ctxt->src, ctxt->dst, ctxt->eflags); | 4122 | emulate_2op_SrcV(ctxt, "cmp"); |
| 4147 | if (ctxt->eflags & EFLG_ZF) { | 4123 | if (ctxt->eflags & EFLG_ZF) { |
| 4148 | /* Success: write back to memory. */ | 4124 | /* Success: write back to memory. */ |
| 4149 | ctxt->dst.val = ctxt->src.orig_val; | 4125 | ctxt->dst.val = ctxt->src.orig_val; |
| @@ -4153,18 +4129,9 @@ twobyte_insn: | |||
| 4153 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; | 4129 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; |
| 4154 | } | 4130 | } |
| 4155 | break; | 4131 | break; |
| 4156 | case 0xb2: /* lss */ | ||
| 4157 | rc = emulate_load_segment(ctxt, VCPU_SREG_SS); | ||
| 4158 | break; | ||
| 4159 | case 0xb3: | 4132 | case 0xb3: |
| 4160 | btr: /* btr */ | 4133 | btr: /* btr */ |
| 4161 | emulate_2op_SrcV_nobyte("btr", ctxt->src, ctxt->dst, ctxt->eflags); | 4134 | emulate_2op_SrcV_nobyte(ctxt, "btr"); |
| 4162 | break; | ||
| 4163 | case 0xb4: /* lfs */ | ||
| 4164 | rc = emulate_load_segment(ctxt, VCPU_SREG_FS); | ||
| 4165 | break; | ||
| 4166 | case 0xb5: /* lgs */ | ||
| 4167 | rc = emulate_load_segment(ctxt, VCPU_SREG_GS); | ||
| 4168 | break; | 4135 | break; |
| 4169 | case 0xb6 ... 0xb7: /* movzx */ | 4136 | case 0xb6 ... 0xb7: /* movzx */ |
| 4170 | ctxt->dst.bytes = ctxt->op_bytes; | 4137 | ctxt->dst.bytes = ctxt->op_bytes; |
| @@ -4185,7 +4152,7 @@ twobyte_insn: | |||
| 4185 | break; | 4152 | break; |
| 4186 | case 0xbb: | 4153 | case 0xbb: |
| 4187 | btc: /* btc */ | 4154 | btc: /* btc */ |
| 4188 | emulate_2op_SrcV_nobyte("btc", ctxt->src, ctxt->dst, ctxt->eflags); | 4155 | emulate_2op_SrcV_nobyte(ctxt, "btc"); |
| 4189 | break; | 4156 | break; |
| 4190 | case 0xbc: { /* bsf */ | 4157 | case 0xbc: { /* bsf */ |
| 4191 | u8 zf; | 4158 | u8 zf; |
| @@ -4217,7 +4184,7 @@ twobyte_insn: | |||
| 4217 | (s16) ctxt->src.val; | 4184 | (s16) ctxt->src.val; |
| 4218 | break; | 4185 | break; |
| 4219 | case 0xc0 ... 0xc1: /* xadd */ | 4186 | case 0xc0 ... 0xc1: /* xadd */ |
| 4220 | emulate_2op_SrcV("add", ctxt->src, ctxt->dst, ctxt->eflags); | 4187 | emulate_2op_SrcV(ctxt, "add"); |
| 4221 | /* Write back the register source. */ | 4188 | /* Write back the register source. */ |
| 4222 | ctxt->src.val = ctxt->dst.orig_val; | 4189 | ctxt->src.val = ctxt->dst.orig_val; |
| 4223 | write_register_operand(&ctxt->src); | 4190 | write_register_operand(&ctxt->src); |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index efad72385058..76e3f1cd0369 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -713,14 +713,16 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
| 713 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | 713 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
| 714 | 714 | ||
| 715 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); | 715 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); |
| 716 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); | 716 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, |
| 717 | KVM_PIT_MEM_LENGTH, &pit->dev); | ||
| 717 | if (ret < 0) | 718 | if (ret < 0) |
| 718 | goto fail; | 719 | goto fail; |
| 719 | 720 | ||
| 720 | if (flags & KVM_PIT_SPEAKER_DUMMY) { | 721 | if (flags & KVM_PIT_SPEAKER_DUMMY) { |
| 721 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); | 722 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); |
| 722 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, | 723 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, |
| 723 | &pit->speaker_dev); | 724 | KVM_SPEAKER_BASE_ADDRESS, 4, |
| 725 | &pit->speaker_dev); | ||
| 724 | if (ret < 0) | 726 | if (ret < 0) |
| 725 | goto fail_unregister; | 727 | goto fail_unregister; |
| 726 | } | 728 | } |
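kvm_io_bus_register_dev now takes the device's base address and length, so the bus can match an access to a registered range instead of every device fielding the call and returning -EOPNOTSUPP from its own range check. A toy userspace model of that registration and lookup (invented types and names):

#include <stdio.h>
#include <stddef.h>

struct io_device { const char *name; };

struct io_range {
        unsigned long base;
        unsigned long len;
        struct io_device *dev;
};

#define MAX_DEVS 8
static struct io_range bus[MAX_DEVS];
static int nr_devs;

/* Registration records the claimed [base, base + len) window. */
static int io_bus_register_dev(unsigned long base, unsigned long len,
                               struct io_device *dev)
{
        if (nr_devs == MAX_DEVS)
                return -1;
        bus[nr_devs++] = (struct io_range){ base, len, dev };
        return 0;
}

/* The bus, not the device, decides who owns an access. */
static struct io_device *io_bus_find(unsigned long addr)
{
        for (int i = 0; i < nr_devs; i++)
                if (addr >= bus[i].base && addr < bus[i].base + bus[i].len)
                        return bus[i].dev;
        return NULL;
}

int main(void)
{
        static struct io_device pit = { "pit" }, spk = { "speaker" };

        io_bus_register_dev(0x40, 4, &pit);     /* the PC PIT lives at ports 0x40-0x43 */
        io_bus_register_dev(0x61, 1, &spk);     /* speaker port */

        struct io_device *d = io_bus_find(0x43);
        printf("%s\n", d ? d->name : "none");   /* "pit" */
        return 0;
}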
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 19fe855e7953..cac4746d7ffb 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -34,6 +34,9 @@ | |||
| 34 | #include <linux/kvm_host.h> | 34 | #include <linux/kvm_host.h> |
| 35 | #include "trace.h" | 35 | #include "trace.h" |
| 36 | 36 | ||
| 37 | #define pr_pic_unimpl(fmt, ...) \ | ||
| 38 | pr_err_ratelimited("kvm: pic: " fmt, ## __VA_ARGS__) | ||
| 39 | |||
| 37 | static void pic_irq_request(struct kvm *kvm, int level); | 40 | static void pic_irq_request(struct kvm *kvm, int level); |
| 38 | 41 | ||
| 39 | static void pic_lock(struct kvm_pic *s) | 42 | static void pic_lock(struct kvm_pic *s) |
| @@ -306,10 +309,10 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 306 | } | 309 | } |
| 307 | s->init_state = 1; | 310 | s->init_state = 1; |
| 308 | if (val & 0x02) | 311 | if (val & 0x02) |
| 309 | printk(KERN_ERR "single mode not supported"); | 312 | pr_pic_unimpl("single mode not supported"); |
| 310 | if (val & 0x08) | 313 | if (val & 0x08) |
| 311 | printk(KERN_ERR | 314 | pr_pic_unimpl( |
| 312 | "level sensitive irq not supported"); | 315 | "level sensitive irq not supported"); |
| 313 | } else if (val & 0x08) { | 316 | } else if (val & 0x08) { |
| 314 | if (val & 0x04) | 317 | if (val & 0x04) |
| 315 | s->poll = 1; | 318 | s->poll = 1; |
| @@ -459,22 +462,15 @@ static int picdev_in_range(gpa_t addr) | |||
| 459 | } | 462 | } |
| 460 | } | 463 | } |
| 461 | 464 | ||
| 462 | static inline struct kvm_pic *to_pic(struct kvm_io_device *dev) | 465 | static int picdev_write(struct kvm_pic *s, |
| 463 | { | ||
| 464 | return container_of(dev, struct kvm_pic, dev); | ||
| 465 | } | ||
| 466 | |||
| 467 | static int picdev_write(struct kvm_io_device *this, | ||
| 468 | gpa_t addr, int len, const void *val) | 466 | gpa_t addr, int len, const void *val) |
| 469 | { | 467 | { |
| 470 | struct kvm_pic *s = to_pic(this); | ||
| 471 | unsigned char data = *(unsigned char *)val; | 468 | unsigned char data = *(unsigned char *)val; |
| 472 | if (!picdev_in_range(addr)) | 469 | if (!picdev_in_range(addr)) |
| 473 | return -EOPNOTSUPP; | 470 | return -EOPNOTSUPP; |
| 474 | 471 | ||
| 475 | if (len != 1) { | 472 | if (len != 1) { |
| 476 | if (printk_ratelimit()) | 473 | pr_pic_unimpl("non byte write\n"); |
| 477 | printk(KERN_ERR "PIC: non byte write\n"); | ||
| 478 | return 0; | 474 | return 0; |
| 479 | } | 475 | } |
| 480 | pic_lock(s); | 476 | pic_lock(s); |
| @@ -494,17 +490,15 @@ static int picdev_write(struct kvm_io_device *this, | |||
| 494 | return 0; | 490 | return 0; |
| 495 | } | 491 | } |
| 496 | 492 | ||
| 497 | static int picdev_read(struct kvm_io_device *this, | 493 | static int picdev_read(struct kvm_pic *s, |
| 498 | gpa_t addr, int len, void *val) | 494 | gpa_t addr, int len, void *val) |
| 499 | { | 495 | { |
| 500 | struct kvm_pic *s = to_pic(this); | ||
| 501 | unsigned char data = 0; | 496 | unsigned char data = 0; |
| 502 | if (!picdev_in_range(addr)) | 497 | if (!picdev_in_range(addr)) |
| 503 | return -EOPNOTSUPP; | 498 | return -EOPNOTSUPP; |
| 504 | 499 | ||
| 505 | if (len != 1) { | 500 | if (len != 1) { |
| 506 | if (printk_ratelimit()) | 501 | pr_pic_unimpl("non byte read\n"); |
| 507 | printk(KERN_ERR "PIC: non byte read\n"); | ||
| 508 | return 0; | 502 | return 0; |
| 509 | } | 503 | } |
| 510 | pic_lock(s); | 504 | pic_lock(s); |
| @@ -525,6 +519,48 @@ static int picdev_read(struct kvm_io_device *this, | |||
| 525 | return 0; | 519 | return 0; |
| 526 | } | 520 | } |
| 527 | 521 | ||
| 522 | static int picdev_master_write(struct kvm_io_device *dev, | ||
| 523 | gpa_t addr, int len, const void *val) | ||
| 524 | { | ||
| 525 | return picdev_write(container_of(dev, struct kvm_pic, dev_master), | ||
| 526 | addr, len, val); | ||
| 527 | } | ||
| 528 | |||
| 529 | static int picdev_master_read(struct kvm_io_device *dev, | ||
| 530 | gpa_t addr, int len, void *val) | ||
| 531 | { | ||
| 532 | return picdev_read(container_of(dev, struct kvm_pic, dev_master), | ||
| 533 | addr, len, val); | ||
| 534 | } | ||
| 535 | |||
| 536 | static int picdev_slave_write(struct kvm_io_device *dev, | ||
| 537 | gpa_t addr, int len, const void *val) | ||
| 538 | { | ||
| 539 | return picdev_write(container_of(dev, struct kvm_pic, dev_slave), | ||
| 540 | addr, len, val); | ||
| 541 | } | ||
| 542 | |||
| 543 | static int picdev_slave_read(struct kvm_io_device *dev, | ||
| 544 | gpa_t addr, int len, void *val) | ||
| 545 | { | ||
| 546 | return picdev_read(container_of(dev, struct kvm_pic, dev_slave), | ||
| 547 | addr, len, val); | ||
| 548 | } | ||
| 549 | |||
| 550 | static int picdev_eclr_write(struct kvm_io_device *dev, | ||
| 551 | gpa_t addr, int len, const void *val) | ||
| 552 | { | ||
| 553 | return picdev_write(container_of(dev, struct kvm_pic, dev_eclr), | ||
| 554 | addr, len, val); | ||
| 555 | } | ||
| 556 | |||
| 557 | static int picdev_eclr_read(struct kvm_io_device *dev, | ||
| 558 | gpa_t addr, int len, void *val) | ||
| 559 | { | ||
| 560 | return picdev_read(container_of(dev, struct kvm_pic, dev_eclr), | ||
| 561 | addr, len, val); | ||
| 562 | } | ||
| 563 | |||
| 528 | /* | 564 | /* |
| 529 | * callback when PIC0 irq status changed | 565 | * callback when PIC0 irq status changed |
| 530 | */ | 566 | */ |
| @@ -537,9 +573,19 @@ static void pic_irq_request(struct kvm *kvm, int level) | |||
| 537 | s->output = level; | 573 | s->output = level; |
| 538 | } | 574 | } |
| 539 | 575 | ||
| 540 | static const struct kvm_io_device_ops picdev_ops = { | 576 | static const struct kvm_io_device_ops picdev_master_ops = { |
| 541 | .read = picdev_read, | 577 | .read = picdev_master_read, |
| 542 | .write = picdev_write, | 578 | .write = picdev_master_write, |
| 579 | }; | ||
| 580 | |||
| 581 | static const struct kvm_io_device_ops picdev_slave_ops = { | ||
| 582 | .read = picdev_slave_read, | ||
| 583 | .write = picdev_slave_write, | ||
| 584 | }; | ||
| 585 | |||
| 586 | static const struct kvm_io_device_ops picdev_eclr_ops = { | ||
| 587 | .read = picdev_eclr_read, | ||
| 588 | .write = picdev_eclr_write, | ||
| 543 | }; | 589 | }; |
| 544 | 590 | ||
| 545 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) | 591 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) |
| @@ -560,16 +606,39 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
| 560 | /* | 606 | /* |
| 561 | * Initialize PIO device | 607 | * Initialize PIO device |
| 562 | */ | 608 | */ |
| 563 | kvm_iodevice_init(&s->dev, &picdev_ops); | 609 | kvm_iodevice_init(&s->dev_master, &picdev_master_ops); |
| 610 | kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops); | ||
| 611 | kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops); | ||
| 564 | mutex_lock(&kvm->slots_lock); | 612 | mutex_lock(&kvm->slots_lock); |
| 565 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); | 613 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, |
| 614 | &s->dev_master); | ||
| 615 | if (ret < 0) | ||
| 616 | goto fail_unlock; | ||
| 617 | |||
| 618 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave); | ||
| 619 | if (ret < 0) | ||
| 620 | goto fail_unreg_2; | ||
| 621 | |||
| 622 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr); | ||
| 623 | if (ret < 0) | ||
| 624 | goto fail_unreg_1; | ||
| 625 | |||
| 566 | mutex_unlock(&kvm->slots_lock); | 626 | mutex_unlock(&kvm->slots_lock); |
| 567 | if (ret < 0) { | ||
| 568 | kfree(s); | ||
| 569 | return NULL; | ||
| 570 | } | ||
| 571 | 627 | ||
| 572 | return s; | 628 | return s; |
| 629 | |||
| 630 | fail_unreg_1: | ||
| 631 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave); | ||
| 632 | |||
| 633 | fail_unreg_2: | ||
| 634 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master); | ||
| 635 | |||
| 636 | fail_unlock: | ||
| 637 | mutex_unlock(&kvm->slots_lock); | ||
| 638 | |||
| 639 | kfree(s); | ||
| 640 | |||
| 641 | return NULL; | ||
| 573 | } | 642 | } |
| 574 | 643 | ||
| 575 | void kvm_destroy_pic(struct kvm *kvm) | 644 | void kvm_destroy_pic(struct kvm *kvm) |
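With three registrations to undo, kvm_create_pic above switches to the usual kernel goto-unwind ladder: each failure label releases exactly what the earlier steps acquired, in reverse order. A generic, runnable shape of the same pattern (the label names here are descriptive rather than the patch's fail_unreg_1/fail_unreg_2):

#include <stdio.h>

static int register_dev(const char *name)
{
        printf("register %s\n", name);
        return 0;                       /* pretend success; < 0 on error */
}

static void unregister_dev(const char *name)
{
        printf("unregister %s\n", name);
}

static int create(void)
{
        int ret;

        ret = register_dev("master");
        if (ret < 0)
                goto fail;

        ret = register_dev("slave");
        if (ret < 0)
                goto fail_unreg_master;

        ret = register_dev("eclr");
        if (ret < 0)
                goto fail_unreg_slave;

        return 0;

        /* Unwind strictly in reverse order of acquisition. */
fail_unreg_slave:
        unregister_dev("slave");
fail_unreg_master:
        unregister_dev("master");
fail:
        return ret;
}

int main(void)
{
        return create() ? 1 : 0;
}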
| @@ -577,7 +646,9 @@ void kvm_destroy_pic(struct kvm *kvm) | |||
| 577 | struct kvm_pic *vpic = kvm->arch.vpic; | 646 | struct kvm_pic *vpic = kvm->arch.vpic; |
| 578 | 647 | ||
| 579 | if (vpic) { | 648 | if (vpic) { |
| 580 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev); | 649 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master); |
| 650 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave); | ||
| 651 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr); | ||
| 581 | kvm->arch.vpic = NULL; | 652 | kvm->arch.vpic = NULL; |
| 582 | kfree(vpic); | 653 | kfree(vpic); |
| 583 | } | 654 | } |
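struct kvm_pic now embeds three kvm_io_device members, so picdev_read/picdev_write take the pic directly and thin per-device wrappers recover it with container_of from whichever embedded member the I/O bus invoked. A self-contained illustration of that embed-and-recover pattern (two devices are enough to show it):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct io_device { const char *tag; };

struct pic {
        int output;
        struct io_device dev_master;
        struct io_device dev_slave;
};

/* Common worker: operates on the owning pic, not on the io_device. */
static void pic_write(struct pic *s, const char *via)
{
        s->output++;
        printf("write via %s, output=%d\n", via, s->output);
}

/* Thin wrappers: recover the pic from the embedded member that was used. */
static void master_write(struct io_device *dev)
{
        pic_write(container_of(dev, struct pic, dev_master), dev->tag);
}

static void slave_write(struct io_device *dev)
{
        pic_write(container_of(dev, struct pic, dev_slave), dev->tag);
}

int main(void)
{
        struct pic s = { 0, { "master" }, { "slave" } };

        master_write(&s.dev_master);    /* both land on the same struct pic */
        slave_write(&s.dev_slave);
        return 0;
}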
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 53e2d084bffb..2086f2bfba33 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
| @@ -66,7 +66,9 @@ struct kvm_pic { | |||
| 66 | struct kvm *kvm; | 66 | struct kvm *kvm; |
| 67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
| 68 | int output; /* intr from master PIC */ | 68 | int output; /* intr from master PIC */ |
| 69 | struct kvm_io_device dev; | 69 | struct kvm_io_device dev_master; |
| 70 | struct kvm_io_device dev_slave; | ||
| 71 | struct kvm_io_device dev_eclr; | ||
| 70 | void (*ack_notifier)(void *opaque, int irq); | 72 | void (*ack_notifier)(void *opaque, int irq); |
| 71 | unsigned long irq_states[16]; | 73 | unsigned long irq_states[16]; |
| 72 | }; | 74 | }; |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 3377d53fcd36..544076c4f44b 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
| @@ -45,13 +45,6 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | |||
| 45 | return vcpu->arch.walk_mmu->pdptrs[index]; | 45 | return vcpu->arch.walk_mmu->pdptrs[index]; |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index) | ||
| 49 | { | ||
| 50 | load_pdptrs(vcpu, mmu, mmu->get_cr3(vcpu)); | ||
| 51 | |||
| 52 | return mmu->pdptrs[index]; | ||
| 53 | } | ||
| 54 | |||
| 55 | static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) | 48 | static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) |
| 56 | { | 49 | { |
| 57 | ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; | 50 | ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 64bc6ea78d90..497dbaa366d4 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | struct kvm_timer { | 2 | struct kvm_timer { |
| 3 | struct hrtimer timer; | 3 | struct hrtimer timer; |
| 4 | s64 period; /* unit: ns */ | 4 | s64 period; /* unit: ns */ |
| 5 | u32 timer_mode_mask; | ||
| 6 | u64 tscdeadline; | ||
| 5 | atomic_t pending; /* accumulated triggered timers */ | 7 | atomic_t pending; /* accumulated triggered timers */ |
| 6 | bool reinject; | 8 | bool reinject; |
| 7 | struct kvm_timer_ops *t_ops; | 9 | struct kvm_timer_ops *t_ops; |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 57dcbd4308fa..54abb40199d6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -68,6 +68,9 @@ | |||
| 68 | #define VEC_POS(v) ((v) & (32 - 1)) | 68 | #define VEC_POS(v) ((v) & (32 - 1)) |
| 69 | #define REG_POS(v) (((v) >> 5) << 4) | 69 | #define REG_POS(v) (((v) >> 5) << 4) |
| 70 | 70 | ||
| 71 | static unsigned int min_timer_period_us = 500; | ||
| 72 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
| 73 | |||
| 71 | static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) | 74 | static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) |
| 72 | { | 75 | { |
| 73 | return *((u32 *) (apic->regs + reg_off)); | 76 | return *((u32 *) (apic->regs + reg_off)); |
| @@ -135,9 +138,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) | |||
| 135 | return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; | 138 | return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; |
| 136 | } | 139 | } |
| 137 | 140 | ||
| 141 | static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) | ||
| 142 | { | ||
| 143 | return ((apic_get_reg(apic, APIC_LVTT) & | ||
| 144 | apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); | ||
| 145 | } | ||
| 146 | |||
| 138 | static inline int apic_lvtt_period(struct kvm_lapic *apic) | 147 | static inline int apic_lvtt_period(struct kvm_lapic *apic) |
| 139 | { | 148 | { |
| 140 | return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; | 149 | return ((apic_get_reg(apic, APIC_LVTT) & |
| 150 | apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); | ||
| 151 | } | ||
| 152 | |||
| 153 | static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) | ||
| 154 | { | ||
| 155 | return ((apic_get_reg(apic, APIC_LVTT) & | ||
| 156 | apic->lapic_timer.timer_mode_mask) == | ||
| 157 | APIC_LVT_TIMER_TSCDEADLINE); | ||
| 141 | } | 158 | } |
| 142 | 159 | ||
| 143 | static inline int apic_lvt_nmi_mode(u32 lvt_val) | 160 | static inline int apic_lvt_nmi_mode(u32 lvt_val) |
| @@ -166,7 +183,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) | |||
| 166 | } | 183 | } |
| 167 | 184 | ||
| 168 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { | 185 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { |
| 169 | LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ | 186 | LVT_MASK, /* partial LVTT mask; timer mode mask added at runtime */ |
| 170 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ | 187 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ |
| 171 | LVT_MASK | APIC_MODE_MASK, /* LVTPC */ | 188 | LVT_MASK | APIC_MODE_MASK, /* LVTPC */ |
| 172 | LINT_MASK, LINT_MASK, /* LVT0-1 */ | 189 | LINT_MASK, LINT_MASK, /* LVT0-1 */ |
| @@ -316,8 +333,8 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | |||
| 316 | result = 1; | 333 | result = 1; |
| 317 | break; | 334 | break; |
| 318 | default: | 335 | default: |
| 319 | printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n", | 336 | apic_debug("Bad DFR vcpu %d: %08x\n", |
| 320 | apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); | 337 | apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); |
| 321 | break; | 338 | break; |
| 322 | } | 339 | } |
| 323 | 340 | ||
| @@ -354,8 +371,8 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
| 354 | result = (target != source); | 371 | result = (target != source); |
| 355 | break; | 372 | break; |
| 356 | default: | 373 | default: |
| 357 | printk(KERN_WARNING "Bad dest shorthand value %x\n", | 374 | apic_debug("kvm: apic: Bad dest shorthand value %x\n", |
| 358 | short_hand); | 375 | short_hand); |
| 359 | break; | 376 | break; |
| 360 | } | 377 | } |
| 361 | 378 | ||
| @@ -401,11 +418,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 401 | break; | 418 | break; |
| 402 | 419 | ||
| 403 | case APIC_DM_REMRD: | 420 | case APIC_DM_REMRD: |
| 404 | printk(KERN_DEBUG "Ignoring delivery mode 3\n"); | 421 | apic_debug("Ignoring delivery mode 3\n"); |
| 405 | break; | 422 | break; |
| 406 | 423 | ||
| 407 | case APIC_DM_SMI: | 424 | case APIC_DM_SMI: |
| 408 | printk(KERN_DEBUG "Ignoring guest SMI\n"); | 425 | apic_debug("Ignoring guest SMI\n"); |
| 409 | break; | 426 | break; |
| 410 | 427 | ||
| 411 | case APIC_DM_NMI: | 428 | case APIC_DM_NMI: |
| @@ -565,11 +582,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) | |||
| 565 | val = kvm_apic_id(apic) << 24; | 582 | val = kvm_apic_id(apic) << 24; |
| 566 | break; | 583 | break; |
| 567 | case APIC_ARBPRI: | 584 | case APIC_ARBPRI: |
| 568 | printk(KERN_WARNING "Access APIC ARBPRI register " | 585 | apic_debug("Access APIC ARBPRI register which is for P6\n"); |
| 569 | "which is for P6\n"); | ||
| 570 | break; | 586 | break; |
| 571 | 587 | ||
| 572 | case APIC_TMCCT: /* Timer CCR */ | 588 | case APIC_TMCCT: /* Timer CCR */ |
| 589 | if (apic_lvtt_tscdeadline(apic)) | ||
| 590 | return 0; | ||
| 591 | |||
| 573 | val = apic_get_tmcct(apic); | 592 | val = apic_get_tmcct(apic); |
| 574 | break; | 593 | break; |
| 575 | 594 | ||
| @@ -664,29 +683,40 @@ static void update_divide_count(struct kvm_lapic *apic) | |||
| 664 | 683 | ||
| 665 | static void start_apic_timer(struct kvm_lapic *apic) | 684 | static void start_apic_timer(struct kvm_lapic *apic) |
| 666 | { | 685 | { |
| 667 | ktime_t now = apic->lapic_timer.timer.base->get_time(); | 686 | ktime_t now; |
| 668 | |||
| 669 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * | ||
| 670 | APIC_BUS_CYCLE_NS * apic->divide_count; | ||
| 671 | atomic_set(&apic->lapic_timer.pending, 0); | 687 | atomic_set(&apic->lapic_timer.pending, 0); |
| 672 | 688 | ||
| 673 | if (!apic->lapic_timer.period) | 689 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { |
| 674 | return; | 690 | /* lapic timer in oneshot or periodic mode */ |
| 675 | /* | 691 | now = apic->lapic_timer.timer.base->get_time(); |
| 676 | * Do not allow the guest to program periodic timers with small | 692 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) |
| 677 | * interval, since the hrtimers are not throttled by the host | 693 | * APIC_BUS_CYCLE_NS * apic->divide_count; |
| 678 | * scheduler. | 694 | |
| 679 | */ | 695 | if (!apic->lapic_timer.period) |
| 680 | if (apic_lvtt_period(apic)) { | 696 | return; |
| 681 | if (apic->lapic_timer.period < NSEC_PER_MSEC/2) | 697 | /* |
| 682 | apic->lapic_timer.period = NSEC_PER_MSEC/2; | 698 | * Do not allow the guest to program periodic timers with small |
| 683 | } | 699 | * interval, since the hrtimers are not throttled by the host |
| 700 | * scheduler. | ||
| 701 | */ | ||
| 702 | if (apic_lvtt_period(apic)) { | ||
| 703 | s64 min_period = min_timer_period_us * 1000LL; | ||
| 704 | |||
| 705 | if (apic->lapic_timer.period < min_period) { | ||
| 706 | pr_info_ratelimited( | ||
| 707 | "kvm: vcpu %i: requested %lld ns " | ||
| 708 | "lapic timer period limited to %lld ns\n", | ||
| 709 | apic->vcpu->vcpu_id, | ||
| 710 | apic->lapic_timer.period, min_period); | ||
| 711 | apic->lapic_timer.period = min_period; | ||
| 712 | } | ||
| 713 | } | ||
| 684 | 714 | ||
| 685 | hrtimer_start(&apic->lapic_timer.timer, | 715 | hrtimer_start(&apic->lapic_timer.timer, |
| 686 | ktime_add_ns(now, apic->lapic_timer.period), | 716 | ktime_add_ns(now, apic->lapic_timer.period), |
| 687 | HRTIMER_MODE_ABS); | 717 | HRTIMER_MODE_ABS); |
| 688 | 718 | ||
| 689 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" | 719 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" |
| 690 | PRIx64 ", " | 720 | PRIx64 ", " |
| 691 | "timer initial count 0x%x, period %lldns, " | 721 | "timer initial count 0x%x, period %lldns, " |
| 692 | "expire @ 0x%016" PRIx64 ".\n", __func__, | 722 | "expire @ 0x%016" PRIx64 ".\n", __func__, |
| @@ -695,6 +725,30 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
| 695 | apic->lapic_timer.period, | 725 | apic->lapic_timer.period, |
| 696 | ktime_to_ns(ktime_add_ns(now, | 726 | ktime_to_ns(ktime_add_ns(now, |
| 697 | apic->lapic_timer.period))); | 727 | apic->lapic_timer.period))); |
| 728 | } else if (apic_lvtt_tscdeadline(apic)) { | ||
| 729 | /* lapic timer in tsc deadline mode */ | ||
| 730 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; | ||
| 731 | u64 ns = 0; | ||
| 732 | struct kvm_vcpu *vcpu = apic->vcpu; | ||
| 733 | unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); | ||
| 734 | unsigned long flags; | ||
| 735 | |||
| 736 | if (unlikely(!tscdeadline || !this_tsc_khz)) | ||
| 737 | return; | ||
| 738 | |||
| 739 | local_irq_save(flags); | ||
| 740 | |||
| 741 | now = apic->lapic_timer.timer.base->get_time(); | ||
| 742 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); | ||
| 743 | if (likely(tscdeadline > guest_tsc)) { | ||
| 744 | ns = (tscdeadline - guest_tsc) * 1000000ULL; | ||
| 745 | do_div(ns, this_tsc_khz); | ||
| 746 | } | ||
| 747 | hrtimer_start(&apic->lapic_timer.timer, | ||
| 748 | ktime_add_ns(now, ns), HRTIMER_MODE_ABS); | ||
| 749 | |||
| 750 | local_irq_restore(flags); | ||
| 751 | } | ||
| 698 | } | 752 | } |
| 699 | 753 | ||
| 700 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) | 754 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) |
| @@ -782,7 +836,6 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
| 782 | 836 | ||
| 783 | case APIC_LVT0: | 837 | case APIC_LVT0: |
| 784 | apic_manage_nmi_watchdog(apic, val); | 838 | apic_manage_nmi_watchdog(apic, val); |
| 785 | case APIC_LVTT: | ||
| 786 | case APIC_LVTTHMR: | 839 | case APIC_LVTTHMR: |
| 787 | case APIC_LVTPC: | 840 | case APIC_LVTPC: |
| 788 | case APIC_LVT1: | 841 | case APIC_LVT1: |
| @@ -796,7 +849,22 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
| 796 | 849 | ||
| 797 | break; | 850 | break; |
| 798 | 851 | ||
| 852 | case APIC_LVTT: | ||
| 853 | if ((apic_get_reg(apic, APIC_LVTT) & | ||
| 854 | apic->lapic_timer.timer_mode_mask) != | ||
| 855 | (val & apic->lapic_timer.timer_mode_mask)) | ||
| 856 | hrtimer_cancel(&apic->lapic_timer.timer); | ||
| 857 | |||
| 858 | if (!apic_sw_enabled(apic)) | ||
| 859 | val |= APIC_LVT_MASKED; | ||
| 860 | val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); | ||
| 861 | apic_set_reg(apic, APIC_LVTT, val); | ||
| 862 | break; | ||
| 863 | |||
| 799 | case APIC_TMICT: | 864 | case APIC_TMICT: |
| 865 | if (apic_lvtt_tscdeadline(apic)) | ||
| 866 | break; | ||
| 867 | |||
| 800 | hrtimer_cancel(&apic->lapic_timer.timer); | 868 | hrtimer_cancel(&apic->lapic_timer.timer); |
| 801 | apic_set_reg(apic, APIC_TMICT, val); | 869 | apic_set_reg(apic, APIC_TMICT, val); |
| 802 | start_apic_timer(apic); | 870 | start_apic_timer(apic); |
| @@ -804,14 +872,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) | |||
| 804 | 872 | ||
| 805 | case APIC_TDCR: | 873 | case APIC_TDCR: |
| 806 | if (val & 4) | 874 | if (val & 4) |
| 807 | printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val); | 875 | apic_debug("KVM_WRITE:TDCR %x\n", val); |
| 808 | apic_set_reg(apic, APIC_TDCR, val); | 876 | apic_set_reg(apic, APIC_TDCR, val); |
| 809 | update_divide_count(apic); | 877 | update_divide_count(apic); |
| 810 | break; | 878 | break; |
| 811 | 879 | ||
| 812 | case APIC_ESR: | 880 | case APIC_ESR: |
| 813 | if (apic_x2apic_mode(apic) && val != 0) { | 881 | if (apic_x2apic_mode(apic) && val != 0) { |
| 814 | printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val); | 882 | apic_debug("KVM_WRITE:ESR not zero %x\n", val); |
| 815 | ret = 1; | 883 | ret = 1; |
| 816 | } | 884 | } |
| 817 | break; | 885 | break; |
| @@ -864,6 +932,15 @@ static int apic_mmio_write(struct kvm_io_device *this, | |||
| 864 | return 0; | 932 | return 0; |
| 865 | } | 933 | } |
| 866 | 934 | ||
| 935 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) | ||
| 936 | { | ||
| 937 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 938 | |||
| 939 | if (apic) | ||
| 940 | apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); | ||
| 941 | } | ||
| 942 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); | ||
| 943 | |||
| 867 | void kvm_free_lapic(struct kvm_vcpu *vcpu) | 944 | void kvm_free_lapic(struct kvm_vcpu *vcpu) |
| 868 | { | 945 | { |
| 869 | if (!vcpu->arch.apic) | 946 | if (!vcpu->arch.apic) |
| @@ -883,6 +960,32 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
| 883 | *---------------------------------------------------------------------- | 960 | *---------------------------------------------------------------------- |
| 884 | */ | 961 | */ |
| 885 | 962 | ||
| 963 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) | ||
| 964 | { | ||
| 965 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 966 | if (!apic) | ||
| 967 | return 0; | ||
| 968 | |||
| 969 | if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) | ||
| 970 | return 0; | ||
| 971 | |||
| 972 | return apic->lapic_timer.tscdeadline; | ||
| 973 | } | ||
| 974 | |||
| 975 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) | ||
| 976 | { | ||
| 977 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 978 | if (!apic) | ||
| 979 | return; | ||
| 980 | |||
| 981 | if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) | ||
| 982 | return; | ||
| 983 | |||
| 984 | hrtimer_cancel(&apic->lapic_timer.timer); | ||
| 985 | apic->lapic_timer.tscdeadline = data; | ||
| 986 | start_apic_timer(apic); | ||
| 987 | } | ||
| 988 | |||
| 886 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) | 989 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) |
| 887 | { | 990 | { |
| 888 | struct kvm_lapic *apic = vcpu->arch.apic; | 991 | struct kvm_lapic *apic = vcpu->arch.apic; |
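For reference, the tsc-deadline branch added to start_apic_timer() above is essentially a unit conversion: the distance from the current guest TSC to the programmed deadline, in TSC ticks, is scaled to nanoseconds before the hrtimer is armed. The following stand-alone C sketch (not part of the patch; the deadline, guest TSC and tsc_khz values are made-up illustrations) reproduces that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the conversion in start_apic_timer(): ticks remaining until
     * the deadline, scaled to nanoseconds via the vcpu TSC rate in kHz. */
    static uint64_t tscdeadline_to_ns(uint64_t tscdeadline, uint64_t guest_tsc,
                                      uint64_t tsc_khz)
    {
        if (!tscdeadline || !tsc_khz || tscdeadline <= guest_tsc)
            return 0;               /* disarmed or already expired */
        return (tscdeadline - guest_tsc) * 1000000ULL / tsc_khz;
    }

    int main(void)
    {
        /* Illustrative only: 2 GHz guest TSC, deadline 2e9 ticks ahead. */
        uint64_t ns = tscdeadline_to_ns(4000000000ULL, 2000000000ULL, 2000000ULL);
        printf("hrtimer would be armed %llu ns from now\n",
               (unsigned long long)ns);    /* prints 1000000000 */
        return 0;
    }

As the surrounding hunks show, guest_tsc is taken from kvm_x86_ops->read_l1_tsc() so the deadline is measured against L1's view of the TSC even while a nested guest is running.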
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 52c9e6b9e725..138e8cc6fea6 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
| @@ -26,6 +26,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); | |||
| 26 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); | 26 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); |
| 27 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); | 27 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); |
| 28 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); | 28 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); |
| 29 | void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); | ||
| 29 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | 30 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); |
| 30 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | 31 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); |
| 31 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); | 32 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); |
| @@ -41,6 +42,9 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu); | |||
| 41 | bool kvm_apic_present(struct kvm_vcpu *vcpu); | 42 | bool kvm_apic_present(struct kvm_vcpu *vcpu); |
| 42 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | 43 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
| 43 | 44 | ||
| 45 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); | ||
| 46 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); | ||
| 47 | |||
| 44 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); | 48 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); |
| 45 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); | 49 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); |
| 46 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); | 50 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8e8da7960dbe..f1b36cf3e3d0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -2770,7 +2770,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 2770 | 2770 | ||
| 2771 | ASSERT(!VALID_PAGE(root)); | 2771 | ASSERT(!VALID_PAGE(root)); |
| 2772 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { | 2772 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { |
| 2773 | pdptr = kvm_pdptr_read_mmu(vcpu, &vcpu->arch.mmu, i); | 2773 | pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i); |
| 2774 | if (!is_present_gpte(pdptr)) { | 2774 | if (!is_present_gpte(pdptr)) { |
| 2775 | vcpu->arch.mmu.pae_root[i] = 0; | 2775 | vcpu->arch.mmu.pae_root[i] = 0; |
| 2776 | continue; | 2776 | continue; |
| @@ -3318,6 +3318,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
| 3318 | context->direct_map = true; | 3318 | context->direct_map = true; |
| 3319 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; | 3319 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; |
| 3320 | context->get_cr3 = get_cr3; | 3320 | context->get_cr3 = get_cr3; |
| 3321 | context->get_pdptr = kvm_pdptr_read; | ||
| 3321 | context->inject_page_fault = kvm_inject_page_fault; | 3322 | context->inject_page_fault = kvm_inject_page_fault; |
| 3322 | context->nx = is_nx(vcpu); | 3323 | context->nx = is_nx(vcpu); |
| 3323 | 3324 | ||
| @@ -3376,6 +3377,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | |||
| 3376 | 3377 | ||
| 3377 | vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; | 3378 | vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; |
| 3378 | vcpu->arch.walk_mmu->get_cr3 = get_cr3; | 3379 | vcpu->arch.walk_mmu->get_cr3 = get_cr3; |
| 3380 | vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; | ||
| 3379 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; | 3381 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; |
| 3380 | 3382 | ||
| 3381 | return r; | 3383 | return r; |
| @@ -3386,6 +3388,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
| 3386 | struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; | 3388 | struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; |
| 3387 | 3389 | ||
| 3388 | g_context->get_cr3 = get_cr3; | 3390 | g_context->get_cr3 = get_cr3; |
| 3391 | g_context->get_pdptr = kvm_pdptr_read; | ||
| 3389 | g_context->inject_page_fault = kvm_inject_page_fault; | 3392 | g_context->inject_page_fault = kvm_inject_page_fault; |
| 3390 | 3393 | ||
| 3391 | /* | 3394 | /* |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 2460a265be23..746ec259d024 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
| @@ -121,16 +121,16 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
| 121 | 121 | ||
| 122 | static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | 122 | static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
| 123 | { | 123 | { |
| 124 | static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); | ||
| 124 | unsigned long *rmapp; | 125 | unsigned long *rmapp; |
| 125 | struct kvm_mmu_page *rev_sp; | 126 | struct kvm_mmu_page *rev_sp; |
| 126 | gfn_t gfn; | 127 | gfn_t gfn; |
| 127 | 128 | ||
| 128 | |||
| 129 | rev_sp = page_header(__pa(sptep)); | 129 | rev_sp = page_header(__pa(sptep)); |
| 130 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); | 130 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); |
| 131 | 131 | ||
| 132 | if (!gfn_to_memslot(kvm, gfn)) { | 132 | if (!gfn_to_memslot(kvm, gfn)) { |
| 133 | if (!printk_ratelimit()) | 133 | if (!__ratelimit(&ratelimit_state)) |
| 134 | return; | 134 | return; |
| 135 | audit_printk(kvm, "no memslot for gfn %llx\n", gfn); | 135 | audit_printk(kvm, "no memslot for gfn %llx\n", gfn); |
| 136 | audit_printk(kvm, "index %ld of sp (gfn=%llx)\n", | 136 | audit_printk(kvm, "index %ld of sp (gfn=%llx)\n", |
| @@ -141,7 +141,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
| 141 | 141 | ||
| 142 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); | 142 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); |
| 143 | if (!*rmapp) { | 143 | if (!*rmapp) { |
| 144 | if (!printk_ratelimit()) | 144 | if (!__ratelimit(&ratelimit_state)) |
| 145 | return; | 145 | return; |
| 146 | audit_printk(kvm, "no rmap for writable spte %llx\n", | 146 | audit_printk(kvm, "no rmap for writable spte %llx\n", |
| 147 | *sptep); | 147 | *sptep); |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 507e2b844cfa..92994100638b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -147,7 +147,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
| 147 | gfn_t table_gfn; | 147 | gfn_t table_gfn; |
| 148 | unsigned index, pt_access, uninitialized_var(pte_access); | 148 | unsigned index, pt_access, uninitialized_var(pte_access); |
| 149 | gpa_t pte_gpa; | 149 | gpa_t pte_gpa; |
| 150 | bool eperm; | 150 | bool eperm, last_gpte; |
| 151 | int offset; | 151 | int offset; |
| 152 | const int write_fault = access & PFERR_WRITE_MASK; | 152 | const int write_fault = access & PFERR_WRITE_MASK; |
| 153 | const int user_fault = access & PFERR_USER_MASK; | 153 | const int user_fault = access & PFERR_USER_MASK; |
| @@ -163,7 +163,7 @@ retry_walk: | |||
| 163 | 163 | ||
| 164 | #if PTTYPE == 64 | 164 | #if PTTYPE == 64 |
| 165 | if (walker->level == PT32E_ROOT_LEVEL) { | 165 | if (walker->level == PT32E_ROOT_LEVEL) { |
| 166 | pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3); | 166 | pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); |
| 167 | trace_kvm_mmu_paging_element(pte, walker->level); | 167 | trace_kvm_mmu_paging_element(pte, walker->level); |
| 168 | if (!is_present_gpte(pte)) | 168 | if (!is_present_gpte(pte)) |
| 169 | goto error; | 169 | goto error; |
| @@ -221,6 +221,17 @@ retry_walk: | |||
| 221 | eperm = true; | 221 | eperm = true; |
| 222 | #endif | 222 | #endif |
| 223 | 223 | ||
| 224 | last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); | ||
| 225 | if (last_gpte) { | ||
| 226 | pte_access = pt_access & | ||
| 227 | FNAME(gpte_access)(vcpu, pte, true); | ||
| 228 | /* check if the kernel is fetching from user page */ | ||
| 229 | if (unlikely(pte_access & PT_USER_MASK) && | ||
| 230 | kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
| 231 | if (fetch_fault && !user_fault) | ||
| 232 | eperm = true; | ||
| 233 | } | ||
| 234 | |||
| 224 | if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { | 235 | if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { |
| 225 | int ret; | 236 | int ret; |
| 226 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 237 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
| @@ -238,18 +249,12 @@ retry_walk: | |||
| 238 | 249 | ||
| 239 | walker->ptes[walker->level - 1] = pte; | 250 | walker->ptes[walker->level - 1] = pte; |
| 240 | 251 | ||
| 241 | if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) { | 252 | if (last_gpte) { |
| 242 | int lvl = walker->level; | 253 | int lvl = walker->level; |
| 243 | gpa_t real_gpa; | 254 | gpa_t real_gpa; |
| 244 | gfn_t gfn; | 255 | gfn_t gfn; |
| 245 | u32 ac; | 256 | u32 ac; |
| 246 | 257 | ||
| 247 | /* check if the kernel is fetching from user page */ | ||
| 248 | if (unlikely(pte_access & PT_USER_MASK) && | ||
| 249 | kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
| 250 | if (fetch_fault && !user_fault) | ||
| 251 | eperm = true; | ||
| 252 | |||
| 253 | gfn = gpte_to_gfn_lvl(pte, lvl); | 258 | gfn = gpte_to_gfn_lvl(pte, lvl); |
| 254 | gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; | 259 | gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; |
| 255 | 260 | ||
| @@ -295,7 +300,6 @@ retry_walk: | |||
| 295 | walker->ptes[walker->level - 1] = pte; | 300 | walker->ptes[walker->level - 1] = pte; |
| 296 | } | 301 | } |
| 297 | 302 | ||
| 298 | pte_access = pt_access & FNAME(gpte_access)(vcpu, pte, true); | ||
| 299 | walker->pt_access = pt_access; | 303 | walker->pt_access = pt_access; |
| 300 | walker->pte_access = pte_access; | 304 | walker->pte_access = pte_access; |
| 301 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 305 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
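The paging_tmpl.h hunk above moves the SMEP check so it is evaluated for the last-level gpte together with the freshly computed pte_access. Here is a stand-alone sketch of that predicate (simplified to booleans; the real walker tests PT_USER_MASK, CR4.SMEP and the fault bits shown in the diff):

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified restatement of the check in walk_addr_generic(): with SMEP
     * enabled, an instruction fetch by the kernel (a fetch fault that is not
     * a user fault) from a user-accessible page is a permission error. */
    static bool smep_fetch_eperm(bool pte_user, bool cr4_smep,
                                 bool fetch_fault, bool user_fault)
    {
        return pte_user && cr4_smep && fetch_fault && !user_fault;
    }

    int main(void)
    {
        printf("kernel fetch, user page, SMEP on -> %d\n",
               smep_fetch_eperm(true, true, true, false));  /* 1: eperm */
        printf("user fetch, user page, SMEP on   -> %d\n",
               smep_fetch_eperm(true, true, true, true));   /* 0: allowed */
        return 0;
    }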
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 475d1c948501..e32243eac2f4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -1084,7 +1084,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1084 | if (npt_enabled) { | 1084 | if (npt_enabled) { |
| 1085 | /* Setup VMCB for Nested Paging */ | 1085 | /* Setup VMCB for Nested Paging */ |
| 1086 | control->nested_ctl = 1; | 1086 | control->nested_ctl = 1; |
| 1087 | clr_intercept(svm, INTERCEPT_TASK_SWITCH); | ||
| 1088 | clr_intercept(svm, INTERCEPT_INVLPG); | 1087 | clr_intercept(svm, INTERCEPT_INVLPG); |
| 1089 | clr_exception_intercept(svm, PF_VECTOR); | 1088 | clr_exception_intercept(svm, PF_VECTOR); |
| 1090 | clr_cr_intercept(svm, INTERCEPT_CR3_READ); | 1089 | clr_cr_intercept(svm, INTERCEPT_CR3_READ); |
| @@ -1844,6 +1843,20 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) | |||
| 1844 | return svm->nested.nested_cr3; | 1843 | return svm->nested.nested_cr3; |
| 1845 | } | 1844 | } |
| 1846 | 1845 | ||
| 1846 | static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) | ||
| 1847 | { | ||
| 1848 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 1849 | u64 cr3 = svm->nested.nested_cr3; | ||
| 1850 | u64 pdpte; | ||
| 1851 | int ret; | ||
| 1852 | |||
| 1853 | ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte, | ||
| 1854 | offset_in_page(cr3) + index * 8, 8); | ||
| 1855 | if (ret) | ||
| 1856 | return 0; | ||
| 1857 | return pdpte; | ||
| 1858 | } | ||
| 1859 | |||
| 1847 | static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, | 1860 | static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, |
| 1848 | unsigned long root) | 1861 | unsigned long root) |
| 1849 | { | 1862 | { |
| @@ -1875,6 +1888,7 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) | |||
| 1875 | 1888 | ||
| 1876 | vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; | 1889 | vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; |
| 1877 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; | 1890 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; |
| 1891 | vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; | ||
| 1878 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; | 1892 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; |
| 1879 | vcpu->arch.mmu.shadow_root_level = get_npt_level(); | 1893 | vcpu->arch.mmu.shadow_root_level = get_npt_level(); |
| 1880 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | 1894 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; |
| @@ -2182,7 +2196,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 2182 | vmcb->control.exit_info_1, | 2196 | vmcb->control.exit_info_1, |
| 2183 | vmcb->control.exit_info_2, | 2197 | vmcb->control.exit_info_2, |
| 2184 | vmcb->control.exit_int_info, | 2198 | vmcb->control.exit_int_info, |
| 2185 | vmcb->control.exit_int_info_err); | 2199 | vmcb->control.exit_int_info_err, |
| 2200 | KVM_ISA_SVM); | ||
| 2186 | 2201 | ||
| 2187 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); | 2202 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); |
| 2188 | if (!nested_vmcb) | 2203 | if (!nested_vmcb) |
| @@ -2894,15 +2909,20 @@ static int cr8_write_interception(struct vcpu_svm *svm) | |||
| 2894 | return 0; | 2909 | return 0; |
| 2895 | } | 2910 | } |
| 2896 | 2911 | ||
| 2912 | u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu) | ||
| 2913 | { | ||
| 2914 | struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); | ||
| 2915 | return vmcb->control.tsc_offset + | ||
| 2916 | svm_scale_tsc(vcpu, native_read_tsc()); | ||
| 2917 | } | ||
| 2918 | |||
| 2897 | static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | 2919 | static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) |
| 2898 | { | 2920 | { |
| 2899 | struct vcpu_svm *svm = to_svm(vcpu); | 2921 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2900 | 2922 | ||
| 2901 | switch (ecx) { | 2923 | switch (ecx) { |
| 2902 | case MSR_IA32_TSC: { | 2924 | case MSR_IA32_TSC: { |
| 2903 | struct vmcb *vmcb = get_host_vmcb(svm); | 2925 | *data = svm->vmcb->control.tsc_offset + |
| 2904 | |||
| 2905 | *data = vmcb->control.tsc_offset + | ||
| 2906 | svm_scale_tsc(vcpu, native_read_tsc()); | 2926 | svm_scale_tsc(vcpu, native_read_tsc()); |
| 2907 | 2927 | ||
| 2908 | break; | 2928 | break; |
| @@ -3314,8 +3334,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 3314 | struct kvm_run *kvm_run = vcpu->run; | 3334 | struct kvm_run *kvm_run = vcpu->run; |
| 3315 | u32 exit_code = svm->vmcb->control.exit_code; | 3335 | u32 exit_code = svm->vmcb->control.exit_code; |
| 3316 | 3336 | ||
| 3317 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); | ||
| 3318 | |||
| 3319 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) | 3337 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) |
| 3320 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 3338 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
| 3321 | if (npt_enabled) | 3339 | if (npt_enabled) |
| @@ -3335,7 +3353,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 3335 | svm->vmcb->control.exit_info_1, | 3353 | svm->vmcb->control.exit_info_1, |
| 3336 | svm->vmcb->control.exit_info_2, | 3354 | svm->vmcb->control.exit_info_2, |
| 3337 | svm->vmcb->control.exit_int_info, | 3355 | svm->vmcb->control.exit_int_info, |
| 3338 | svm->vmcb->control.exit_int_info_err); | 3356 | svm->vmcb->control.exit_int_info_err, |
| 3357 | KVM_ISA_SVM); | ||
| 3339 | 3358 | ||
| 3340 | vmexit = nested_svm_exit_special(svm); | 3359 | vmexit = nested_svm_exit_special(svm); |
| 3341 | 3360 | ||
| @@ -3768,6 +3787,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 3768 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | 3787 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; |
| 3769 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 3788 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
| 3770 | 3789 | ||
| 3790 | trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM); | ||
| 3791 | |||
| 3771 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | 3792 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) |
| 3772 | kvm_before_handle_nmi(&svm->vcpu); | 3793 | kvm_before_handle_nmi(&svm->vcpu); |
| 3773 | 3794 | ||
| @@ -3897,60 +3918,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
| 3897 | } | 3918 | } |
| 3898 | } | 3919 | } |
| 3899 | 3920 | ||
| 3900 | static const struct trace_print_flags svm_exit_reasons_str[] = { | ||
| 3901 | { SVM_EXIT_READ_CR0, "read_cr0" }, | ||
| 3902 | { SVM_EXIT_READ_CR3, "read_cr3" }, | ||
| 3903 | { SVM_EXIT_READ_CR4, "read_cr4" }, | ||
| 3904 | { SVM_EXIT_READ_CR8, "read_cr8" }, | ||
| 3905 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, | ||
| 3906 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, | ||
| 3907 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, | ||
| 3908 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, | ||
| 3909 | { SVM_EXIT_READ_DR0, "read_dr0" }, | ||
| 3910 | { SVM_EXIT_READ_DR1, "read_dr1" }, | ||
| 3911 | { SVM_EXIT_READ_DR2, "read_dr2" }, | ||
| 3912 | { SVM_EXIT_READ_DR3, "read_dr3" }, | ||
| 3913 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, | ||
| 3914 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, | ||
| 3915 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, | ||
| 3916 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, | ||
| 3917 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, | ||
| 3918 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, | ||
| 3919 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, | ||
| 3920 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, | ||
| 3921 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, | ||
| 3922 | { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, | ||
| 3923 | { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, | ||
| 3924 | { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, | ||
| 3925 | { SVM_EXIT_INTR, "interrupt" }, | ||
| 3926 | { SVM_EXIT_NMI, "nmi" }, | ||
| 3927 | { SVM_EXIT_SMI, "smi" }, | ||
| 3928 | { SVM_EXIT_INIT, "init" }, | ||
| 3929 | { SVM_EXIT_VINTR, "vintr" }, | ||
| 3930 | { SVM_EXIT_CPUID, "cpuid" }, | ||
| 3931 | { SVM_EXIT_INVD, "invd" }, | ||
| 3932 | { SVM_EXIT_HLT, "hlt" }, | ||
| 3933 | { SVM_EXIT_INVLPG, "invlpg" }, | ||
| 3934 | { SVM_EXIT_INVLPGA, "invlpga" }, | ||
| 3935 | { SVM_EXIT_IOIO, "io" }, | ||
| 3936 | { SVM_EXIT_MSR, "msr" }, | ||
| 3937 | { SVM_EXIT_TASK_SWITCH, "task_switch" }, | ||
| 3938 | { SVM_EXIT_SHUTDOWN, "shutdown" }, | ||
| 3939 | { SVM_EXIT_VMRUN, "vmrun" }, | ||
| 3940 | { SVM_EXIT_VMMCALL, "hypercall" }, | ||
| 3941 | { SVM_EXIT_VMLOAD, "vmload" }, | ||
| 3942 | { SVM_EXIT_VMSAVE, "vmsave" }, | ||
| 3943 | { SVM_EXIT_STGI, "stgi" }, | ||
| 3944 | { SVM_EXIT_CLGI, "clgi" }, | ||
| 3945 | { SVM_EXIT_SKINIT, "skinit" }, | ||
| 3946 | { SVM_EXIT_WBINVD, "wbinvd" }, | ||
| 3947 | { SVM_EXIT_MONITOR, "monitor" }, | ||
| 3948 | { SVM_EXIT_MWAIT, "mwait" }, | ||
| 3949 | { SVM_EXIT_XSETBV, "xsetbv" }, | ||
| 3950 | { SVM_EXIT_NPF, "npf" }, | ||
| 3951 | { -1, NULL } | ||
| 3952 | }; | ||
| 3953 | |||
| 3954 | static int svm_get_lpage_level(void) | 3921 | static int svm_get_lpage_level(void) |
| 3955 | { | 3922 | { |
| 3956 | return PT_PDPE_LEVEL; | 3923 | return PT_PDPE_LEVEL; |
| @@ -4223,7 +4190,6 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 4223 | .get_mt_mask = svm_get_mt_mask, | 4190 | .get_mt_mask = svm_get_mt_mask, |
| 4224 | 4191 | ||
| 4225 | .get_exit_info = svm_get_exit_info, | 4192 | .get_exit_info = svm_get_exit_info, |
| 4226 | .exit_reasons_str = svm_exit_reasons_str, | ||
| 4227 | 4193 | ||
| 4228 | .get_lpage_level = svm_get_lpage_level, | 4194 | .get_lpage_level = svm_get_lpage_level, |
| 4229 | 4195 | ||
| @@ -4239,6 +4205,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 4239 | .write_tsc_offset = svm_write_tsc_offset, | 4205 | .write_tsc_offset = svm_write_tsc_offset, |
| 4240 | .adjust_tsc_offset = svm_adjust_tsc_offset, | 4206 | .adjust_tsc_offset = svm_adjust_tsc_offset, |
| 4241 | .compute_tsc_offset = svm_compute_tsc_offset, | 4207 | .compute_tsc_offset = svm_compute_tsc_offset, |
| 4208 | .read_l1_tsc = svm_read_l1_tsc, | ||
| 4242 | 4209 | ||
| 4243 | .set_tdp_cr3 = set_tdp_cr3, | 4210 | .set_tdp_cr3 = set_tdp_cr3, |
| 4244 | 4211 | ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 3ff898c104f7..911d2641f14c 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | #define _TRACE_KVM_H | 2 | #define _TRACE_KVM_H |
| 3 | 3 | ||
| 4 | #include <linux/tracepoint.h> | 4 | #include <linux/tracepoint.h> |
| 5 | #include <asm/vmx.h> | ||
| 6 | #include <asm/svm.h> | ||
| 5 | 7 | ||
| 6 | #undef TRACE_SYSTEM | 8 | #undef TRACE_SYSTEM |
| 7 | #define TRACE_SYSTEM kvm | 9 | #define TRACE_SYSTEM kvm |
| @@ -181,6 +183,95 @@ TRACE_EVENT(kvm_apic, | |||
| 181 | #define KVM_ISA_VMX 1 | 183 | #define KVM_ISA_VMX 1 |
| 182 | #define KVM_ISA_SVM 2 | 184 | #define KVM_ISA_SVM 2 |
| 183 | 185 | ||
| 186 | #define VMX_EXIT_REASONS \ | ||
| 187 | { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ | ||
| 188 | { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ | ||
| 189 | { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ | ||
| 190 | { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ | ||
| 191 | { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ | ||
| 192 | { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ | ||
| 193 | { EXIT_REASON_CPUID, "CPUID" }, \ | ||
| 194 | { EXIT_REASON_HLT, "HLT" }, \ | ||
| 195 | { EXIT_REASON_INVLPG, "INVLPG" }, \ | ||
| 196 | { EXIT_REASON_RDPMC, "RDPMC" }, \ | ||
| 197 | { EXIT_REASON_RDTSC, "RDTSC" }, \ | ||
| 198 | { EXIT_REASON_VMCALL, "VMCALL" }, \ | ||
| 199 | { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ | ||
| 200 | { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ | ||
| 201 | { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ | ||
| 202 | { EXIT_REASON_VMPTRST, "VMPTRST" }, \ | ||
| 203 | { EXIT_REASON_VMREAD, "VMREAD" }, \ | ||
| 204 | { EXIT_REASON_VMRESUME, "VMRESUME" }, \ | ||
| 205 | { EXIT_REASON_VMWRITE, "VMWRITE" }, \ | ||
| 206 | { EXIT_REASON_VMOFF, "VMOFF" }, \ | ||
| 207 | { EXIT_REASON_VMON, "VMON" }, \ | ||
| 208 | { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ | ||
| 209 | { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ | ||
| 210 | { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ | ||
| 211 | { EXIT_REASON_MSR_READ, "MSR_READ" }, \ | ||
| 212 | { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ | ||
| 213 | { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ | ||
| 214 | { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ | ||
| 215 | { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ | ||
| 216 | { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ | ||
| 217 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ | ||
| 218 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ | ||
| 219 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ | ||
| 220 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ | ||
| 221 | { EXIT_REASON_WBINVD, "WBINVD" } | ||
| 222 | |||
| 223 | #define SVM_EXIT_REASONS \ | ||
| 224 | { SVM_EXIT_READ_CR0, "read_cr0" }, \ | ||
| 225 | { SVM_EXIT_READ_CR3, "read_cr3" }, \ | ||
| 226 | { SVM_EXIT_READ_CR4, "read_cr4" }, \ | ||
| 227 | { SVM_EXIT_READ_CR8, "read_cr8" }, \ | ||
| 228 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ | ||
| 229 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ | ||
| 230 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ | ||
| 231 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ | ||
| 232 | { SVM_EXIT_READ_DR0, "read_dr0" }, \ | ||
| 233 | { SVM_EXIT_READ_DR1, "read_dr1" }, \ | ||
| 234 | { SVM_EXIT_READ_DR2, "read_dr2" }, \ | ||
| 235 | { SVM_EXIT_READ_DR3, "read_dr3" }, \ | ||
| 236 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ | ||
| 237 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ | ||
| 238 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ | ||
| 239 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ | ||
| 240 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ | ||
| 241 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ | ||
| 242 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ | ||
| 243 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ | ||
| 244 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ | ||
| 245 | { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ | ||
| 246 | { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ | ||
| 247 | { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ | ||
| 248 | { SVM_EXIT_INTR, "interrupt" }, \ | ||
| 249 | { SVM_EXIT_NMI, "nmi" }, \ | ||
| 250 | { SVM_EXIT_SMI, "smi" }, \ | ||
| 251 | { SVM_EXIT_INIT, "init" }, \ | ||
| 252 | { SVM_EXIT_VINTR, "vintr" }, \ | ||
| 253 | { SVM_EXIT_CPUID, "cpuid" }, \ | ||
| 254 | { SVM_EXIT_INVD, "invd" }, \ | ||
| 255 | { SVM_EXIT_HLT, "hlt" }, \ | ||
| 256 | { SVM_EXIT_INVLPG, "invlpg" }, \ | ||
| 257 | { SVM_EXIT_INVLPGA, "invlpga" }, \ | ||
| 258 | { SVM_EXIT_IOIO, "io" }, \ | ||
| 259 | { SVM_EXIT_MSR, "msr" }, \ | ||
| 260 | { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ | ||
| 261 | { SVM_EXIT_SHUTDOWN, "shutdown" }, \ | ||
| 262 | { SVM_EXIT_VMRUN, "vmrun" }, \ | ||
| 263 | { SVM_EXIT_VMMCALL, "hypercall" }, \ | ||
| 264 | { SVM_EXIT_VMLOAD, "vmload" }, \ | ||
| 265 | { SVM_EXIT_VMSAVE, "vmsave" }, \ | ||
| 266 | { SVM_EXIT_STGI, "stgi" }, \ | ||
| 267 | { SVM_EXIT_CLGI, "clgi" }, \ | ||
| 268 | { SVM_EXIT_SKINIT, "skinit" }, \ | ||
| 269 | { SVM_EXIT_WBINVD, "wbinvd" }, \ | ||
| 270 | { SVM_EXIT_MONITOR, "monitor" }, \ | ||
| 271 | { SVM_EXIT_MWAIT, "mwait" }, \ | ||
| 272 | { SVM_EXIT_XSETBV, "xsetbv" }, \ | ||
| 273 | { SVM_EXIT_NPF, "npf" } | ||
| 274 | |||
| 184 | /* | 275 | /* |
| 185 | * Tracepoint for kvm guest exit: | 276 | * Tracepoint for kvm guest exit: |
| 186 | */ | 277 | */ |
| @@ -205,8 +296,9 @@ TRACE_EVENT(kvm_exit, | |||
| 205 | ), | 296 | ), |
| 206 | 297 | ||
| 207 | TP_printk("reason %s rip 0x%lx info %llx %llx", | 298 | TP_printk("reason %s rip 0x%lx info %llx %llx", |
| 208 | ftrace_print_symbols_seq(p, __entry->exit_reason, | 299 | (__entry->isa == KVM_ISA_VMX) ? |
| 209 | kvm_x86_ops->exit_reasons_str), | 300 | __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) : |
| 301 | __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS), | ||
| 210 | __entry->guest_rip, __entry->info1, __entry->info2) | 302 | __entry->guest_rip, __entry->info1, __entry->info2) |
| 211 | ); | 303 | ); |
| 212 | 304 | ||
| @@ -486,9 +578,9 @@ TRACE_EVENT(kvm_nested_intercepts, | |||
| 486 | TRACE_EVENT(kvm_nested_vmexit, | 578 | TRACE_EVENT(kvm_nested_vmexit, |
| 487 | TP_PROTO(__u64 rip, __u32 exit_code, | 579 | TP_PROTO(__u64 rip, __u32 exit_code, |
| 488 | __u64 exit_info1, __u64 exit_info2, | 580 | __u64 exit_info1, __u64 exit_info2, |
| 489 | __u32 exit_int_info, __u32 exit_int_info_err), | 581 | __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa), |
| 490 | TP_ARGS(rip, exit_code, exit_info1, exit_info2, | 582 | TP_ARGS(rip, exit_code, exit_info1, exit_info2, |
| 491 | exit_int_info, exit_int_info_err), | 583 | exit_int_info, exit_int_info_err, isa), |
| 492 | 584 | ||
| 493 | TP_STRUCT__entry( | 585 | TP_STRUCT__entry( |
| 494 | __field( __u64, rip ) | 586 | __field( __u64, rip ) |
| @@ -497,6 +589,7 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
| 497 | __field( __u64, exit_info2 ) | 589 | __field( __u64, exit_info2 ) |
| 498 | __field( __u32, exit_int_info ) | 590 | __field( __u32, exit_int_info ) |
| 499 | __field( __u32, exit_int_info_err ) | 591 | __field( __u32, exit_int_info_err ) |
| 592 | __field( __u32, isa ) | ||
| 500 | ), | 593 | ), |
| 501 | 594 | ||
| 502 | TP_fast_assign( | 595 | TP_fast_assign( |
| @@ -506,12 +599,14 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
| 506 | __entry->exit_info2 = exit_info2; | 599 | __entry->exit_info2 = exit_info2; |
| 507 | __entry->exit_int_info = exit_int_info; | 600 | __entry->exit_int_info = exit_int_info; |
| 508 | __entry->exit_int_info_err = exit_int_info_err; | 601 | __entry->exit_int_info_err = exit_int_info_err; |
| 602 | __entry->isa = isa; | ||
| 509 | ), | 603 | ), |
| 510 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | 604 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " |
| 511 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", | 605 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
| 512 | __entry->rip, | 606 | __entry->rip, |
| 513 | ftrace_print_symbols_seq(p, __entry->exit_code, | 607 | (__entry->isa == KVM_ISA_VMX) ? |
| 514 | kvm_x86_ops->exit_reasons_str), | 608 | __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : |
| 609 | __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), | ||
| 515 | __entry->exit_info1, __entry->exit_info2, | 610 | __entry->exit_info1, __entry->exit_info2, |
| 516 | __entry->exit_int_info, __entry->exit_int_info_err) | 611 | __entry->exit_int_info, __entry->exit_int_info_err) |
| 517 | ); | 612 | ); |
| @@ -522,9 +617,9 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
| 522 | TRACE_EVENT(kvm_nested_vmexit_inject, | 617 | TRACE_EVENT(kvm_nested_vmexit_inject, |
| 523 | TP_PROTO(__u32 exit_code, | 618 | TP_PROTO(__u32 exit_code, |
| 524 | __u64 exit_info1, __u64 exit_info2, | 619 | __u64 exit_info1, __u64 exit_info2, |
| 525 | __u32 exit_int_info, __u32 exit_int_info_err), | 620 | __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa), |
| 526 | TP_ARGS(exit_code, exit_info1, exit_info2, | 621 | TP_ARGS(exit_code, exit_info1, exit_info2, |
| 527 | exit_int_info, exit_int_info_err), | 622 | exit_int_info, exit_int_info_err, isa), |
| 528 | 623 | ||
| 529 | TP_STRUCT__entry( | 624 | TP_STRUCT__entry( |
| 530 | __field( __u32, exit_code ) | 625 | __field( __u32, exit_code ) |
| @@ -532,6 +627,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
| 532 | __field( __u64, exit_info2 ) | 627 | __field( __u64, exit_info2 ) |
| 533 | __field( __u32, exit_int_info ) | 628 | __field( __u32, exit_int_info ) |
| 534 | __field( __u32, exit_int_info_err ) | 629 | __field( __u32, exit_int_info_err ) |
| 630 | __field( __u32, isa ) | ||
| 535 | ), | 631 | ), |
| 536 | 632 | ||
| 537 | TP_fast_assign( | 633 | TP_fast_assign( |
| @@ -540,12 +636,14 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
| 540 | __entry->exit_info2 = exit_info2; | 636 | __entry->exit_info2 = exit_info2; |
| 541 | __entry->exit_int_info = exit_int_info; | 637 | __entry->exit_int_info = exit_int_info; |
| 542 | __entry->exit_int_info_err = exit_int_info_err; | 638 | __entry->exit_int_info_err = exit_int_info_err; |
| 639 | __entry->isa = isa; | ||
| 543 | ), | 640 | ), |
| 544 | 641 | ||
| 545 | TP_printk("reason: %s ext_inf1: 0x%016llx " | 642 | TP_printk("reason: %s ext_inf1: 0x%016llx " |
| 546 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", | 643 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
| 547 | ftrace_print_symbols_seq(p, __entry->exit_code, | 644 | (__entry->isa == KVM_ISA_VMX) ? |
| 548 | kvm_x86_ops->exit_reasons_str), | 645 | __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : |
| 646 | __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), | ||
| 549 | __entry->exit_info1, __entry->exit_info2, | 647 | __entry->exit_info1, __entry->exit_info2, |
| 550 | __entry->exit_int_info, __entry->exit_int_info_err) | 648 | __entry->exit_int_info, __entry->exit_int_info_err) |
| 551 | ); | 649 | ); |
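The trace.h changes above drop the per-vendor exit_reasons_str callbacks in favour of the VMX_EXIT_REASONS / SVM_EXIT_REASONS macros, selected at print time by the ISA recorded in the trace entry. The stand-alone C sketch below mimics that lookup; the numeric reason codes are illustrative placeholders rather than values copied from asm/vmx.h or asm/svm.h:

    #include <stdio.h>

    struct exit_reason { unsigned int code; const char *name; };

    /* Tiny illustrative tables; the real macros expand to many more pairs. */
    static const struct exit_reason vmx_reasons[] = {
        { 10, "CPUID" }, { 12, "HLT" }, { 48, "EPT_VIOLATION" },
    };
    static const struct exit_reason svm_reasons[] = {
        { 0x72, "cpuid" }, { 0x78, "hlt" }, { 0x400, "npf" },
    };

    #define KVM_ISA_VMX 1
    #define KVM_ISA_SVM 2

    /* Mimics what __print_symbolic() does in the kvm_exit tracepoint:
     * pick the table by ISA, then map the numeric code to a name. */
    static const char *exit_reason_name(unsigned int isa, unsigned int code)
    {
        const struct exit_reason *tbl;
        size_t i, n;

        if (isa == KVM_ISA_VMX) {
            tbl = vmx_reasons;
            n = sizeof(vmx_reasons) / sizeof(vmx_reasons[0]);
        } else {
            tbl = svm_reasons;
            n = sizeof(svm_reasons) / sizeof(svm_reasons[0]);
        }
        for (i = 0; i < n; i++)
            if (tbl[i].code == code)
                return tbl[i].name;
        return "UNKNOWN";
    }

    int main(void)
    {
        printf("%s\n", exit_reason_name(KVM_ISA_VMX, 12));    /* HLT */
        printf("%s\n", exit_reason_name(KVM_ISA_SVM, 0x400)); /* npf */
        return 0;
    }

Keeping the code-to-name mapping inside the trace event itself is what allows the exit_reasons_str members to be removed from the svm and vmx kvm_x86_ops tables.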
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e65a158dee64..a0d6bd9ad442 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -71,6 +71,9 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
| 71 | static int __read_mostly yield_on_hlt = 1; | 71 | static int __read_mostly yield_on_hlt = 1; |
| 72 | module_param(yield_on_hlt, bool, S_IRUGO); | 72 | module_param(yield_on_hlt, bool, S_IRUGO); |
| 73 | 73 | ||
| 74 | static int __read_mostly fasteoi = 1; | ||
| 75 | module_param(fasteoi, bool, S_IRUGO); | ||
| 76 | |||
| 74 | /* | 77 | /* |
| 75 | * If nested=1, nested virtualization is supported, i.e., guests may use | 78 | * If nested=1, nested virtualization is supported, i.e., guests may use |
| 76 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 79 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
| @@ -1748,6 +1751,21 @@ static u64 guest_read_tsc(void) | |||
| 1748 | } | 1751 | } |
| 1749 | 1752 | ||
| 1750 | /* | 1753 | /* |
| 1754 | * Like guest_read_tsc, but always returns L1's notion of the timestamp | ||
| 1755 | * counter, even if a nested guest (L2) is currently running. | ||
| 1756 | */ | ||
| 1757 | u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu) | ||
| 1758 | { | ||
| 1759 | u64 host_tsc, tsc_offset; | ||
| 1760 | |||
| 1761 | rdtscll(host_tsc); | ||
| 1762 | tsc_offset = is_guest_mode(vcpu) ? | ||
| 1763 | to_vmx(vcpu)->nested.vmcs01_tsc_offset : | ||
| 1764 | vmcs_read64(TSC_OFFSET); | ||
| 1765 | return host_tsc + tsc_offset; | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | /* | ||
| 1751 | * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ | 1769 | * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ |
| 1752 | * ioctl. In this case the call-back should update internal vmx state to make | 1770 | * ioctl. In this case the call-back should update internal vmx state to make |
| 1753 | * the changes effective. | 1771 | * the changes effective. |
| @@ -1762,15 +1780,23 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | |||
| 1762 | */ | 1780 | */ |
| 1763 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | 1781 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) |
| 1764 | { | 1782 | { |
| 1765 | vmcs_write64(TSC_OFFSET, offset); | 1783 | if (is_guest_mode(vcpu)) { |
| 1766 | if (is_guest_mode(vcpu)) | ||
| 1767 | /* | 1784 | /* |
| 1768 | * We're here if L1 chose not to trap the TSC MSR. Since | 1785 | * We're here if L1 chose not to trap WRMSR to TSC. According |
| 1769 | * prepare_vmcs12() does not copy tsc_offset, we need to also | 1786 | * to the spec, this should set L1's TSC; the offset that L1 |
| 1770 | * set the vmcs12 field here. | 1787 | * set for L2 remains unchanged, and still needs to be added |
| 1788 | * to the newly set TSC to get L2's TSC. | ||
| 1771 | */ | 1789 | */ |
| 1772 | get_vmcs12(vcpu)->tsc_offset = offset - | 1790 | struct vmcs12 *vmcs12; |
| 1773 | to_vmx(vcpu)->nested.vmcs01_tsc_offset; | 1791 | to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset; |
| 1792 | /* recalculate vmcs02.TSC_OFFSET: */ | ||
| 1793 | vmcs12 = get_vmcs12(vcpu); | ||
| 1794 | vmcs_write64(TSC_OFFSET, offset + | ||
| 1795 | (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? | ||
| 1796 | vmcs12->tsc_offset : 0)); | ||
| 1797 | } else { | ||
| 1798 | vmcs_write64(TSC_OFFSET, offset); | ||
| 1799 | } | ||
| 1774 | } | 1800 | } |
| 1775 | 1801 | ||
| 1776 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 1802 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) |
| @@ -2736,8 +2762,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
| 2736 | 2762 | ||
| 2737 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); | 2763 | guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); |
| 2738 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { | 2764 | if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { |
| 2739 | printk(KERN_DEBUG "%s: tss fixup for long mode. \n", | 2765 | pr_debug_ratelimited("%s: tss fixup for long mode. \n", |
| 2740 | __func__); | 2766 | __func__); |
| 2741 | vmcs_write32(GUEST_TR_AR_BYTES, | 2767 | vmcs_write32(GUEST_TR_AR_BYTES, |
| 2742 | (guest_tr_ar & ~AR_TYPE_MASK) | 2768 | (guest_tr_ar & ~AR_TYPE_MASK) |
| 2743 | | AR_TYPE_BUSY_64_TSS); | 2769 | | AR_TYPE_BUSY_64_TSS); |
| @@ -4115,8 +4141,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
| 4115 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 4141 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
| 4116 | if (is_page_fault(intr_info)) { | 4142 | if (is_page_fault(intr_info)) { |
| 4117 | /* EPT won't cause page fault directly */ | 4143 | /* EPT won't cause page fault directly */ |
| 4118 | if (enable_ept) | 4144 | BUG_ON(enable_ept); |
| 4119 | BUG(); | ||
| 4120 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 4145 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
| 4121 | trace_kvm_page_fault(cr2, error_code); | 4146 | trace_kvm_page_fault(cr2, error_code); |
| 4122 | 4147 | ||
| @@ -4518,6 +4543,24 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) | |||
| 4518 | 4543 | ||
| 4519 | static int handle_apic_access(struct kvm_vcpu *vcpu) | 4544 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
| 4520 | { | 4545 | { |
| 4546 | if (likely(fasteoi)) { | ||
| 4547 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
| 4548 | int access_type, offset; | ||
| 4549 | |||
| 4550 | access_type = exit_qualification & APIC_ACCESS_TYPE; | ||
| 4551 | offset = exit_qualification & APIC_ACCESS_OFFSET; | ||
| 4552 | /* | ||
| 553 | * A sane guest uses MOV to write EOI, and the written value | ||
| 554 | * is ignored, so short-circuit here and avoid heavy | ||
| 555 | * instruction emulation. | ||
| 4556 | */ | ||
| 4557 | if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && | ||
| 4558 | (offset == APIC_EOI)) { | ||
| 4559 | kvm_lapic_set_eoi(vcpu); | ||
| 4560 | skip_emulated_instruction(vcpu); | ||
| 4561 | return 1; | ||
| 4562 | } | ||
| 4563 | } | ||
| 4521 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | 4564 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
| 4522 | } | 4565 | } |
| 4523 | 4566 | ||
| @@ -5591,8 +5634,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
| 5591 | return 0; | 5634 | return 0; |
| 5592 | 5635 | ||
| 5593 | if (unlikely(vmx->fail)) { | 5636 | if (unlikely(vmx->fail)) { |
| 5594 | printk(KERN_INFO "%s failed vm entry %x\n", | 5637 | pr_info_ratelimited("%s failed vm entry %x\n", __func__, |
| 5595 | __func__, vmcs_read32(VM_INSTRUCTION_ERROR)); | 5638 | vmcs_read32(VM_INSTRUCTION_ERROR)); |
| 5596 | return 1; | 5639 | return 1; |
| 5597 | } | 5640 | } |
| 5598 | 5641 | ||
| @@ -5696,8 +5739,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
| 5696 | u32 exit_reason = vmx->exit_reason; | 5739 | u32 exit_reason = vmx->exit_reason; |
| 5697 | u32 vectoring_info = vmx->idt_vectoring_info; | 5740 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 5698 | 5741 | ||
| 5699 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); | ||
| 5700 | |||
| 5701 | /* If guest state is invalid, start emulating */ | 5742 | /* If guest state is invalid, start emulating */ |
| 5702 | if (vmx->emulation_required && emulate_invalid_guest_state) | 5743 | if (vmx->emulation_required && emulate_invalid_guest_state) |
| 5703 | return handle_invalid_guest_state(vcpu); | 5744 | return handle_invalid_guest_state(vcpu); |
| @@ -6101,6 +6142,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 6101 | vmx->loaded_vmcs->launched = 1; | 6142 | vmx->loaded_vmcs->launched = 1; |
| 6102 | 6143 | ||
| 6103 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 6144 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
| 6145 | trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); | ||
| 6104 | 6146 | ||
| 6105 | vmx_complete_atomic_exit(vmx); | 6147 | vmx_complete_atomic_exit(vmx); |
| 6106 | vmx_recover_nmi_blocking(vmx); | 6148 | vmx_recover_nmi_blocking(vmx); |
| @@ -6241,49 +6283,6 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
| 6241 | return ret; | 6283 | return ret; |
| 6242 | } | 6284 | } |
| 6243 | 6285 | ||
| 6244 | #define _ER(x) { EXIT_REASON_##x, #x } | ||
| 6245 | |||
| 6246 | static const struct trace_print_flags vmx_exit_reasons_str[] = { | ||
| 6247 | _ER(EXCEPTION_NMI), | ||
| 6248 | _ER(EXTERNAL_INTERRUPT), | ||
| 6249 | _ER(TRIPLE_FAULT), | ||
| 6250 | _ER(PENDING_INTERRUPT), | ||
| 6251 | _ER(NMI_WINDOW), | ||
| 6252 | _ER(TASK_SWITCH), | ||
| 6253 | _ER(CPUID), | ||
| 6254 | _ER(HLT), | ||
| 6255 | _ER(INVLPG), | ||
| 6256 | _ER(RDPMC), | ||
| 6257 | _ER(RDTSC), | ||
| 6258 | _ER(VMCALL), | ||
| 6259 | _ER(VMCLEAR), | ||
| 6260 | _ER(VMLAUNCH), | ||
| 6261 | _ER(VMPTRLD), | ||
| 6262 | _ER(VMPTRST), | ||
| 6263 | _ER(VMREAD), | ||
| 6264 | _ER(VMRESUME), | ||
| 6265 | _ER(VMWRITE), | ||
| 6266 | _ER(VMOFF), | ||
| 6267 | _ER(VMON), | ||
| 6268 | _ER(CR_ACCESS), | ||
| 6269 | _ER(DR_ACCESS), | ||
| 6270 | _ER(IO_INSTRUCTION), | ||
| 6271 | _ER(MSR_READ), | ||
| 6272 | _ER(MSR_WRITE), | ||
| 6273 | _ER(MWAIT_INSTRUCTION), | ||
| 6274 | _ER(MONITOR_INSTRUCTION), | ||
| 6275 | _ER(PAUSE_INSTRUCTION), | ||
| 6276 | _ER(MCE_DURING_VMENTRY), | ||
| 6277 | _ER(TPR_BELOW_THRESHOLD), | ||
| 6278 | _ER(APIC_ACCESS), | ||
| 6279 | _ER(EPT_VIOLATION), | ||
| 6280 | _ER(EPT_MISCONFIG), | ||
| 6281 | _ER(WBINVD), | ||
| 6282 | { -1, NULL } | ||
| 6283 | }; | ||
| 6284 | |||
| 6285 | #undef _ER | ||
| 6286 | |||
| 6287 | static int vmx_get_lpage_level(void) | 6286 | static int vmx_get_lpage_level(void) |
| 6288 | { | 6287 | { |
| 6289 | if (enable_ept && !cpu_has_vmx_ept_1g_page()) | 6288 | if (enable_ept && !cpu_has_vmx_ept_1g_page()) |
| @@ -6514,8 +6513,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 6514 | 6513 | ||
| 6515 | set_cr4_guest_host_mask(vmx); | 6514 | set_cr4_guest_host_mask(vmx); |
| 6516 | 6515 | ||
| 6517 | vmcs_write64(TSC_OFFSET, | 6516 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) |
| 6518 | vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); | 6517 | vmcs_write64(TSC_OFFSET, |
| 6518 | vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); | ||
| 6519 | else | ||
| 6520 | vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); | ||
| 6519 | 6521 | ||
| 6520 | if (enable_vpid) { | 6522 | if (enable_vpid) { |
| 6521 | /* | 6523 | /* |
| @@ -6610,9 +6612,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 6610 | if (vmcs12->vm_entry_msr_load_count > 0 || | 6612 | if (vmcs12->vm_entry_msr_load_count > 0 || |
| 6611 | vmcs12->vm_exit_msr_load_count > 0 || | 6613 | vmcs12->vm_exit_msr_load_count > 0 || |
| 6612 | vmcs12->vm_exit_msr_store_count > 0) { | 6614 | vmcs12->vm_exit_msr_store_count > 0) { |
| 6613 | if (printk_ratelimit()) | 6615 | pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n", |
| 6614 | printk(KERN_WARNING | 6616 | __func__); |
| 6615 | "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__); | ||
| 6616 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 6617 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
| 6617 | return 1; | 6618 | return 1; |
| 6618 | } | 6619 | } |
| @@ -6922,7 +6923,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
| 6922 | 6923 | ||
| 6923 | load_vmcs12_host_state(vcpu, vmcs12); | 6924 | load_vmcs12_host_state(vcpu, vmcs12); |
| 6924 | 6925 | ||
| 6925 | /* Update TSC_OFFSET if vmx_adjust_tsc_offset() was used while L2 ran */ | 6926 | /* Update TSC_OFFSET if TSC was changed while L2 ran */ |
| 6926 | vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); | 6927 | vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); |
| 6927 | 6928 | ||
| 6928 | /* This is needed for same reason as it was needed in prepare_vmcs02 */ | 6929 | /* This is needed for same reason as it was needed in prepare_vmcs02 */ |
| @@ -7039,7 +7040,6 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 7039 | .get_mt_mask = vmx_get_mt_mask, | 7040 | .get_mt_mask = vmx_get_mt_mask, |
| 7040 | 7041 | ||
| 7041 | .get_exit_info = vmx_get_exit_info, | 7042 | .get_exit_info = vmx_get_exit_info, |
| 7042 | .exit_reasons_str = vmx_exit_reasons_str, | ||
| 7043 | 7043 | ||
| 7044 | .get_lpage_level = vmx_get_lpage_level, | 7044 | .get_lpage_level = vmx_get_lpage_level, |
| 7045 | 7045 | ||
| @@ -7055,6 +7055,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 7055 | .write_tsc_offset = vmx_write_tsc_offset, | 7055 | .write_tsc_offset = vmx_write_tsc_offset, |
| 7056 | .adjust_tsc_offset = vmx_adjust_tsc_offset, | 7056 | .adjust_tsc_offset = vmx_adjust_tsc_offset, |
| 7057 | .compute_tsc_offset = vmx_compute_tsc_offset, | 7057 | .compute_tsc_offset = vmx_compute_tsc_offset, |
| 7058 | .read_l1_tsc = vmx_read_l1_tsc, | ||
| 7058 | 7059 | ||
| 7059 | .set_tdp_cr3 = vmx_set_cr3, | 7060 | .set_tdp_cr3 = vmx_set_cr3, |
| 7060 | 7061 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 84a28ea45fa4..cf269096eadf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -83,6 +83,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); | |||
| 83 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); | 83 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); |
| 84 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | 84 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, |
| 85 | struct kvm_cpuid_entry2 __user *entries); | 85 | struct kvm_cpuid_entry2 __user *entries); |
| 86 | static void process_nmi(struct kvm_vcpu *vcpu); | ||
| 86 | 87 | ||
| 87 | struct kvm_x86_ops *kvm_x86_ops; | 88 | struct kvm_x86_ops *kvm_x86_ops; |
| 88 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | 89 | EXPORT_SYMBOL_GPL(kvm_x86_ops); |
| @@ -359,8 +360,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
| 359 | 360 | ||
| 360 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 361 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
| 361 | { | 362 | { |
| 362 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 363 | atomic_inc(&vcpu->arch.nmi_queued); |
| 363 | vcpu->arch.nmi_pending = 1; | 364 | kvm_make_request(KVM_REQ_NMI, vcpu); |
| 364 | } | 365 | } |
| 365 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | 366 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); |
| 366 | 367 | ||
| @@ -599,6 +600,8 @@ static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | |||
| 599 | static void update_cpuid(struct kvm_vcpu *vcpu) | 600 | static void update_cpuid(struct kvm_vcpu *vcpu) |
| 600 | { | 601 | { |
| 601 | struct kvm_cpuid_entry2 *best; | 602 | struct kvm_cpuid_entry2 *best; |
| 603 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
| 604 | u32 timer_mode_mask; | ||
| 602 | 605 | ||
| 603 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | 606 | best = kvm_find_cpuid_entry(vcpu, 1, 0); |
| 604 | if (!best) | 607 | if (!best) |
| @@ -610,6 +613,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu) | |||
| 610 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | 613 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) |
| 611 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | 614 | best->ecx |= bit(X86_FEATURE_OSXSAVE); |
| 612 | } | 615 | } |
| 616 | |||
| 617 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | ||
| 618 | best->function == 0x1) { | ||
| 619 | best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); | ||
| 620 | timer_mode_mask = 3 << 17; | ||
| 621 | } else | ||
| 622 | timer_mode_mask = 1 << 17; | ||
| 623 | |||
| 624 | if (apic) | ||
| 625 | apic->lapic_timer.timer_mode_mask = timer_mode_mask; | ||
| 613 | } | 626 | } |
| 614 | 627 | ||
| 615 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 628 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
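The update_cpuid() change advertises X86_FEATURE_TSC_DEADLINE_TIMER to guests on Intel hosts and widens the local APIC timer_mode_mask accordingly. The mask covers the LVT Timer mode field (bits 18:17): one-shot and periodic modes fit in bit 17 alone, while the TSC-deadline mode also needs bit 18, hence 3 << 17 versus 1 << 17. The same decision in isolation, where has_tsc_deadline stands in for the vendor/CPUID test performed above:

    /* has_tsc_deadline is a placeholder for the Intel + CPUID 0x1 check. */
    u32 timer_mode_mask = has_tsc_deadline ? (3u << 17)   /* bits 18:17 */
                                           : (1u << 17);  /* bit 17 only */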
| @@ -825,6 +838,7 @@ static u32 msrs_to_save[] = { | |||
| 825 | static unsigned num_msrs_to_save; | 838 | static unsigned num_msrs_to_save; |
| 826 | 839 | ||
| 827 | static u32 emulated_msrs[] = { | 840 | static u32 emulated_msrs[] = { |
| 841 | MSR_IA32_TSCDEADLINE, | ||
| 828 | MSR_IA32_MISC_ENABLE, | 842 | MSR_IA32_MISC_ENABLE, |
| 829 | MSR_IA32_MCG_STATUS, | 843 | MSR_IA32_MCG_STATUS, |
| 830 | MSR_IA32_MCG_CTL, | 844 | MSR_IA32_MCG_CTL, |
| @@ -1000,7 +1014,7 @@ static inline int kvm_tsc_changes_freq(void) | |||
| 1000 | return ret; | 1014 | return ret; |
| 1001 | } | 1015 | } |
| 1002 | 1016 | ||
| 1003 | static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) | 1017 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) |
| 1004 | { | 1018 | { |
| 1005 | if (vcpu->arch.virtual_tsc_khz) | 1019 | if (vcpu->arch.virtual_tsc_khz) |
| 1006 | return vcpu->arch.virtual_tsc_khz; | 1020 | return vcpu->arch.virtual_tsc_khz; |
| @@ -1098,7 +1112,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
| 1098 | 1112 | ||
| 1099 | /* Keep irq disabled to prevent changes to the clock */ | 1113 | /* Keep irq disabled to prevent changes to the clock */ |
| 1100 | local_irq_save(flags); | 1114 | local_irq_save(flags); |
| 1101 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); | 1115 | tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); |
| 1102 | kernel_ns = get_kernel_ns(); | 1116 | kernel_ns = get_kernel_ns(); |
| 1103 | this_tsc_khz = vcpu_tsc_khz(v); | 1117 | this_tsc_khz = vcpu_tsc_khz(v); |
| 1104 | if (unlikely(this_tsc_khz == 0)) { | 1118 | if (unlikely(this_tsc_khz == 0)) { |
| @@ -1564,6 +1578,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1564 | break; | 1578 | break; |
| 1565 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: | 1579 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: |
| 1566 | return kvm_x2apic_msr_write(vcpu, msr, data); | 1580 | return kvm_x2apic_msr_write(vcpu, msr, data); |
| 1581 | case MSR_IA32_TSCDEADLINE: | ||
| 1582 | kvm_set_lapic_tscdeadline_msr(vcpu, data); | ||
| 1583 | break; | ||
| 1567 | case MSR_IA32_MISC_ENABLE: | 1584 | case MSR_IA32_MISC_ENABLE: |
| 1568 | vcpu->arch.ia32_misc_enable_msr = data; | 1585 | vcpu->arch.ia32_misc_enable_msr = data; |
| 1569 | break; | 1586 | break; |
| @@ -1825,6 +1842,9 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1825 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); | 1842 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); |
| 1826 | case HV_X64_MSR_TPR: | 1843 | case HV_X64_MSR_TPR: |
| 1827 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); | 1844 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); |
| 1845 | case HV_X64_MSR_APIC_ASSIST_PAGE: | ||
| 1846 | data = vcpu->arch.hv_vapic; | ||
| 1847 | break; | ||
| 1828 | default: | 1848 | default: |
| 1829 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1849 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
| 1830 | return 1; | 1850 | return 1; |
| @@ -1839,7 +1859,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1839 | 1859 | ||
| 1840 | switch (msr) { | 1860 | switch (msr) { |
| 1841 | case MSR_IA32_PLATFORM_ID: | 1861 | case MSR_IA32_PLATFORM_ID: |
| 1842 | case MSR_IA32_UCODE_REV: | ||
| 1843 | case MSR_IA32_EBL_CR_POWERON: | 1862 | case MSR_IA32_EBL_CR_POWERON: |
| 1844 | case MSR_IA32_DEBUGCTLMSR: | 1863 | case MSR_IA32_DEBUGCTLMSR: |
| 1845 | case MSR_IA32_LASTBRANCHFROMIP: | 1864 | case MSR_IA32_LASTBRANCHFROMIP: |
| @@ -1860,6 +1879,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1860 | case MSR_FAM10H_MMIO_CONF_BASE: | 1879 | case MSR_FAM10H_MMIO_CONF_BASE: |
| 1861 | data = 0; | 1880 | data = 0; |
| 1862 | break; | 1881 | break; |
| 1882 | case MSR_IA32_UCODE_REV: | ||
| 1883 | data = 0x100000000ULL; | ||
| 1884 | break; | ||
| 1863 | case MSR_MTRRcap: | 1885 | case MSR_MTRRcap: |
| 1864 | data = 0x500 | KVM_NR_VAR_MTRR; | 1886 | data = 0x500 | KVM_NR_VAR_MTRR; |
| 1865 | break; | 1887 | break; |
| @@ -1888,6 +1910,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1888 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: | 1910 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: |
| 1889 | return kvm_x2apic_msr_read(vcpu, msr, pdata); | 1911 | return kvm_x2apic_msr_read(vcpu, msr, pdata); |
| 1890 | break; | 1912 | break; |
| 1913 | case MSR_IA32_TSCDEADLINE: | ||
| 1914 | data = kvm_get_lapic_tscdeadline_msr(vcpu); | ||
| 1915 | break; | ||
| 1891 | case MSR_IA32_MISC_ENABLE: | 1916 | case MSR_IA32_MISC_ENABLE: |
| 1892 | data = vcpu->arch.ia32_misc_enable_msr; | 1917 | data = vcpu->arch.ia32_misc_enable_msr; |
| 1893 | break; | 1918 | break; |
| @@ -2086,6 +2111,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 2086 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); | 2111 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); |
| 2087 | break; | 2112 | break; |
| 2088 | case KVM_CAP_NR_VCPUS: | 2113 | case KVM_CAP_NR_VCPUS: |
| 2114 | r = KVM_SOFT_MAX_VCPUS; | ||
| 2115 | break; | ||
| 2116 | case KVM_CAP_MAX_VCPUS: | ||
| 2089 | r = KVM_MAX_VCPUS; | 2117 | r = KVM_MAX_VCPUS; |
| 2090 | break; | 2118 | break; |
| 2091 | case KVM_CAP_NR_MEMSLOTS: | 2119 | case KVM_CAP_NR_MEMSLOTS: |
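With this hunk KVM_CAP_NR_VCPUS starts returning the recommended (soft) limit, KVM_SOFT_MAX_VCPUS, while the new KVM_CAP_MAX_VCPUS returns the hard limit KVM_MAX_VCPUS. A sketch of how userspace might probe both through KVM_CHECK_EXTENSION, with the fallbacks for older kernels; error handling is trimmed and kvm_fd is assumed to be an open /dev/kvm descriptor:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static void probe_vcpu_limits(int kvm_fd, int *soft, int *hard)
    {
        *soft = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
        *hard = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

        if (*soft <= 0)   /* capability absent: assume the historical 4 */
            *soft = 4;
        if (*hard <= 0)   /* KVM_CAP_MAX_VCPUS absent: same as the soft limit */
            *hard = *soft;
    }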
| @@ -2210,7 +2238,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 2210 | s64 tsc_delta; | 2238 | s64 tsc_delta; |
| 2211 | u64 tsc; | 2239 | u64 tsc; |
| 2212 | 2240 | ||
| 2213 | kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); | 2241 | tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
| 2214 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : | 2242 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : |
| 2215 | tsc - vcpu->arch.last_guest_tsc; | 2243 | tsc - vcpu->arch.last_guest_tsc; |
| 2216 | 2244 | ||
| @@ -2234,7 +2262,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 2234 | { | 2262 | { |
| 2235 | kvm_x86_ops->vcpu_put(vcpu); | 2263 | kvm_x86_ops->vcpu_put(vcpu); |
| 2236 | kvm_put_guest_fpu(vcpu); | 2264 | kvm_put_guest_fpu(vcpu); |
| 2237 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | 2265 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
| 2238 | } | 2266 | } |
| 2239 | 2267 | ||
| 2240 | static int is_efer_nx(void) | 2268 | static int is_efer_nx(void) |
| @@ -2819,6 +2847,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
| 2819 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | 2847 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, |
| 2820 | struct kvm_vcpu_events *events) | 2848 | struct kvm_vcpu_events *events) |
| 2821 | { | 2849 | { |
| 2850 | process_nmi(vcpu); | ||
| 2822 | events->exception.injected = | 2851 | events->exception.injected = |
| 2823 | vcpu->arch.exception.pending && | 2852 | vcpu->arch.exception.pending && |
| 2824 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | 2853 | !kvm_exception_is_soft(vcpu->arch.exception.nr); |
| @@ -2836,7 +2865,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2836 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | 2865 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); |
| 2837 | 2866 | ||
| 2838 | events->nmi.injected = vcpu->arch.nmi_injected; | 2867 | events->nmi.injected = vcpu->arch.nmi_injected; |
| 2839 | events->nmi.pending = vcpu->arch.nmi_pending; | 2868 | events->nmi.pending = vcpu->arch.nmi_pending != 0; |
| 2840 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | 2869 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); |
| 2841 | events->nmi.pad = 0; | 2870 | events->nmi.pad = 0; |
| 2842 | 2871 | ||
| @@ -2856,6 +2885,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2856 | | KVM_VCPUEVENT_VALID_SHADOW)) | 2885 | | KVM_VCPUEVENT_VALID_SHADOW)) |
| 2857 | return -EINVAL; | 2886 | return -EINVAL; |
| 2858 | 2887 | ||
| 2888 | process_nmi(vcpu); | ||
| 2859 | vcpu->arch.exception.pending = events->exception.injected; | 2889 | vcpu->arch.exception.pending = events->exception.injected; |
| 2860 | vcpu->arch.exception.nr = events->exception.nr; | 2890 | vcpu->arch.exception.nr = events->exception.nr; |
| 2861 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | 2891 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; |
| @@ -3556,7 +3586,11 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 3556 | if (r) { | 3586 | if (r) { |
| 3557 | mutex_lock(&kvm->slots_lock); | 3587 | mutex_lock(&kvm->slots_lock); |
| 3558 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | 3588 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, |
| 3559 | &vpic->dev); | 3589 | &vpic->dev_master); |
| 3590 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
| 3591 | &vpic->dev_slave); | ||
| 3592 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
| 3593 | &vpic->dev_eclr); | ||
| 3560 | mutex_unlock(&kvm->slots_lock); | 3594 | mutex_unlock(&kvm->slots_lock); |
| 3561 | kfree(vpic); | 3595 | kfree(vpic); |
| 3562 | goto create_irqchip_unlock; | 3596 | goto create_irqchip_unlock; |
| @@ -4045,84 +4079,105 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | |||
| 4045 | return 0; | 4079 | return 0; |
| 4046 | } | 4080 | } |
| 4047 | 4081 | ||
| 4048 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | 4082 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 4049 | unsigned long addr, | 4083 | const void *val, int bytes) |
| 4050 | void *val, | ||
| 4051 | unsigned int bytes, | ||
| 4052 | struct x86_exception *exception) | ||
| 4053 | { | 4084 | { |
| 4054 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 4085 | int ret; |
| 4055 | gpa_t gpa; | ||
| 4056 | int handled, ret; | ||
| 4057 | 4086 | ||
| 4087 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | ||
| 4088 | if (ret < 0) | ||
| 4089 | return 0; | ||
| 4090 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); | ||
| 4091 | return 1; | ||
| 4092 | } | ||
| 4093 | |||
| 4094 | struct read_write_emulator_ops { | ||
| 4095 | int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val, | ||
| 4096 | int bytes); | ||
| 4097 | int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
| 4098 | void *val, int bytes); | ||
| 4099 | int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
| 4100 | int bytes, void *val); | ||
| 4101 | int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
| 4102 | void *val, int bytes); | ||
| 4103 | bool write; | ||
| 4104 | }; | ||
| 4105 | |||
| 4106 | static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) | ||
| 4107 | { | ||
| 4058 | if (vcpu->mmio_read_completed) { | 4108 | if (vcpu->mmio_read_completed) { |
| 4059 | memcpy(val, vcpu->mmio_data, bytes); | 4109 | memcpy(val, vcpu->mmio_data, bytes); |
| 4060 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, | 4110 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, |
| 4061 | vcpu->mmio_phys_addr, *(u64 *)val); | 4111 | vcpu->mmio_phys_addr, *(u64 *)val); |
| 4062 | vcpu->mmio_read_completed = 0; | 4112 | vcpu->mmio_read_completed = 0; |
| 4063 | return X86EMUL_CONTINUE; | 4113 | return 1; |
| 4064 | } | 4114 | } |
| 4065 | 4115 | ||
| 4066 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, false); | 4116 | return 0; |
| 4067 | 4117 | } | |
| 4068 | if (ret < 0) | ||
| 4069 | return X86EMUL_PROPAGATE_FAULT; | ||
| 4070 | |||
| 4071 | if (ret) | ||
| 4072 | goto mmio; | ||
| 4073 | |||
| 4074 | if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) | ||
| 4075 | == X86EMUL_CONTINUE) | ||
| 4076 | return X86EMUL_CONTINUE; | ||
| 4077 | 4118 | ||
| 4078 | mmio: | 4119 | static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 4079 | /* | 4120 | void *val, int bytes) |
| 4080 | * Is this MMIO handled locally? | 4121 | { |
| 4081 | */ | 4122 | return !kvm_read_guest(vcpu->kvm, gpa, val, bytes); |
| 4082 | handled = vcpu_mmio_read(vcpu, gpa, bytes, val); | 4123 | } |
| 4083 | 4124 | ||
| 4084 | if (handled == bytes) | 4125 | static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 4085 | return X86EMUL_CONTINUE; | 4126 | void *val, int bytes) |
| 4127 | { | ||
| 4128 | return emulator_write_phys(vcpu, gpa, val, bytes); | ||
| 4129 | } | ||
| 4086 | 4130 | ||
| 4087 | gpa += handled; | 4131 | static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) |
| 4088 | bytes -= handled; | 4132 | { |
| 4089 | val += handled; | 4133 | trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); |
| 4134 | return vcpu_mmio_write(vcpu, gpa, bytes, val); | ||
| 4135 | } | ||
| 4090 | 4136 | ||
| 4137 | static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
| 4138 | void *val, int bytes) | ||
| 4139 | { | ||
| 4091 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); | 4140 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); |
| 4092 | |||
| 4093 | vcpu->mmio_needed = 1; | ||
| 4094 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | ||
| 4095 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | ||
| 4096 | vcpu->mmio_size = bytes; | ||
| 4097 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
| 4098 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; | ||
| 4099 | vcpu->mmio_index = 0; | ||
| 4100 | |||
| 4101 | return X86EMUL_IO_NEEDED; | 4141 | return X86EMUL_IO_NEEDED; |
| 4102 | } | 4142 | } |
| 4103 | 4143 | ||
| 4104 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 4144 | static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 4105 | const void *val, int bytes) | 4145 | void *val, int bytes) |
| 4106 | { | 4146 | { |
| 4107 | int ret; | 4147 | memcpy(vcpu->mmio_data, val, bytes); |
| 4108 | 4148 | memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); | |
| 4109 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 4149 | return X86EMUL_CONTINUE; |
| 4110 | if (ret < 0) | ||
| 4111 | return 0; | ||
| 4112 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); | ||
| 4113 | return 1; | ||
| 4114 | } | 4150 | } |
| 4115 | 4151 | ||
| 4116 | static int emulator_write_emulated_onepage(unsigned long addr, | 4152 | static struct read_write_emulator_ops read_emultor = { |
| 4117 | const void *val, | 4153 | .read_write_prepare = read_prepare, |
| 4118 | unsigned int bytes, | 4154 | .read_write_emulate = read_emulate, |
| 4119 | struct x86_exception *exception, | 4155 | .read_write_mmio = vcpu_mmio_read, |
| 4120 | struct kvm_vcpu *vcpu) | 4156 | .read_write_exit_mmio = read_exit_mmio, |
| 4157 | }; | ||
| 4158 | |||
| 4159 | static struct read_write_emulator_ops write_emultor = { | ||
| 4160 | .read_write_emulate = write_emulate, | ||
| 4161 | .read_write_mmio = write_mmio, | ||
| 4162 | .read_write_exit_mmio = write_exit_mmio, | ||
| 4163 | .write = true, | ||
| 4164 | }; | ||
| 4165 | |||
| 4166 | static int emulator_read_write_onepage(unsigned long addr, void *val, | ||
| 4167 | unsigned int bytes, | ||
| 4168 | struct x86_exception *exception, | ||
| 4169 | struct kvm_vcpu *vcpu, | ||
| 4170 | struct read_write_emulator_ops *ops) | ||
| 4121 | { | 4171 | { |
| 4122 | gpa_t gpa; | 4172 | gpa_t gpa; |
| 4123 | int handled, ret; | 4173 | int handled, ret; |
| 4174 | bool write = ops->write; | ||
| 4124 | 4175 | ||
| 4125 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, true); | 4176 | if (ops->read_write_prepare && |
| 4177 | ops->read_write_prepare(vcpu, val, bytes)) | ||
| 4178 | return X86EMUL_CONTINUE; | ||
| 4179 | |||
| 4180 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); | ||
| 4126 | 4181 | ||
| 4127 | if (ret < 0) | 4182 | if (ret < 0) |
| 4128 | return X86EMUL_PROPAGATE_FAULT; | 4183 | return X86EMUL_PROPAGATE_FAULT; |
| @@ -4131,15 +4186,14 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
| 4131 | if (ret) | 4186 | if (ret) |
| 4132 | goto mmio; | 4187 | goto mmio; |
| 4133 | 4188 | ||
| 4134 | if (emulator_write_phys(vcpu, gpa, val, bytes)) | 4189 | if (ops->read_write_emulate(vcpu, gpa, val, bytes)) |
| 4135 | return X86EMUL_CONTINUE; | 4190 | return X86EMUL_CONTINUE; |
| 4136 | 4191 | ||
| 4137 | mmio: | 4192 | mmio: |
| 4138 | trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); | ||
| 4139 | /* | 4193 | /* |
| 4140 | * Is this MMIO handled locally? | 4194 | * Is this MMIO handled locally? |
| 4141 | */ | 4195 | */ |
| 4142 | handled = vcpu_mmio_write(vcpu, gpa, bytes, val); | 4196 | handled = ops->read_write_mmio(vcpu, gpa, bytes, val); |
| 4143 | if (handled == bytes) | 4197 | if (handled == bytes) |
| 4144 | return X86EMUL_CONTINUE; | 4198 | return X86EMUL_CONTINUE; |
| 4145 | 4199 | ||
| @@ -4148,23 +4202,20 @@ mmio: | |||
| 4148 | val += handled; | 4202 | val += handled; |
| 4149 | 4203 | ||
| 4150 | vcpu->mmio_needed = 1; | 4204 | vcpu->mmio_needed = 1; |
| 4151 | memcpy(vcpu->mmio_data, val, bytes); | ||
| 4152 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 4205 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
| 4153 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | 4206 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
| 4154 | vcpu->mmio_size = bytes; | 4207 | vcpu->mmio_size = bytes; |
| 4155 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | 4208 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); |
| 4156 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; | 4209 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = write; |
| 4157 | memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); | ||
| 4158 | vcpu->mmio_index = 0; | 4210 | vcpu->mmio_index = 0; |
| 4159 | 4211 | ||
| 4160 | return X86EMUL_CONTINUE; | 4212 | return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); |
| 4161 | } | 4213 | } |
| 4162 | 4214 | ||
| 4163 | int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, | 4215 | int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, |
| 4164 | unsigned long addr, | 4216 | void *val, unsigned int bytes, |
| 4165 | const void *val, | 4217 | struct x86_exception *exception, |
| 4166 | unsigned int bytes, | 4218 | struct read_write_emulator_ops *ops) |
| 4167 | struct x86_exception *exception) | ||
| 4168 | { | 4219 | { |
| 4169 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 4220 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
| 4170 | 4221 | ||
| @@ -4173,16 +4224,38 @@ int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, | |||
| 4173 | int rc, now; | 4224 | int rc, now; |
| 4174 | 4225 | ||
| 4175 | now = -addr & ~PAGE_MASK; | 4226 | now = -addr & ~PAGE_MASK; |
| 4176 | rc = emulator_write_emulated_onepage(addr, val, now, exception, | 4227 | rc = emulator_read_write_onepage(addr, val, now, exception, |
| 4177 | vcpu); | 4228 | vcpu, ops); |
| 4229 | |||
| 4178 | if (rc != X86EMUL_CONTINUE) | 4230 | if (rc != X86EMUL_CONTINUE) |
| 4179 | return rc; | 4231 | return rc; |
| 4180 | addr += now; | 4232 | addr += now; |
| 4181 | val += now; | 4233 | val += now; |
| 4182 | bytes -= now; | 4234 | bytes -= now; |
| 4183 | } | 4235 | } |
| 4184 | return emulator_write_emulated_onepage(addr, val, bytes, exception, | 4236 | |
| 4185 | vcpu); | 4237 | return emulator_read_write_onepage(addr, val, bytes, exception, |
| 4238 | vcpu, ops); | ||
| 4239 | } | ||
| 4240 | |||
| 4241 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | ||
| 4242 | unsigned long addr, | ||
| 4243 | void *val, | ||
| 4244 | unsigned int bytes, | ||
| 4245 | struct x86_exception *exception) | ||
| 4246 | { | ||
| 4247 | return emulator_read_write(ctxt, addr, val, bytes, | ||
| 4248 | exception, &read_emultor); | ||
| 4249 | } | ||
| 4250 | |||
| 4251 | int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, | ||
| 4252 | unsigned long addr, | ||
| 4253 | const void *val, | ||
| 4254 | unsigned int bytes, | ||
| 4255 | struct x86_exception *exception) | ||
| 4256 | { | ||
| 4257 | return emulator_read_write(ctxt, addr, (void *)val, bytes, | ||
| 4258 | exception, &write_emultor); | ||
| 4186 | } | 4259 | } |
| 4187 | 4260 | ||
| 4188 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | 4261 | #define CMPXCHG_TYPE(t, ptr, old, new) \ |
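emulator_read_emulated() and emulator_write_emulated() are now thin wrappers around a common emulator_read_write(), parameterised by a read_write_emulator_ops table, so the page-splitting and MMIO-exit bookkeeping exist only once. The split size comes from now = -addr & ~PAGE_MASK, i.e. the number of bytes left before the next page boundary; a small worked example of that expression:

    /* For addr = 0x1ff8 and 4 KiB pages: -0x1ff8 & 0xfff = 0x8, so the
     * first per-page call covers the 8 bytes up to 0x2000 and a second
     * call handles the remainder on the following page. */
    unsigned int now = -addr & ~PAGE_MASK;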
| @@ -4712,7 +4785,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) | |||
| 4712 | kvm_set_rflags(vcpu, ctxt->eflags); | 4785 | kvm_set_rflags(vcpu, ctxt->eflags); |
| 4713 | 4786 | ||
| 4714 | if (irq == NMI_VECTOR) | 4787 | if (irq == NMI_VECTOR) |
| 4715 | vcpu->arch.nmi_pending = false; | 4788 | vcpu->arch.nmi_pending = 0; |
| 4716 | else | 4789 | else |
| 4717 | vcpu->arch.interrupt.pending = false; | 4790 | vcpu->arch.interrupt.pending = false; |
| 4718 | 4791 | ||
| @@ -4788,7 +4861,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 4788 | 4861 | ||
| 4789 | trace_kvm_emulate_insn_start(vcpu); | 4862 | trace_kvm_emulate_insn_start(vcpu); |
| 4790 | ++vcpu->stat.insn_emulation; | 4863 | ++vcpu->stat.insn_emulation; |
| 4791 | if (r) { | 4864 | if (r != EMULATION_OK) { |
| 4792 | if (emulation_type & EMULTYPE_TRAP_UD) | 4865 | if (emulation_type & EMULTYPE_TRAP_UD) |
| 4793 | return EMULATE_FAIL; | 4866 | return EMULATE_FAIL; |
| 4794 | if (reexecute_instruction(vcpu, cr2)) | 4867 | if (reexecute_instruction(vcpu, cr2)) |
| @@ -5521,7 +5594,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
| 5521 | /* try to inject new event if pending */ | 5594 | /* try to inject new event if pending */ |
| 5522 | if (vcpu->arch.nmi_pending) { | 5595 | if (vcpu->arch.nmi_pending) { |
| 5523 | if (kvm_x86_ops->nmi_allowed(vcpu)) { | 5596 | if (kvm_x86_ops->nmi_allowed(vcpu)) { |
| 5524 | vcpu->arch.nmi_pending = false; | 5597 | --vcpu->arch.nmi_pending; |
| 5525 | vcpu->arch.nmi_injected = true; | 5598 | vcpu->arch.nmi_injected = true; |
| 5526 | kvm_x86_ops->set_nmi(vcpu); | 5599 | kvm_x86_ops->set_nmi(vcpu); |
| 5527 | } | 5600 | } |
| @@ -5553,10 +5626,26 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | |||
| 5553 | } | 5626 | } |
| 5554 | } | 5627 | } |
| 5555 | 5628 | ||
| 5629 | static void process_nmi(struct kvm_vcpu *vcpu) | ||
| 5630 | { | ||
| 5631 | unsigned limit = 2; | ||
| 5632 | |||
| 5633 | /* | ||
| 5634 | * x86 is limited to one NMI running, and one NMI pending after it. | ||
| 5635 | * If an NMI is already in progress, limit further NMIs to just one. | ||
| 5636 | * Otherwise, allow two (and we'll inject the first one immediately). | ||
| 5637 | */ | ||
| 5638 | if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected) | ||
| 5639 | limit = 1; | ||
| 5640 | |||
| 5641 | vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0); | ||
| 5642 | vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit); | ||
| 5643 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
| 5644 | } | ||
| 5645 | |||
| 5556 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5646 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
| 5557 | { | 5647 | { |
| 5558 | int r; | 5648 | int r; |
| 5559 | bool nmi_pending; | ||
| 5560 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5649 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
| 5561 | vcpu->run->request_interrupt_window; | 5650 | vcpu->run->request_interrupt_window; |
| 5562 | 5651 | ||
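Taken together with the kvm_inject_nmi() change earlier in this patch, NMIs are now queued locklessly in nmi_queued and folded into nmi_pending on the vcpu side by process_nmi(), which enforces the architectural limit of one NMI in service plus one latched behind it. A simplified sketch of the producer/consumer pair; structure and field names are abbreviated for illustration:

    /* Producer: may run from any context. */
    void inject_nmi(struct vcpu *v)
    {
        atomic_inc(&v->nmi_queued);
        make_request(v, REQ_NMI);
    }

    /* Consumer: runs on the vcpu before entering the guest. */
    void process_nmi(struct vcpu *v)
    {
        unsigned int limit = (v->nmi_masked || v->nmi_injected) ? 1 : 2;

        v->nmi_pending += atomic_xchg(&v->nmi_queued, 0);
        v->nmi_pending = min(v->nmi_pending, limit);
    }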
| @@ -5596,6 +5685,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5596 | } | 5685 | } |
| 5597 | if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) | 5686 | if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) |
| 5598 | record_steal_time(vcpu); | 5687 | record_steal_time(vcpu); |
| 5688 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | ||
| 5689 | process_nmi(vcpu); | ||
| 5599 | 5690 | ||
| 5600 | } | 5691 | } |
| 5601 | 5692 | ||
| @@ -5603,19 +5694,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5603 | if (unlikely(r)) | 5694 | if (unlikely(r)) |
| 5604 | goto out; | 5695 | goto out; |
| 5605 | 5696 | ||
| 5606 | /* | ||
| 5607 | * An NMI can be injected between local nmi_pending read and | ||
| 5608 | * vcpu->arch.nmi_pending read inside inject_pending_event(). | ||
| 5609 | * But in that case, KVM_REQ_EVENT will be set, which makes | ||
| 5610 | * the race described above benign. | ||
| 5611 | */ | ||
| 5612 | nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); | ||
| 5613 | |||
| 5614 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5697 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
| 5615 | inject_pending_event(vcpu); | 5698 | inject_pending_event(vcpu); |
| 5616 | 5699 | ||
| 5617 | /* enable NMI/IRQ window open exits if needed */ | 5700 | /* enable NMI/IRQ window open exits if needed */ |
| 5618 | if (nmi_pending) | 5701 | if (vcpu->arch.nmi_pending) |
| 5619 | kvm_x86_ops->enable_nmi_window(vcpu); | 5702 | kvm_x86_ops->enable_nmi_window(vcpu); |
| 5620 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | 5703 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) |
| 5621 | kvm_x86_ops->enable_irq_window(vcpu); | 5704 | kvm_x86_ops->enable_irq_window(vcpu); |
| @@ -5678,7 +5761,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 5678 | if (hw_breakpoint_active()) | 5761 | if (hw_breakpoint_active()) |
| 5679 | hw_breakpoint_restore(); | 5762 | hw_breakpoint_restore(); |
| 5680 | 5763 | ||
| 5681 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | 5764 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); |
| 5682 | 5765 | ||
| 5683 | vcpu->mode = OUTSIDE_GUEST_MODE; | 5766 | vcpu->mode = OUTSIDE_GUEST_MODE; |
| 5684 | smp_wmb(); | 5767 | smp_wmb(); |
| @@ -6323,7 +6406,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
| 6323 | 6406 | ||
| 6324 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | 6407 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) |
| 6325 | { | 6408 | { |
| 6326 | vcpu->arch.nmi_pending = false; | 6409 | atomic_set(&vcpu->arch.nmi_queued, 0); |
| 6410 | vcpu->arch.nmi_pending = 0; | ||
| 6327 | vcpu->arch.nmi_injected = false; | 6411 | vcpu->arch.nmi_injected = false; |
| 6328 | 6412 | ||
| 6329 | vcpu->arch.switch_db_regs = 0; | 6413 | vcpu->arch.switch_db_regs = 0; |
| @@ -6598,7 +6682,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 6598 | !vcpu->arch.apf.halted) | 6682 | !vcpu->arch.apf.halted) |
| 6599 | || !list_empty_careful(&vcpu->async_pf.done) | 6683 | || !list_empty_careful(&vcpu->async_pf.done) |
| 6600 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 6684 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED |
| 6601 | || vcpu->arch.nmi_pending || | 6685 | || atomic_read(&vcpu->arch.nmi_queued) || |
| 6602 | (kvm_arch_interrupt_allowed(vcpu) && | 6686 | (kvm_arch_interrupt_allowed(vcpu) && |
| 6603 | kvm_cpu_has_interrupt(vcpu)); | 6687 | kvm_cpu_has_interrupt(vcpu)); |
| 6604 | } | 6688 | } |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index aace6b8691a2..f47fcd30273d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
| @@ -371,6 +371,7 @@ struct kvm_s390_psw { | |||
| 371 | #define KVM_S390_INT_VIRTIO 0xffff2603u | 371 | #define KVM_S390_INT_VIRTIO 0xffff2603u |
| 372 | #define KVM_S390_INT_SERVICE 0xffff2401u | 372 | #define KVM_S390_INT_SERVICE 0xffff2401u |
| 373 | #define KVM_S390_INT_EMERGENCY 0xffff1201u | 373 | #define KVM_S390_INT_EMERGENCY 0xffff1201u |
| 374 | #define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u | ||
| 374 | 375 | ||
| 375 | struct kvm_s390_interrupt { | 376 | struct kvm_s390_interrupt { |
| 376 | __u32 type; | 377 | __u32 type; |
| @@ -463,7 +464,7 @@ struct kvm_ppc_pvinfo { | |||
| 463 | #define KVM_CAP_VAPIC 6 | 464 | #define KVM_CAP_VAPIC 6 |
| 464 | #define KVM_CAP_EXT_CPUID 7 | 465 | #define KVM_CAP_EXT_CPUID 7 |
| 465 | #define KVM_CAP_CLOCKSOURCE 8 | 466 | #define KVM_CAP_CLOCKSOURCE 8 |
| 466 | #define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ | 467 | #define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */ |
| 467 | #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ | 468 | #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ |
| 468 | #define KVM_CAP_PIT 11 | 469 | #define KVM_CAP_PIT 11 |
| 469 | #define KVM_CAP_NOP_IO_DELAY 12 | 470 | #define KVM_CAP_NOP_IO_DELAY 12 |
| @@ -553,6 +554,9 @@ struct kvm_ppc_pvinfo { | |||
| 553 | #define KVM_CAP_SPAPR_TCE 63 | 554 | #define KVM_CAP_SPAPR_TCE 63 |
| 554 | #define KVM_CAP_PPC_SMT 64 | 555 | #define KVM_CAP_PPC_SMT 64 |
| 555 | #define KVM_CAP_PPC_RMA 65 | 556 | #define KVM_CAP_PPC_RMA 65 |
| 557 | #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ | ||
| 558 | #define KVM_CAP_PPC_HIOR 67 | ||
| 559 | #define KVM_CAP_PPC_PAPR 68 | ||
| 556 | #define KVM_CAP_S390_GMAP 71 | 560 | #define KVM_CAP_S390_GMAP 71 |
| 557 | 561 | ||
| 558 | #ifdef KVM_CAP_IRQ_ROUTING | 562 | #ifdef KVM_CAP_IRQ_ROUTING |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eabb21a30c34..d52623199978 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/msi.h> | 18 | #include <linux/msi.h> |
| 19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
| 20 | #include <linux/rcupdate.h> | 20 | #include <linux/rcupdate.h> |
| 21 | #include <linux/ratelimit.h> | ||
| 21 | #include <asm/signal.h> | 22 | #include <asm/signal.h> |
| 22 | 23 | ||
| 23 | #include <linux/kvm.h> | 24 | #include <linux/kvm.h> |
| @@ -48,6 +49,7 @@ | |||
| 48 | #define KVM_REQ_EVENT 11 | 49 | #define KVM_REQ_EVENT 11 |
| 49 | #define KVM_REQ_APF_HALT 12 | 50 | #define KVM_REQ_APF_HALT 12 |
| 50 | #define KVM_REQ_STEAL_UPDATE 13 | 51 | #define KVM_REQ_STEAL_UPDATE 13 |
| 52 | #define KVM_REQ_NMI 14 | ||
| 51 | 53 | ||
| 52 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 54 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
| 53 | 55 | ||
| @@ -55,16 +57,16 @@ struct kvm; | |||
| 55 | struct kvm_vcpu; | 57 | struct kvm_vcpu; |
| 56 | extern struct kmem_cache *kvm_vcpu_cache; | 58 | extern struct kmem_cache *kvm_vcpu_cache; |
| 57 | 59 | ||
| 58 | /* | 60 | struct kvm_io_range { |
| 59 | * It would be nice to use something smarter than a linear search, TBD... | 61 | gpa_t addr; |
| 60 | * Thankfully we dont expect many devices to register (famous last words :), | 62 | int len; |
| 61 | * so until then it will suffice. At least its abstracted so we can change | 63 | struct kvm_io_device *dev; |
| 62 | * in one place. | 64 | }; |
| 63 | */ | 65 | |
| 64 | struct kvm_io_bus { | 66 | struct kvm_io_bus { |
| 65 | int dev_count; | 67 | int dev_count; |
| 66 | #define NR_IOBUS_DEVS 200 | 68 | #define NR_IOBUS_DEVS 300 |
| 67 | struct kvm_io_device *devs[NR_IOBUS_DEVS]; | 69 | struct kvm_io_range range[NR_IOBUS_DEVS]; |
| 68 | }; | 70 | }; |
| 69 | 71 | ||
| 70 | enum kvm_bus { | 72 | enum kvm_bus { |
| @@ -77,8 +79,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
| 77 | int len, const void *val); | 79 | int len, const void *val); |
| 78 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, | 80 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, |
| 79 | void *val); | 81 | void *val); |
| 80 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 82 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
| 81 | struct kvm_io_device *dev); | 83 | int len, struct kvm_io_device *dev); |
| 82 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 84 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
| 83 | struct kvm_io_device *dev); | 85 | struct kvm_io_device *dev); |
| 84 | 86 | ||
| @@ -256,8 +258,9 @@ struct kvm { | |||
| 256 | struct kvm_arch arch; | 258 | struct kvm_arch arch; |
| 257 | atomic_t users_count; | 259 | atomic_t users_count; |
| 258 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 260 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
| 259 | struct kvm_coalesced_mmio_dev *coalesced_mmio_dev; | ||
| 260 | struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; | 261 | struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; |
| 262 | spinlock_t ring_lock; | ||
| 263 | struct list_head coalesced_zones; | ||
| 261 | #endif | 264 | #endif |
| 262 | 265 | ||
| 263 | struct mutex irq_lock; | 266 | struct mutex irq_lock; |
| @@ -281,11 +284,8 @@ struct kvm { | |||
| 281 | 284 | ||
| 282 | /* The guest did something we don't support. */ | 285 | /* The guest did something we don't support. */ |
| 283 | #define pr_unimpl(vcpu, fmt, ...) \ | 286 | #define pr_unimpl(vcpu, fmt, ...) \ |
| 284 | do { \ | 287 | pr_err_ratelimited("kvm: %i: cpu%i " fmt, \ |
| 285 | if (printk_ratelimit()) \ | 288 | current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__) |
| 286 | printk(KERN_ERR "kvm: %i: cpu%i " fmt, \ | ||
| 287 | current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \ | ||
| 288 | } while (0) | ||
| 289 | 289 | ||
| 290 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) | 290 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) |
| 291 | #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) | 291 | #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) |
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index eaf3a50f9769..3ad0925d23a9 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c | |||
| @@ -58,8 +58,6 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | |||
| 58 | static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) | 58 | static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) |
| 59 | { | 59 | { |
| 60 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | 60 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
| 61 | u32 vector; | ||
| 62 | int index; | ||
| 63 | 61 | ||
| 64 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { | 62 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { |
| 65 | spin_lock(&assigned_dev->intx_lock); | 63 | spin_lock(&assigned_dev->intx_lock); |
| @@ -68,31 +66,35 @@ static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) | |||
| 68 | spin_unlock(&assigned_dev->intx_lock); | 66 | spin_unlock(&assigned_dev->intx_lock); |
| 69 | } | 67 | } |
| 70 | 68 | ||
| 71 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | 69 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
| 72 | index = find_index_from_host_irq(assigned_dev, irq); | 70 | assigned_dev->guest_irq, 1); |
| 73 | if (index >= 0) { | 71 | |
| 74 | vector = assigned_dev-> | 72 | return IRQ_HANDLED; |
| 75 | guest_msix_entries[index].vector; | 73 | } |
| 76 | kvm_set_irq(assigned_dev->kvm, | 74 | |
| 77 | assigned_dev->irq_source_id, vector, 1); | 75 | #ifdef __KVM_HAVE_MSIX |
| 78 | } | 76 | static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) |
| 79 | } else | 77 | { |
| 78 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
| 79 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
| 80 | u32 vector; | ||
| 81 | |||
| 82 | if (index >= 0) { | ||
| 83 | vector = assigned_dev->guest_msix_entries[index].vector; | ||
| 80 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 84 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
| 81 | assigned_dev->guest_irq, 1); | 85 | vector, 1); |
| 86 | } | ||
| 82 | 87 | ||
| 83 | return IRQ_HANDLED; | 88 | return IRQ_HANDLED; |
| 84 | } | 89 | } |
| 90 | #endif | ||
| 85 | 91 | ||
| 86 | /* Ack the irq line for an assigned device */ | 92 | /* Ack the irq line for an assigned device */ |
| 87 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | 93 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) |
| 88 | { | 94 | { |
| 89 | struct kvm_assigned_dev_kernel *dev; | 95 | struct kvm_assigned_dev_kernel *dev = |
| 90 | 96 | container_of(kian, struct kvm_assigned_dev_kernel, | |
| 91 | if (kian->gsi == -1) | 97 | ack_notifier); |
| 92 | return; | ||
| 93 | |||
| 94 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
| 95 | ack_notifier); | ||
| 96 | 98 | ||
| 97 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | 99 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); |
| 98 | 100 | ||
| @@ -110,8 +112,9 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
| 110 | static void deassign_guest_irq(struct kvm *kvm, | 112 | static void deassign_guest_irq(struct kvm *kvm, |
| 111 | struct kvm_assigned_dev_kernel *assigned_dev) | 113 | struct kvm_assigned_dev_kernel *assigned_dev) |
| 112 | { | 114 | { |
| 113 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | 115 | if (assigned_dev->ack_notifier.gsi != -1) |
| 114 | assigned_dev->ack_notifier.gsi = -1; | 116 | kvm_unregister_irq_ack_notifier(kvm, |
| 117 | &assigned_dev->ack_notifier); | ||
| 115 | 118 | ||
| 116 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 119 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
| 117 | assigned_dev->guest_irq, 0); | 120 | assigned_dev->guest_irq, 0); |
| @@ -143,7 +146,7 @@ static void deassign_host_irq(struct kvm *kvm, | |||
| 143 | 146 | ||
| 144 | for (i = 0; i < assigned_dev->entries_nr; i++) | 147 | for (i = 0; i < assigned_dev->entries_nr; i++) |
| 145 | free_irq(assigned_dev->host_msix_entries[i].vector, | 148 | free_irq(assigned_dev->host_msix_entries[i].vector, |
| 146 | (void *)assigned_dev); | 149 | assigned_dev); |
| 147 | 150 | ||
| 148 | assigned_dev->entries_nr = 0; | 151 | assigned_dev->entries_nr = 0; |
| 149 | kfree(assigned_dev->host_msix_entries); | 152 | kfree(assigned_dev->host_msix_entries); |
| @@ -153,7 +156,7 @@ static void deassign_host_irq(struct kvm *kvm, | |||
| 153 | /* Deal with MSI and INTx */ | 156 | /* Deal with MSI and INTx */ |
| 154 | disable_irq(assigned_dev->host_irq); | 157 | disable_irq(assigned_dev->host_irq); |
| 155 | 158 | ||
| 156 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | 159 | free_irq(assigned_dev->host_irq, assigned_dev); |
| 157 | 160 | ||
| 158 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | 161 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) |
| 159 | pci_disable_msi(assigned_dev->dev); | 162 | pci_disable_msi(assigned_dev->dev); |
| @@ -239,7 +242,7 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, | |||
| 239 | * are going to be long delays in accepting, acking, etc. | 242 | * are going to be long delays in accepting, acking, etc. |
| 240 | */ | 243 | */ |
| 241 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, | 244 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
| 242 | IRQF_ONESHOT, dev->irq_name, (void *)dev)) | 245 | IRQF_ONESHOT, dev->irq_name, dev)) |
| 243 | return -EIO; | 246 | return -EIO; |
| 244 | return 0; | 247 | return 0; |
| 245 | } | 248 | } |
| @@ -258,7 +261,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, | |||
| 258 | 261 | ||
| 259 | dev->host_irq = dev->dev->irq; | 262 | dev->host_irq = dev->dev->irq; |
| 260 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, | 263 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
| 261 | 0, dev->irq_name, (void *)dev)) { | 264 | 0, dev->irq_name, dev)) { |
| 262 | pci_disable_msi(dev->dev); | 265 | pci_disable_msi(dev->dev); |
| 263 | return -EIO; | 266 | return -EIO; |
| 264 | } | 267 | } |
| @@ -284,8 +287,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, | |||
| 284 | 287 | ||
| 285 | for (i = 0; i < dev->entries_nr; i++) { | 288 | for (i = 0; i < dev->entries_nr; i++) { |
| 286 | r = request_threaded_irq(dev->host_msix_entries[i].vector, | 289 | r = request_threaded_irq(dev->host_msix_entries[i].vector, |
| 287 | NULL, kvm_assigned_dev_thread, | 290 | NULL, kvm_assigned_dev_thread_msix, |
| 288 | 0, dev->irq_name, (void *)dev); | 291 | 0, dev->irq_name, dev); |
| 289 | if (r) | 292 | if (r) |
| 290 | goto err; | 293 | goto err; |
| 291 | } | 294 | } |
| @@ -293,7 +296,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, | |||
| 293 | return 0; | 296 | return 0; |
| 294 | err: | 297 | err: |
| 295 | for (i -= 1; i >= 0; i--) | 298 | for (i -= 1; i >= 0; i--) |
| 296 | free_irq(dev->host_msix_entries[i].vector, (void *)dev); | 299 | free_irq(dev->host_msix_entries[i].vector, dev); |
| 297 | pci_disable_msix(dev->dev); | 300 | pci_disable_msix(dev->dev); |
| 298 | return r; | 301 | return r; |
| 299 | } | 302 | } |
| @@ -406,7 +409,8 @@ static int assign_guest_irq(struct kvm *kvm, | |||
| 406 | 409 | ||
| 407 | if (!r) { | 410 | if (!r) { |
| 408 | dev->irq_requested_type |= guest_irq_type; | 411 | dev->irq_requested_type |= guest_irq_type; |
| 409 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | 412 | if (dev->ack_notifier.gsi != -1) |
| 413 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
| 410 | } else | 414 | } else |
| 411 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | 415 | kvm_free_irq_source_id(kvm, dev->irq_source_id); |
| 412 | 416 | ||
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index fc8487564d1f..a6ec206f36ba 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
| @@ -24,10 +24,19 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) | |||
| 24 | static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, | 24 | static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, |
| 25 | gpa_t addr, int len) | 25 | gpa_t addr, int len) |
| 26 | { | 26 | { |
| 27 | struct kvm_coalesced_mmio_zone *zone; | 27 | /* is it in a batchable area ? |
| 28 | * (addr,len) is fully included in | ||
| 29 | * (zone->addr, zone->size) | ||
| 30 | */ | ||
| 31 | |||
| 32 | return (dev->zone.addr <= addr && | ||
| 33 | addr + len <= dev->zone.addr + dev->zone.size); | ||
| 34 | } | ||
| 35 | |||
| 36 | static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) | ||
| 37 | { | ||
| 28 | struct kvm_coalesced_mmio_ring *ring; | 38 | struct kvm_coalesced_mmio_ring *ring; |
| 29 | unsigned avail; | 39 | unsigned avail; |
| 30 | int i; | ||
| 31 | 40 | ||
| 32 | /* Are we able to batch it ? */ | 41 | /* Are we able to batch it ? */ |
| 33 | 42 | ||
| @@ -37,25 +46,12 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, | |||
| 37 | */ | 46 | */ |
| 38 | ring = dev->kvm->coalesced_mmio_ring; | 47 | ring = dev->kvm->coalesced_mmio_ring; |
| 39 | avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; | 48 | avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; |
| 40 | if (avail < KVM_MAX_VCPUS) { | 49 | if (avail == 0) { |
| 41 | /* full */ | 50 | /* full */ |
| 42 | return 0; | 51 | return 0; |
| 43 | } | 52 | } |
| 44 | 53 | ||
| 45 | /* is it in a batchable area ? */ | 54 | return 1; |
| 46 | |||
| 47 | for (i = 0; i < dev->nb_zones; i++) { | ||
| 48 | zone = &dev->zone[i]; | ||
| 49 | |||
| 50 | /* (addr,len) is fully included in | ||
| 51 | * (zone->addr, zone->size) | ||
| 52 | */ | ||
| 53 | |||
| 54 | if (zone->addr <= addr && | ||
| 55 | addr + len <= zone->addr + zone->size) | ||
| 56 | return 1; | ||
| 57 | } | ||
| 58 | return 0; | ||
| 59 | } | 55 | } |
| 60 | 56 | ||
| 61 | static int coalesced_mmio_write(struct kvm_io_device *this, | 57 | static int coalesced_mmio_write(struct kvm_io_device *this, |
| @@ -63,10 +59,16 @@ static int coalesced_mmio_write(struct kvm_io_device *this, | |||
| 63 | { | 59 | { |
| 64 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); | 60 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
| 65 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; | 61 | struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; |
| 62 | |||
| 66 | if (!coalesced_mmio_in_range(dev, addr, len)) | 63 | if (!coalesced_mmio_in_range(dev, addr, len)) |
| 67 | return -EOPNOTSUPP; | 64 | return -EOPNOTSUPP; |
| 68 | 65 | ||
| 69 | spin_lock(&dev->lock); | 66 | spin_lock(&dev->kvm->ring_lock); |
| 67 | |||
| 68 | if (!coalesced_mmio_has_room(dev)) { | ||
| 69 | spin_unlock(&dev->kvm->ring_lock); | ||
| 70 | return -EOPNOTSUPP; | ||
| 71 | } | ||
| 70 | 72 | ||
| 71 | /* copy data in first free entry of the ring */ | 73 | /* copy data in first free entry of the ring */ |
| 72 | 74 | ||
| @@ -75,7 +77,7 @@ static int coalesced_mmio_write(struct kvm_io_device *this, | |||
| 75 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); | 77 | memcpy(ring->coalesced_mmio[ring->last].data, val, len); |
| 76 | smp_wmb(); | 78 | smp_wmb(); |
| 77 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; | 79 | ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; |
| 78 | spin_unlock(&dev->lock); | 80 | spin_unlock(&dev->kvm->ring_lock); |
| 79 | return 0; | 81 | return 0; |
| 80 | } | 82 | } |
| 81 | 83 | ||
| @@ -83,6 +85,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this) | |||
| 83 | { | 85 | { |
| 84 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); | 86 | struct kvm_coalesced_mmio_dev *dev = to_mmio(this); |
| 85 | 87 | ||
| 88 | list_del(&dev->list); | ||
| 89 | |||
| 86 | kfree(dev); | 90 | kfree(dev); |
| 87 | } | 91 | } |
| 88 | 92 | ||
| @@ -93,7 +97,6 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = { | |||
| 93 | 97 | ||
| 94 | int kvm_coalesced_mmio_init(struct kvm *kvm) | 98 | int kvm_coalesced_mmio_init(struct kvm *kvm) |
| 95 | { | 99 | { |
| 96 | struct kvm_coalesced_mmio_dev *dev; | ||
| 97 | struct page *page; | 100 | struct page *page; |
| 98 | int ret; | 101 | int ret; |
| 99 | 102 | ||
| @@ -101,31 +104,18 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) | |||
| 101 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 104 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
| 102 | if (!page) | 105 | if (!page) |
| 103 | goto out_err; | 106 | goto out_err; |
| 104 | kvm->coalesced_mmio_ring = page_address(page); | ||
| 105 | |||
| 106 | ret = -ENOMEM; | ||
| 107 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | ||
| 108 | if (!dev) | ||
| 109 | goto out_free_page; | ||
| 110 | spin_lock_init(&dev->lock); | ||
| 111 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); | ||
| 112 | dev->kvm = kvm; | ||
| 113 | kvm->coalesced_mmio_dev = dev; | ||
| 114 | 107 | ||
| 115 | mutex_lock(&kvm->slots_lock); | 108 | ret = 0; |
| 116 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); | 109 | kvm->coalesced_mmio_ring = page_address(page); |
| 117 | mutex_unlock(&kvm->slots_lock); | ||
| 118 | if (ret < 0) | ||
| 119 | goto out_free_dev; | ||
| 120 | 110 | ||
| 121 | return ret; | 111 | /* |
| 112 | * We're using this spinlock to sync access to the coalesced ring. | ||
| 113 | * The list doesn't need its own lock since device registration and | ||
| 114 | * unregistration should only happen when kvm->slots_lock is held. | ||
| 115 | */ | ||
| 116 | spin_lock_init(&kvm->ring_lock); | ||
| 117 | INIT_LIST_HEAD(&kvm->coalesced_zones); | ||
| 122 | 118 | ||
| 123 | out_free_dev: | ||
| 124 | kvm->coalesced_mmio_dev = NULL; | ||
| 125 | kfree(dev); | ||
| 126 | out_free_page: | ||
| 127 | kvm->coalesced_mmio_ring = NULL; | ||
| 128 | __free_page(page); | ||
| 129 | out_err: | 119 | out_err: |
| 130 | return ret; | 120 | return ret; |
| 131 | } | 121 | } |
| @@ -139,51 +129,50 @@ void kvm_coalesced_mmio_free(struct kvm *kvm) | |||
| 139 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 129 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
| 140 | struct kvm_coalesced_mmio_zone *zone) | 130 | struct kvm_coalesced_mmio_zone *zone) |
| 141 | { | 131 | { |
| 142 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | 132 | int ret; |
| 133 | struct kvm_coalesced_mmio_dev *dev; | ||
| 143 | 134 | ||
| 144 | if (dev == NULL) | 135 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); |
| 145 | return -ENXIO; | 136 | if (!dev) |
| 137 | return -ENOMEM; | ||
| 138 | |||
| 139 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); | ||
| 140 | dev->kvm = kvm; | ||
| 141 | dev->zone = *zone; | ||
| 146 | 142 | ||
| 147 | mutex_lock(&kvm->slots_lock); | 143 | mutex_lock(&kvm->slots_lock); |
| 148 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { | 144 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, |
| 149 | mutex_unlock(&kvm->slots_lock); | 145 | zone->size, &dev->dev); |
| 150 | return -ENOBUFS; | 146 | if (ret < 0) |
| 151 | } | 147 | goto out_free_dev; |
| 148 | list_add_tail(&dev->list, &kvm->coalesced_zones); | ||
| 149 | mutex_unlock(&kvm->slots_lock); | ||
| 152 | 150 | ||
| 153 | dev->zone[dev->nb_zones] = *zone; | 151 | return ret; |
| 154 | dev->nb_zones++; | ||
| 155 | 152 | ||
| 153 | out_free_dev: | ||
| 156 | mutex_unlock(&kvm->slots_lock); | 154 | mutex_unlock(&kvm->slots_lock); |
| 155 | |||
| 156 | kfree(dev); | ||
| 157 | |||
| 158 | if (dev == NULL) | ||
| 159 | return -ENXIO; | ||
| 160 | |||
| 157 | return 0; | 161 | return 0; |
| 158 | } | 162 | } |
| 159 | 163 | ||
| 160 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | 164 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, |
| 161 | struct kvm_coalesced_mmio_zone *zone) | 165 | struct kvm_coalesced_mmio_zone *zone) |
| 162 | { | 166 | { |
| 163 | int i; | 167 | struct kvm_coalesced_mmio_dev *dev, *tmp; |
| 164 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | ||
| 165 | struct kvm_coalesced_mmio_zone *z; | ||
| 166 | |||
| 167 | if (dev == NULL) | ||
| 168 | return -ENXIO; | ||
| 169 | 168 | ||
| 170 | mutex_lock(&kvm->slots_lock); | 169 | mutex_lock(&kvm->slots_lock); |
| 171 | 170 | ||
| 172 | i = dev->nb_zones; | 171 | list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) |
| 173 | while (i) { | 172 | if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) { |
| 174 | z = &dev->zone[i - 1]; | 173 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev); |
| 175 | 174 | kvm_iodevice_destructor(&dev->dev); | |
| 176 | /* unregister all zones | ||
| 177 | * included in (zone->addr, zone->size) | ||
| 178 | */ | ||
| 179 | |||
| 180 | if (zone->addr <= z->addr && | ||
| 181 | z->addr + z->size <= zone->addr + zone->size) { | ||
| 182 | dev->nb_zones--; | ||
| 183 | *z = dev->zone[dev->nb_zones]; | ||
| 184 | } | 175 | } |
| 185 | i--; | ||
| 186 | } | ||
| 187 | 176 | ||
| 188 | mutex_unlock(&kvm->slots_lock); | 177 | mutex_unlock(&kvm->slots_lock); |
| 189 | 178 | ||
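After this rework every registered coalescing zone is its own kvm_io_device on the MMIO bus, so the range test is a single interval check against dev->zone, and all zones share the guest-visible ring, now serialised by kvm->ring_lock instead of a per-device lock. Two small sketches of the checks the write path relies on; the bodies follow the patch, the helper names are illustrative:

    /* (addr, len) must lie entirely inside the zone to be batched. */
    static bool zone_covers(const struct kvm_coalesced_mmio_zone *z,
                            gpa_t addr, int len)
    {
        return z->addr <= addr && addr + len <= z->addr + z->size;
    }

    /* One slot is always kept free, so first == last means "empty". */
    static bool ring_has_room(const struct kvm_coalesced_mmio_ring *r)
    {
        return ((r->first - r->last - 1) % KVM_COALESCED_MMIO_MAX) != 0;
    }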
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 8a5959e3535f..b280c20444d1 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h | |||
| @@ -12,14 +12,13 @@ | |||
| 12 | 12 | ||
| 13 | #ifdef CONFIG_KVM_MMIO | 13 | #ifdef CONFIG_KVM_MMIO |
| 14 | 14 | ||
| 15 | #define KVM_COALESCED_MMIO_ZONE_MAX 100 | 15 | #include <linux/list.h> |
| 16 | 16 | ||
| 17 | struct kvm_coalesced_mmio_dev { | 17 | struct kvm_coalesced_mmio_dev { |
| 18 | struct list_head list; | ||
| 18 | struct kvm_io_device dev; | 19 | struct kvm_io_device dev; |
| 19 | struct kvm *kvm; | 20 | struct kvm *kvm; |
| 20 | spinlock_t lock; | 21 | struct kvm_coalesced_mmio_zone zone; |
| 21 | int nb_zones; | ||
| 22 | struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; | ||
| 23 | }; | 22 | }; |
| 24 | 23 | ||
| 25 | int kvm_coalesced_mmio_init(struct kvm *kvm); | 24 | int kvm_coalesced_mmio_init(struct kvm *kvm); |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 73358d256fa2..f59c1e8de7a2 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
| @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
| 586 | 586 | ||
| 587 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); | 587 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); |
| 588 | 588 | ||
| 589 | ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); | 589 | ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, |
| 590 | &p->dev); | ||
| 590 | if (ret < 0) | 591 | if (ret < 0) |
| 591 | goto unlock_fail; | 592 | goto unlock_fail; |
| 592 | 593 | ||
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 8df1ca104a7f..3eed61eb4867 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
| @@ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
| 394 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 394 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
| 395 | ioapic->kvm = kvm; | 395 | ioapic->kvm = kvm; |
| 396 | mutex_lock(&kvm->slots_lock); | 396 | mutex_lock(&kvm->slots_lock); |
| 397 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | 397 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, |
| 398 | IOAPIC_MEM_LENGTH, &ioapic->dev); | ||
| 398 | mutex_unlock(&kvm->slots_lock); | 399 | mutex_unlock(&kvm->slots_lock); |
| 399 | if (ret < 0) { | 400 | if (ret < 0) { |
| 400 | kvm->arch.vioapic = NULL; | 401 | kvm->arch.vioapic = NULL; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aefdda390f5e..d9cfb782cb81 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -47,6 +47,8 @@ | |||
| 47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
| 48 | #include <linux/hugetlb.h> | 48 | #include <linux/hugetlb.h> |
| 49 | #include <linux/slab.h> | 49 | #include <linux/slab.h> |
| 50 | #include <linux/sort.h> | ||
| 51 | #include <linux/bsearch.h> | ||
| 50 | 52 | ||
| 51 | #include <asm/processor.h> | 53 | #include <asm/processor.h> |
| 52 | #include <asm/io.h> | 54 | #include <asm/io.h> |
| @@ -2391,24 +2393,92 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
| 2391 | int i; | 2393 | int i; |
| 2392 | 2394 | ||
| 2393 | for (i = 0; i < bus->dev_count; i++) { | 2395 | for (i = 0; i < bus->dev_count; i++) { |
| 2394 | struct kvm_io_device *pos = bus->devs[i]; | 2396 | struct kvm_io_device *pos = bus->range[i].dev; |
| 2395 | 2397 | ||
| 2396 | kvm_iodevice_destructor(pos); | 2398 | kvm_iodevice_destructor(pos); |
| 2397 | } | 2399 | } |
| 2398 | kfree(bus); | 2400 | kfree(bus); |
| 2399 | } | 2401 | } |
| 2400 | 2402 | ||
| 2403 | int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | ||
| 2404 | { | ||
| 2405 | const struct kvm_io_range *r1 = p1; | ||
| 2406 | const struct kvm_io_range *r2 = p2; | ||
| 2407 | |||
| 2408 | if (r1->addr < r2->addr) | ||
| 2409 | return -1; | ||
| 2410 | if (r1->addr + r1->len > r2->addr + r2->len) | ||
| 2411 | return 1; | ||
| 2412 | return 0; | ||
| 2413 | } | ||
| 2414 | |||
| 2415 | int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | ||
| 2416 | gpa_t addr, int len) | ||
| 2417 | { | ||
| 2418 | if (bus->dev_count == NR_IOBUS_DEVS) | ||
| 2419 | return -ENOSPC; | ||
| 2420 | |||
| 2421 | bus->range[bus->dev_count++] = (struct kvm_io_range) { | ||
| 2422 | .addr = addr, | ||
| 2423 | .len = len, | ||
| 2424 | .dev = dev, | ||
| 2425 | }; | ||
| 2426 | |||
| 2427 | sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range), | ||
| 2428 | kvm_io_bus_sort_cmp, NULL); | ||
| 2429 | |||
| 2430 | return 0; | ||
| 2431 | } | ||
| 2432 | |||
| 2433 | int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, | ||
| 2434 | gpa_t addr, int len) | ||
| 2435 | { | ||
| 2436 | struct kvm_io_range *range, key; | ||
| 2437 | int off; | ||
| 2438 | |||
| 2439 | key = (struct kvm_io_range) { | ||
| 2440 | .addr = addr, | ||
| 2441 | .len = len, | ||
| 2442 | }; | ||
| 2443 | |||
| 2444 | range = bsearch(&key, bus->range, bus->dev_count, | ||
| 2445 | sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp); | ||
| 2446 | if (range == NULL) | ||
| 2447 | return -ENOENT; | ||
| 2448 | |||
| 2449 | off = range - bus->range; | ||
| 2450 | |||
| 2451 | while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) | ||
| 2452 | off--; | ||
| 2453 | |||
| 2454 | return off; | ||
| 2455 | } | ||
| 2456 | |||
| 2401 | /* kvm_io_bus_write - called under kvm->slots_lock */ | 2457 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
| 2402 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2458 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
| 2403 | int len, const void *val) | 2459 | int len, const void *val) |
| 2404 | { | 2460 | { |
| 2405 | int i; | 2461 | int idx; |
| 2406 | struct kvm_io_bus *bus; | 2462 | struct kvm_io_bus *bus; |
| 2463 | struct kvm_io_range range; | ||
| 2464 | |||
| 2465 | range = (struct kvm_io_range) { | ||
| 2466 | .addr = addr, | ||
| 2467 | .len = len, | ||
| 2468 | }; | ||
| 2407 | 2469 | ||
| 2408 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2470 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
| 2409 | for (i = 0; i < bus->dev_count; i++) | 2471 | idx = kvm_io_bus_get_first_dev(bus, addr, len); |
| 2410 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | 2472 | if (idx < 0) |
| 2473 | return -EOPNOTSUPP; | ||
| 2474 | |||
| 2475 | while (idx < bus->dev_count && | ||
| 2476 | kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | ||
| 2477 | if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val)) | ||
| 2411 | return 0; | 2478 | return 0; |
| 2479 | idx++; | ||
| 2480 | } | ||
| 2481 | |||
| 2412 | return -EOPNOTSUPP; | 2482 | return -EOPNOTSUPP; |
| 2413 | } | 2483 | } |
| 2414 | 2484 | ||
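The kvm_main.c hunk above is the core of the change: the bus now holds kvm_io_range entries (addr, len, dev) kept sorted by kvm_io_bus_sort_cmp(), kvm_io_bus_insert_dev() appends and re-sorts at registration, and kvm_io_bus_get_first_dev() bsearch()es for a matching range and then steps back to the first entry that still compares equal, so several devices registered on the same range (for example multiple ioeventfds on one address) are all visited by the dispatch loop in kvm_io_bus_write(). The stand-alone user-space model below reproduces that mechanism with qsort()/bsearch(); the struct names, helper names, example devices and addresses are illustrative only, not kernel code.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

typedef uint64_t gpa_t;

struct io_range {
	gpa_t addr;
	int len;
	const char *dev;	/* stands in for struct kvm_io_device * */
};

/* Same ordering rule as kvm_io_bus_sort_cmp(): "less" when r1 starts
 * earlier, "greater" when it ends later, otherwise r1 lies inside
 * (or equals) r2 and the two compare equal. */
static int range_cmp(const void *p1, const void *p2)
{
	const struct io_range *r1 = p1, *r2 = p2;

	if (r1->addr < r2->addr)
		return -1;
	if (r1->addr + r1->len > r2->addr + r2->len)
		return 1;
	return 0;
}

/* bsearch() may land on any of several equal entries; step back to the
 * first one, as kvm_io_bus_get_first_dev() does. */
static int first_match(const struct io_range *bus, int count, gpa_t addr, int len)
{
	struct io_range key = { .addr = addr, .len = len };
	const struct io_range *hit;
	int off;

	hit = bsearch(&key, bus, count, sizeof(*bus), range_cmp);
	if (!hit)
		return -1;
	off = (int)(hit - bus);
	while (off > 0 && range_cmp(&key, &bus[off - 1]) == 0)
		off--;
	return off;
}

int main(void)
{
	struct io_range bus[] = {
		{ 0xfee00000, 0x1000, "lapic"       },
		{ 0xfec00000, 0x100,  "ioapic"      },
		{ 0x10000,    4,      "ioeventfd-A" },
		{ 0x10000,    4,      "ioeventfd-B" },
	};
	int count = sizeof(bus) / sizeof(bus[0]);
	struct io_range key = { .addr = 0x10000, .len = 4 };
	int idx;

	/* Registration keeps the array sorted (kvm_io_bus_insert_dev()). */
	qsort(bus, count, sizeof(bus[0]), range_cmp);

	/* Dispatch: visit every device whose range covers the 4-byte access
	 * at 0x10000, in order, as kvm_io_bus_write() does. */
	for (idx = first_match(bus, count, key.addr, key.len);
	     idx >= 0 && idx < count && range_cmp(&key, &bus[idx]) == 0;
	     idx++)
		printf("dispatch to %s\n", bus[idx].dev);

	return 0;
}

Note that the access key compares "equal" to a bus entry exactly when the access lies entirely inside that entry's range, which is what lets one comparator drive both the sort and the lookup.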
| @@ -2416,19 +2486,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
| 2416 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2486 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
| 2417 | int len, void *val) | 2487 | int len, void *val) |
| 2418 | { | 2488 | { |
| 2419 | int i; | 2489 | int idx; |
| 2420 | struct kvm_io_bus *bus; | 2490 | struct kvm_io_bus *bus; |
| 2491 | struct kvm_io_range range; | ||
| 2492 | |||
| 2493 | range = (struct kvm_io_range) { | ||
| 2494 | .addr = addr, | ||
| 2495 | .len = len, | ||
| 2496 | }; | ||
| 2421 | 2497 | ||
| 2422 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2498 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
| 2423 | for (i = 0; i < bus->dev_count; i++) | 2499 | idx = kvm_io_bus_get_first_dev(bus, addr, len); |
| 2424 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | 2500 | if (idx < 0) |
| 2501 | return -EOPNOTSUPP; | ||
| 2502 | |||
| 2503 | while (idx < bus->dev_count && | ||
| 2504 | kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | ||
| 2505 | if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val)) | ||
| 2425 | return 0; | 2506 | return 0; |
| 2507 | idx++; | ||
| 2508 | } | ||
| 2509 | |||
| 2426 | return -EOPNOTSUPP; | 2510 | return -EOPNOTSUPP; |
| 2427 | } | 2511 | } |
| 2428 | 2512 | ||
| 2429 | /* Caller must hold slots_lock. */ | 2513 | /* Caller must hold slots_lock. */ |
| 2430 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 2514 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
| 2431 | struct kvm_io_device *dev) | 2515 | int len, struct kvm_io_device *dev) |
| 2432 | { | 2516 | { |
| 2433 | struct kvm_io_bus *new_bus, *bus; | 2517 | struct kvm_io_bus *new_bus, *bus; |
| 2434 | 2518 | ||
| @@ -2440,7 +2524,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
| 2440 | if (!new_bus) | 2524 | if (!new_bus) |
| 2441 | return -ENOMEM; | 2525 | return -ENOMEM; |
| 2442 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | 2526 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); |
| 2443 | new_bus->devs[new_bus->dev_count++] = dev; | 2527 | kvm_io_bus_insert_dev(new_bus, dev, addr, len); |
| 2444 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 2528 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
| 2445 | synchronize_srcu_expedited(&kvm->srcu); | 2529 | synchronize_srcu_expedited(&kvm->srcu); |
| 2446 | kfree(bus); | 2530 | kfree(bus); |
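kvm_io_bus_register_dev() never edits the live bus: it copies it, inserts the new range into the copy, publishes the copy with rcu_assign_pointer(), and frees the old bus only after synchronize_srcu_expedited() guarantees no reader still holds it. A single-threaded user-space sketch of that copy-and-publish pattern follows; it reuses struct io_range and range_cmp() from the model above, MODEL_MAX_RANGES and bus_register_range() are made-up names, and the atomic store merely stands in for rcu_assign_pointer().

#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

#define MODEL_MAX_RANGES 32	/* model constant, not the kernel's NR_IOBUS_DEVS */

struct io_bus {
	int dev_count;
	struct io_range range[MODEL_MAX_RANGES];
};

static _Atomic(struct io_bus *) live_bus;

static int bus_register_range(struct io_range r)
{
	struct io_bus *old = atomic_load(&live_bus);
	struct io_bus *new = malloc(sizeof(*new));

	if (!new)
		return -1;			/* -ENOMEM in the kernel */
	memcpy(new, old, sizeof(*new));		/* edit a private copy */
	if (new->dev_count == MODEL_MAX_RANGES) {
		free(new);
		return -1;			/* -ENOSPC in the kernel */
	}
	new->range[new->dev_count++] = r;
	qsort(new->range, new->dev_count, sizeof(r), range_cmp);

	atomic_store(&live_bus, new);	/* publish, as rcu_assign_pointer() does */
	/* the kernel waits for readers (synchronize_srcu_expedited()) here */
	free(old);
	return 0;
}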
| @@ -2464,9 +2548,13 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | |||
| 2464 | 2548 | ||
| 2465 | r = -ENOENT; | 2549 | r = -ENOENT; |
| 2466 | for (i = 0; i < new_bus->dev_count; i++) | 2550 | for (i = 0; i < new_bus->dev_count; i++) |
| 2467 | if (new_bus->devs[i] == dev) { | 2551 | if (new_bus->range[i].dev == dev) { |
| 2468 | r = 0; | 2552 | r = 0; |
| 2469 | new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; | 2553 | new_bus->dev_count--; |
| 2554 | new_bus->range[i] = new_bus->range[new_bus->dev_count]; | ||
| 2555 | sort(new_bus->range, new_bus->dev_count, | ||
| 2556 | sizeof(struct kvm_io_range), | ||
| 2557 | kvm_io_bus_sort_cmp, NULL); | ||
| 2470 | break; | 2558 | break; |
| 2471 | } | 2559 | } |
| 2472 | 2560 | ||
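Unregistration (above) uses the same copy-then-publish scheme; within the copy, the matching entry is overwritten by the last one, dev_count is decremented, and the array is re-sorted so the bsearch() invariant holds again. A short sketch of that removal step, again reusing struct io_range and range_cmp() from the model above (remove_range is an illustrative name):

/* Overwrite the victim slot with the last entry, shrink the array,
 * then restore the sorted order the lookup relies on. */
static void remove_range(struct io_range *bus, int *count, int victim)
{
	bus[victim] = bus[--*count];
	qsort(bus, *count, sizeof(bus[0]), range_cmp);
}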
