diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-24 16:07:18 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-24 16:07:18 -0500 |
commit | 89f883372fa60f604d136924baf3e89ff1870e9e (patch) | |
tree | cb69b0a14957945ba00d3d392bf9ccbbef56f3b8 /arch | |
parent | 9e2d59ad580d590134285f361a0e80f0e98c0207 (diff) | |
parent | 6b73a96065e89dc9fa75ba4f78b1aa3a3bbd0470 (diff) |
Merge tag 'kvm-3.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
"KVM updates for the 3.9 merge window, including x86 real mode
emulation fixes, stronger memory slot interface restrictions, mmu_lock
spinlock hold time reduction, improved handling of large page faults
on shadow, initial APICv HW acceleration support, s390 channel IO
based virtio, amongst others"
* tag 'kvm-3.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (143 commits)
Revert "KVM: MMU: lazily drop large spte"
x86: pvclock kvm: align allocation size to page size
KVM: nVMX: Remove redundant get_vmcs12 from nested_vmx_exit_handled_msr
x86 emulator: fix parity calculation for AAD instruction
KVM: PPC: BookE: Handle alignment interrupts
booke: Added DBCR4 SPR number
KVM: PPC: booke: Allow multiple exception types
KVM: PPC: booke: use vcpu reference from thread_struct
KVM: Remove user_alloc from struct kvm_memory_slot
KVM: VMX: disable apicv by default
KVM: s390: Fix handling of iscs.
KVM: MMU: cleanup __direct_map
KVM: MMU: remove pt_access in mmu_set_spte
KVM: MMU: cleanup mapping-level
KVM: MMU: lazily drop large spte
KVM: VMX: cleanup vmx_set_cr0().
KVM: VMX: add missing exit names to VMX_EXIT_REASONS array
KVM: VMX: disable SMEP feature when guest is in non-paging mode
KVM: Remove duplicate text in api.txt
Revert "KVM: MMU: split kvm_mmu_free_page"
...
Diffstat (limited to 'arch')
51 files changed, 3044 insertions, 1539 deletions
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 6d6a5ac48d85..cfa74983c675 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h | |||
@@ -23,9 +23,7 @@ | |||
23 | #ifndef __ASM_KVM_HOST_H | 23 | #ifndef __ASM_KVM_HOST_H |
24 | #define __ASM_KVM_HOST_H | 24 | #define __ASM_KVM_HOST_H |
25 | 25 | ||
26 | #define KVM_MEMORY_SLOTS 32 | 26 | #define KVM_USER_MEM_SLOTS 32 |
27 | /* memory slots that does not exposed to userspace */ | ||
28 | #define KVM_PRIVATE_MEM_SLOTS 4 | ||
29 | 27 | ||
30 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 28 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
31 | 29 | ||
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bd1c51555038..ad3126a58644 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
@@ -955,7 +955,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
955 | kvm_mem.guest_phys_addr; | 955 | kvm_mem.guest_phys_addr; |
956 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; | 956 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; |
957 | r = kvm_vm_ioctl_set_memory_region(kvm, | 957 | r = kvm_vm_ioctl_set_memory_region(kvm, |
958 | &kvm_userspace_mem, 0); | 958 | &kvm_userspace_mem, false); |
959 | if (r) | 959 | if (r) |
960 | goto out; | 960 | goto out; |
961 | break; | 961 | break; |
@@ -1580,7 +1580,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
1580 | struct kvm_memory_slot *memslot, | 1580 | struct kvm_memory_slot *memslot, |
1581 | struct kvm_memory_slot old, | 1581 | struct kvm_memory_slot old, |
1582 | struct kvm_userspace_memory_region *mem, | 1582 | struct kvm_userspace_memory_region *mem, |
1583 | int user_alloc) | 1583 | bool user_alloc) |
1584 | { | 1584 | { |
1585 | unsigned long i; | 1585 | unsigned long i; |
1586 | unsigned long pfn; | 1586 | unsigned long pfn; |
@@ -1611,7 +1611,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
1611 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 1611 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
1612 | struct kvm_userspace_memory_region *mem, | 1612 | struct kvm_userspace_memory_region *mem, |
1613 | struct kvm_memory_slot old, | 1613 | struct kvm_memory_slot old, |
1614 | int user_alloc) | 1614 | bool user_alloc) |
1615 | { | 1615 | { |
1616 | return; | 1616 | return; |
1617 | } | 1617 | } |
@@ -1834,7 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
1834 | mutex_lock(&kvm->slots_lock); | 1834 | mutex_lock(&kvm->slots_lock); |
1835 | 1835 | ||
1836 | r = -EINVAL; | 1836 | r = -EINVAL; |
1837 | if (log->slot >= KVM_MEMORY_SLOTS) | 1837 | if (log->slot >= KVM_USER_MEM_SLOTS) |
1838 | goto out; | 1838 | goto out; |
1839 | 1839 | ||
1840 | memslot = id_to_memslot(kvm->memslots, log->slot); | 1840 | memslot = id_to_memslot(kvm->memslots, log->slot); |
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index c5f92a926a9a..c3e2935b6db4 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h | |||
@@ -27,4 +27,10 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | |||
27 | #define kvm_apic_present(x) (true) | 27 | #define kvm_apic_present(x) (true) |
28 | #define kvm_lapic_enabled(x) (true) | 28 | #define kvm_lapic_enabled(x) (true) |
29 | 29 | ||
30 | static inline bool kvm_apic_vid_enabled(void) | ||
31 | { | ||
32 | /* IA64 has no apicv supporting, do nothing here */ | ||
33 | return false; | ||
34 | } | ||
35 | |||
30 | #endif | 36 | #endif |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 03d7beae89a0..d1bb86074721 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -37,10 +37,8 @@ | |||
37 | 37 | ||
38 | #define KVM_MAX_VCPUS NR_CPUS | 38 | #define KVM_MAX_VCPUS NR_CPUS |
39 | #define KVM_MAX_VCORES NR_CPUS | 39 | #define KVM_MAX_VCORES NR_CPUS |
40 | #define KVM_MEMORY_SLOTS 32 | 40 | #define KVM_USER_MEM_SLOTS 32 |
41 | /* memory slots that does not exposed to userspace */ | 41 | #define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS |
42 | #define KVM_PRIVATE_MEM_SLOTS 4 | ||
43 | #define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | ||
44 | 42 | ||
45 | #ifdef CONFIG_KVM_MMIO | 43 | #ifdef CONFIG_KVM_MMIO |
46 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
@@ -523,6 +521,8 @@ struct kvm_vcpu_arch { | |||
523 | u8 sane; | 521 | u8 sane; |
524 | u8 cpu_type; | 522 | u8 cpu_type; |
525 | u8 hcall_needed; | 523 | u8 hcall_needed; |
524 | u8 epr_enabled; | ||
525 | u8 epr_needed; | ||
526 | 526 | ||
527 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ | 527 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ |
528 | 528 | ||
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 572aa7530619..44a657adf416 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -44,12 +44,11 @@ enum emulation_result { | |||
44 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ | 44 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ |
45 | EMULATE_FAIL, /* can't emulate this instruction */ | 45 | EMULATE_FAIL, /* can't emulate this instruction */ |
46 | EMULATE_AGAIN, /* something went wrong. go again */ | 46 | EMULATE_AGAIN, /* something went wrong. go again */ |
47 | EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */ | ||
47 | }; | 48 | }; |
48 | 49 | ||
49 | extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 50 | extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
50 | extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 51 | extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
51 | extern char kvmppc_handlers_start[]; | ||
52 | extern unsigned long kvmppc_handler_len; | ||
53 | extern void kvmppc_handler_highmem(void); | 52 | extern void kvmppc_handler_highmem(void); |
54 | 53 | ||
55 | extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); | 54 | extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); |
@@ -263,6 +262,15 @@ static inline void kvm_linear_init(void) | |||
263 | {} | 262 | {} |
264 | #endif | 263 | #endif |
265 | 264 | ||
265 | static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) | ||
266 | { | ||
267 | #ifdef CONFIG_KVM_BOOKE_HV | ||
268 | mtspr(SPRN_GEPR, epr); | ||
269 | #elif defined(CONFIG_BOOKE) | ||
270 | vcpu->arch.epr = epr; | ||
271 | #endif | ||
272 | } | ||
273 | |||
266 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | 274 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, |
267 | struct kvm_config_tlb *cfg); | 275 | struct kvm_config_tlb *cfg); |
268 | int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, | 276 | int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, |
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7035e608f3fa..e66586122030 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
@@ -956,8 +956,6 @@ | |||
956 | #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9 | 956 | #define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9 |
957 | #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9 | 957 | #define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9 |
958 | #endif | 958 | #endif |
959 | #define SPRN_SPRG_RVCPU SPRN_SPRG1 | ||
960 | #define SPRN_SPRG_WVCPU SPRN_SPRG1 | ||
961 | #endif | 959 | #endif |
962 | 960 | ||
963 | #ifdef CONFIG_8xx | 961 | #ifdef CONFIG_8xx |
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index e07e6af5e1ff..b417de3cc2c4 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h | |||
@@ -56,6 +56,7 @@ | |||
56 | #define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */ | 56 | #define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */ |
57 | #define SPRN_EPCR 0x133 /* Embedded Processor Control Register */ | 57 | #define SPRN_EPCR 0x133 /* Embedded Processor Control Register */ |
58 | #define SPRN_DBCR2 0x136 /* Debug Control Register 2 */ | 58 | #define SPRN_DBCR2 0x136 /* Debug Control Register 2 */ |
59 | #define SPRN_DBCR4 0x233 /* Debug Control Register 4 */ | ||
59 | #define SPRN_MSRP 0x137 /* MSR Protect Register */ | 60 | #define SPRN_MSRP 0x137 /* MSR Protect Register */ |
60 | #define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */ | 61 | #define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */ |
61 | #define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */ | 62 | #define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */ |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 2fba8a66fb10..16064d00adb9 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
@@ -114,7 +114,10 @@ struct kvm_regs { | |||
114 | /* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ | 114 | /* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ |
115 | #define KVM_SREGS_E_SPE (1 << 9) | 115 | #define KVM_SREGS_E_SPE (1 << 9) |
116 | 116 | ||
117 | /* External Proxy (EXP) -- EPR */ | 117 | /* |
118 | * DEPRECATED! USE ONE_REG FOR THIS ONE! | ||
119 | * External Proxy (EXP) -- EPR | ||
120 | */ | ||
118 | #define KVM_SREGS_EXP (1 << 10) | 121 | #define KVM_SREGS_EXP (1 << 10) |
119 | 122 | ||
120 | /* External PID (E.PD) -- EPSC/EPLC */ | 123 | /* External PID (E.PD) -- EPSC/EPLC */ |
@@ -412,5 +415,6 @@ struct kvm_get_htab_header { | |||
412 | #define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) | 415 | #define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) |
413 | 416 | ||
414 | #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) | 417 | #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) |
418 | #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) | ||
415 | 419 | ||
416 | #endif /* __LINUX_KVM_POWERPC_H */ | 420 | #endif /* __LINUX_KVM_POWERPC_H */ |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 781190367292..b6c17ec9b169 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -118,7 +118,7 @@ int main(void) | |||
118 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | 118 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER |
119 | DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); | 119 | DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); |
120 | #endif | 120 | #endif |
121 | #ifdef CONFIG_KVM_BOOKE_HV | 121 | #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) |
122 | DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); | 122 | DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); |
123 | #endif | 123 | #endif |
124 | 124 | ||
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1e473d46322c..b772eded8c26 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
@@ -10,7 +10,8 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ | |||
10 | eventfd.o) | 10 | eventfd.o) |
11 | 11 | ||
12 | CFLAGS_44x_tlb.o := -I. | 12 | CFLAGS_44x_tlb.o := -I. |
13 | CFLAGS_e500_tlb.o := -I. | 13 | CFLAGS_e500_mmu.o := -I. |
14 | CFLAGS_e500_mmu_host.o := -I. | ||
14 | CFLAGS_emulate.o := -I. | 15 | CFLAGS_emulate.o := -I. |
15 | 16 | ||
16 | common-objs-y += powerpc.o emulate.o | 17 | common-objs-y += powerpc.o emulate.o |
@@ -35,7 +36,8 @@ kvm-e500-objs := \ | |||
35 | booke_emulate.o \ | 36 | booke_emulate.o \ |
36 | booke_interrupts.o \ | 37 | booke_interrupts.o \ |
37 | e500.o \ | 38 | e500.o \ |
38 | e500_tlb.o \ | 39 | e500_mmu.o \ |
40 | e500_mmu_host.o \ | ||
39 | e500_emulate.o | 41 | e500_emulate.o |
40 | kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) | 42 | kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) |
41 | 43 | ||
@@ -45,7 +47,8 @@ kvm-e500mc-objs := \ | |||
45 | booke_emulate.o \ | 47 | booke_emulate.o \ |
46 | bookehv_interrupts.o \ | 48 | bookehv_interrupts.o \ |
47 | e500mc.o \ | 49 | e500mc.o \ |
48 | e500_tlb.o \ | 50 | e500_mmu.o \ |
51 | e500_mmu_host.o \ | ||
49 | e500_emulate.o | 52 | e500_emulate.o |
50 | kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) | 53 | kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) |
51 | 54 | ||
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index d31a716f7f2b..836c56975e21 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #define OP_31_XOP_MTSRIN 242 | 34 | #define OP_31_XOP_MTSRIN 242 |
35 | #define OP_31_XOP_TLBIEL 274 | 35 | #define OP_31_XOP_TLBIEL 274 |
36 | #define OP_31_XOP_TLBIE 306 | 36 | #define OP_31_XOP_TLBIE 306 |
37 | /* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */ | ||
38 | #define OP_31_XOP_FAKE_SC1 308 | ||
37 | #define OP_31_XOP_SLBMTE 402 | 39 | #define OP_31_XOP_SLBMTE 402 |
38 | #define OP_31_XOP_SLBIE 434 | 40 | #define OP_31_XOP_SLBIE 434 |
39 | #define OP_31_XOP_SLBIA 498 | 41 | #define OP_31_XOP_SLBIA 498 |
@@ -170,6 +172,32 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
170 | vcpu->arch.mmu.tlbie(vcpu, addr, large); | 172 | vcpu->arch.mmu.tlbie(vcpu, addr, large); |
171 | break; | 173 | break; |
172 | } | 174 | } |
175 | #ifdef CONFIG_KVM_BOOK3S_64_PR | ||
176 | case OP_31_XOP_FAKE_SC1: | ||
177 | { | ||
178 | /* SC 1 papr hypercalls */ | ||
179 | ulong cmd = kvmppc_get_gpr(vcpu, 3); | ||
180 | int i; | ||
181 | |||
182 | if ((vcpu->arch.shared->msr & MSR_PR) || | ||
183 | !vcpu->arch.papr_enabled) { | ||
184 | emulated = EMULATE_FAIL; | ||
185 | break; | ||
186 | } | ||
187 | |||
188 | if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) | ||
189 | break; | ||
190 | |||
191 | run->papr_hcall.nr = cmd; | ||
192 | for (i = 0; i < 9; ++i) { | ||
193 | ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); | ||
194 | run->papr_hcall.args[i] = gpr; | ||
195 | } | ||
196 | |||
197 | emulated = EMULATE_DO_PAPR; | ||
198 | break; | ||
199 | } | ||
200 | #endif | ||
173 | case OP_31_XOP_EIOIO: | 201 | case OP_31_XOP_EIOIO: |
174 | break; | 202 | break; |
175 | case OP_31_XOP_SLBMTE: | 203 | case OP_31_XOP_SLBMTE: |
@@ -427,6 +455,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) | |||
427 | case SPRN_PMC3_GEKKO: | 455 | case SPRN_PMC3_GEKKO: |
428 | case SPRN_PMC4_GEKKO: | 456 | case SPRN_PMC4_GEKKO: |
429 | case SPRN_WPAR_GEKKO: | 457 | case SPRN_WPAR_GEKKO: |
458 | case SPRN_MSSSR0: | ||
430 | break; | 459 | break; |
431 | unprivileged: | 460 | unprivileged: |
432 | default: | 461 | default: |
@@ -523,6 +552,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
523 | case SPRN_PMC3_GEKKO: | 552 | case SPRN_PMC3_GEKKO: |
524 | case SPRN_PMC4_GEKKO: | 553 | case SPRN_PMC4_GEKKO: |
525 | case SPRN_WPAR_GEKKO: | 554 | case SPRN_WPAR_GEKKO: |
555 | case SPRN_MSSSR0: | ||
526 | *spr_val = 0; | 556 | *spr_val = 0; |
527 | break; | 557 | break; |
528 | default: | 558 | default: |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 71d0c90b62bf..80dcc53a1aba 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -1549,7 +1549,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | |||
1549 | mutex_lock(&kvm->slots_lock); | 1549 | mutex_lock(&kvm->slots_lock); |
1550 | 1550 | ||
1551 | r = -EINVAL; | 1551 | r = -EINVAL; |
1552 | if (log->slot >= KVM_MEMORY_SLOTS) | 1552 | if (log->slot >= KVM_USER_MEM_SLOTS) |
1553 | goto out; | 1553 | goto out; |
1554 | 1554 | ||
1555 | memslot = id_to_memslot(kvm->memslots, log->slot); | 1555 | memslot = id_to_memslot(kvm->memslots, log->slot); |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 6702442ca818..5e93438afb06 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
@@ -762,6 +762,11 @@ program_interrupt: | |||
762 | run->exit_reason = KVM_EXIT_MMIO; | 762 | run->exit_reason = KVM_EXIT_MMIO; |
763 | r = RESUME_HOST_NV; | 763 | r = RESUME_HOST_NV; |
764 | break; | 764 | break; |
765 | case EMULATE_DO_PAPR: | ||
766 | run->exit_reason = KVM_EXIT_PAPR_HCALL; | ||
767 | vcpu->arch.hcall_needed = 1; | ||
768 | r = RESUME_HOST_NV; | ||
769 | break; | ||
765 | default: | 770 | default: |
766 | BUG(); | 771 | BUG(); |
767 | } | 772 | } |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 69f114015780..020923e43134 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -182,6 +182,14 @@ static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, | |||
182 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); | 182 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); |
183 | } | 183 | } |
184 | 184 | ||
185 | static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong dear_flags, | ||
186 | ulong esr_flags) | ||
187 | { | ||
188 | vcpu->arch.queued_dear = dear_flags; | ||
189 | vcpu->arch.queued_esr = esr_flags; | ||
190 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT); | ||
191 | } | ||
192 | |||
185 | void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) | 193 | void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) |
186 | { | 194 | { |
187 | vcpu->arch.queued_esr = esr_flags; | 195 | vcpu->arch.queued_esr = esr_flags; |
@@ -300,13 +308,22 @@ static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) | |||
300 | #endif | 308 | #endif |
301 | } | 309 | } |
302 | 310 | ||
311 | static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) | ||
312 | { | ||
313 | #ifdef CONFIG_KVM_BOOKE_HV | ||
314 | return mfspr(SPRN_GEPR); | ||
315 | #else | ||
316 | return vcpu->arch.epr; | ||
317 | #endif | ||
318 | } | ||
319 | |||
303 | /* Deliver the interrupt of the corresponding priority, if possible. */ | 320 | /* Deliver the interrupt of the corresponding priority, if possible. */ |
304 | static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | 321 | static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, |
305 | unsigned int priority) | 322 | unsigned int priority) |
306 | { | 323 | { |
307 | int allowed = 0; | 324 | int allowed = 0; |
308 | ulong msr_mask = 0; | 325 | ulong msr_mask = 0; |
309 | bool update_esr = false, update_dear = false; | 326 | bool update_esr = false, update_dear = false, update_epr = false; |
310 | ulong crit_raw = vcpu->arch.shared->critical; | 327 | ulong crit_raw = vcpu->arch.shared->critical; |
311 | ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); | 328 | ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); |
312 | bool crit; | 329 | bool crit; |
@@ -330,9 +347,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
330 | keep_irq = true; | 347 | keep_irq = true; |
331 | } | 348 | } |
332 | 349 | ||
350 | if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) | ||
351 | update_epr = true; | ||
352 | |||
333 | switch (priority) { | 353 | switch (priority) { |
334 | case BOOKE_IRQPRIO_DTLB_MISS: | 354 | case BOOKE_IRQPRIO_DTLB_MISS: |
335 | case BOOKE_IRQPRIO_DATA_STORAGE: | 355 | case BOOKE_IRQPRIO_DATA_STORAGE: |
356 | case BOOKE_IRQPRIO_ALIGNMENT: | ||
336 | update_dear = true; | 357 | update_dear = true; |
337 | /* fall through */ | 358 | /* fall through */ |
338 | case BOOKE_IRQPRIO_INST_STORAGE: | 359 | case BOOKE_IRQPRIO_INST_STORAGE: |
@@ -346,7 +367,6 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
346 | case BOOKE_IRQPRIO_SPE_FP_DATA: | 367 | case BOOKE_IRQPRIO_SPE_FP_DATA: |
347 | case BOOKE_IRQPRIO_SPE_FP_ROUND: | 368 | case BOOKE_IRQPRIO_SPE_FP_ROUND: |
348 | case BOOKE_IRQPRIO_AP_UNAVAIL: | 369 | case BOOKE_IRQPRIO_AP_UNAVAIL: |
349 | case BOOKE_IRQPRIO_ALIGNMENT: | ||
350 | allowed = 1; | 370 | allowed = 1; |
351 | msr_mask = MSR_CE | MSR_ME | MSR_DE; | 371 | msr_mask = MSR_CE | MSR_ME | MSR_DE; |
352 | int_class = INT_CLASS_NONCRIT; | 372 | int_class = INT_CLASS_NONCRIT; |
@@ -408,6 +428,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
408 | set_guest_esr(vcpu, vcpu->arch.queued_esr); | 428 | set_guest_esr(vcpu, vcpu->arch.queued_esr); |
409 | if (update_dear == true) | 429 | if (update_dear == true) |
410 | set_guest_dear(vcpu, vcpu->arch.queued_dear); | 430 | set_guest_dear(vcpu, vcpu->arch.queued_dear); |
431 | if (update_epr == true) | ||
432 | kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); | ||
411 | 433 | ||
412 | new_msr &= msr_mask; | 434 | new_msr &= msr_mask; |
413 | #if defined(CONFIG_64BIT) | 435 | #if defined(CONFIG_64BIT) |
@@ -581,6 +603,11 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) | |||
581 | 603 | ||
582 | kvmppc_core_check_exceptions(vcpu); | 604 | kvmppc_core_check_exceptions(vcpu); |
583 | 605 | ||
606 | if (vcpu->requests) { | ||
607 | /* Exception delivery raised request; start over */ | ||
608 | return 1; | ||
609 | } | ||
610 | |||
584 | if (vcpu->arch.shared->msr & MSR_WE) { | 611 | if (vcpu->arch.shared->msr & MSR_WE) { |
585 | local_irq_enable(); | 612 | local_irq_enable(); |
586 | kvm_vcpu_block(vcpu); | 613 | kvm_vcpu_block(vcpu); |
@@ -610,6 +637,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) | |||
610 | r = 0; | 637 | r = 0; |
611 | } | 638 | } |
612 | 639 | ||
640 | if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) { | ||
641 | vcpu->run->epr.epr = 0; | ||
642 | vcpu->arch.epr_needed = true; | ||
643 | vcpu->run->exit_reason = KVM_EXIT_EPR; | ||
644 | r = 0; | ||
645 | } | ||
646 | |||
613 | return r; | 647 | return r; |
614 | } | 648 | } |
615 | 649 | ||
@@ -945,6 +979,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
945 | r = RESUME_GUEST; | 979 | r = RESUME_GUEST; |
946 | break; | 980 | break; |
947 | 981 | ||
982 | case BOOKE_INTERRUPT_ALIGNMENT: | ||
983 | kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear, | ||
984 | vcpu->arch.fault_esr); | ||
985 | r = RESUME_GUEST; | ||
986 | break; | ||
987 | |||
948 | #ifdef CONFIG_KVM_BOOKE_HV | 988 | #ifdef CONFIG_KVM_BOOKE_HV |
949 | case BOOKE_INTERRUPT_HV_SYSCALL: | 989 | case BOOKE_INTERRUPT_HV_SYSCALL: |
950 | if (!(vcpu->arch.shared->msr & MSR_PR)) { | 990 | if (!(vcpu->arch.shared->msr & MSR_PR)) { |
@@ -1388,6 +1428,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | |||
1388 | &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); | 1428 | &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); |
1389 | break; | 1429 | break; |
1390 | } | 1430 | } |
1431 | case KVM_REG_PPC_EPR: { | ||
1432 | u32 epr = get_guest_epr(vcpu); | ||
1433 | r = put_user(epr, (u32 __user *)(long)reg->addr); | ||
1434 | break; | ||
1435 | } | ||
1391 | #if defined(CONFIG_64BIT) | 1436 | #if defined(CONFIG_64BIT) |
1392 | case KVM_REG_PPC_EPCR: | 1437 | case KVM_REG_PPC_EPCR: |
1393 | r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); | 1438 | r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); |
@@ -1420,6 +1465,13 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | |||
1420 | (u64 __user *)(long)reg->addr, sizeof(u64)); | 1465 | (u64 __user *)(long)reg->addr, sizeof(u64)); |
1421 | break; | 1466 | break; |
1422 | } | 1467 | } |
1468 | case KVM_REG_PPC_EPR: { | ||
1469 | u32 new_epr; | ||
1470 | r = get_user(new_epr, (u32 __user *)(long)reg->addr); | ||
1471 | if (!r) | ||
1472 | kvmppc_set_epr(vcpu, new_epr); | ||
1473 | break; | ||
1474 | } | ||
1423 | #if defined(CONFIG_64BIT) | 1475 | #if defined(CONFIG_64BIT) |
1424 | case KVM_REG_PPC_EPCR: { | 1476 | case KVM_REG_PPC_EPCR: { |
1425 | u32 new_epcr; | 1477 | u32 new_epcr; |
@@ -1556,7 +1608,9 @@ int __init kvmppc_booke_init(void) | |||
1556 | { | 1608 | { |
1557 | #ifndef CONFIG_KVM_BOOKE_HV | 1609 | #ifndef CONFIG_KVM_BOOKE_HV |
1558 | unsigned long ivor[16]; | 1610 | unsigned long ivor[16]; |
1611 | unsigned long *handler = kvmppc_booke_handler_addr; | ||
1559 | unsigned long max_ivor = 0; | 1612 | unsigned long max_ivor = 0; |
1613 | unsigned long handler_len; | ||
1560 | int i; | 1614 | int i; |
1561 | 1615 | ||
1562 | /* We install our own exception handlers by hijacking IVPR. IVPR must | 1616 | /* We install our own exception handlers by hijacking IVPR. IVPR must |
@@ -1589,14 +1643,16 @@ int __init kvmppc_booke_init(void) | |||
1589 | 1643 | ||
1590 | for (i = 0; i < 16; i++) { | 1644 | for (i = 0; i < 16; i++) { |
1591 | if (ivor[i] > max_ivor) | 1645 | if (ivor[i] > max_ivor) |
1592 | max_ivor = ivor[i]; | 1646 | max_ivor = i; |
1593 | 1647 | ||
1648 | handler_len = handler[i + 1] - handler[i]; | ||
1594 | memcpy((void *)kvmppc_booke_handlers + ivor[i], | 1649 | memcpy((void *)kvmppc_booke_handlers + ivor[i], |
1595 | kvmppc_handlers_start + i * kvmppc_handler_len, | 1650 | (void *)handler[i], handler_len); |
1596 | kvmppc_handler_len); | ||
1597 | } | 1651 | } |
1598 | flush_icache_range(kvmppc_booke_handlers, | 1652 | |
1599 | kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); | 1653 | handler_len = handler[max_ivor + 1] - handler[max_ivor]; |
1654 | flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + | ||
1655 | ivor[max_ivor] + handler_len); | ||
1600 | #endif /* !BOOKE_HV */ | 1656 | #endif /* !BOOKE_HV */ |
1601 | return 0; | 1657 | return 0; |
1602 | } | 1658 | } |
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index e9b88e433f64..5fd1ba693579 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h | |||
@@ -65,6 +65,7 @@ | |||
65 | (1 << BOOKE_IRQPRIO_CRITICAL)) | 65 | (1 << BOOKE_IRQPRIO_CRITICAL)) |
66 | 66 | ||
67 | extern unsigned long kvmppc_booke_handlers; | 67 | extern unsigned long kvmppc_booke_handlers; |
68 | extern unsigned long kvmppc_booke_handler_addr[]; | ||
68 | 69 | ||
69 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); | 70 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); |
70 | void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); | 71 | void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); |
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 4685b8cf2249..27a4b2877c10 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c | |||
@@ -269,6 +269,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
269 | case SPRN_ESR: | 269 | case SPRN_ESR: |
270 | *spr_val = vcpu->arch.shared->esr; | 270 | *spr_val = vcpu->arch.shared->esr; |
271 | break; | 271 | break; |
272 | case SPRN_EPR: | ||
273 | *spr_val = vcpu->arch.epr; | ||
274 | break; | ||
272 | case SPRN_CSRR0: | 275 | case SPRN_CSRR0: |
273 | *spr_val = vcpu->arch.csrr0; | 276 | *spr_val = vcpu->arch.csrr0; |
274 | break; | 277 | break; |
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index bb46b32f9813..f4bb55c96517 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S | |||
@@ -45,18 +45,21 @@ | |||
45 | (1<<BOOKE_INTERRUPT_DEBUG)) | 45 | (1<<BOOKE_INTERRUPT_DEBUG)) |
46 | 46 | ||
47 | #define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ | 47 | #define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ |
48 | (1<<BOOKE_INTERRUPT_DTLB_MISS)) | 48 | (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ |
49 | (1<<BOOKE_INTERRUPT_ALIGNMENT)) | ||
49 | 50 | ||
50 | #define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ | 51 | #define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ |
51 | (1<<BOOKE_INTERRUPT_INST_STORAGE) | \ | 52 | (1<<BOOKE_INTERRUPT_INST_STORAGE) | \ |
52 | (1<<BOOKE_INTERRUPT_PROGRAM) | \ | 53 | (1<<BOOKE_INTERRUPT_PROGRAM) | \ |
53 | (1<<BOOKE_INTERRUPT_DTLB_MISS)) | 54 | (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ |
55 | (1<<BOOKE_INTERRUPT_ALIGNMENT)) | ||
54 | 56 | ||
55 | .macro KVM_HANDLER ivor_nr scratch srr0 | 57 | .macro KVM_HANDLER ivor_nr scratch srr0 |
56 | _GLOBAL(kvmppc_handler_\ivor_nr) | 58 | _GLOBAL(kvmppc_handler_\ivor_nr) |
57 | /* Get pointer to vcpu and record exit number. */ | 59 | /* Get pointer to vcpu and record exit number. */ |
58 | mtspr \scratch , r4 | 60 | mtspr \scratch , r4 |
59 | mfspr r4, SPRN_SPRG_RVCPU | 61 | mfspr r4, SPRN_SPRG_THREAD |
62 | lwz r4, THREAD_KVM_VCPU(r4) | ||
60 | stw r3, VCPU_GPR(R3)(r4) | 63 | stw r3, VCPU_GPR(R3)(r4) |
61 | stw r5, VCPU_GPR(R5)(r4) | 64 | stw r5, VCPU_GPR(R5)(r4) |
62 | stw r6, VCPU_GPR(R6)(r4) | 65 | stw r6, VCPU_GPR(R6)(r4) |
@@ -73,6 +76,14 @@ _GLOBAL(kvmppc_handler_\ivor_nr) | |||
73 | bctr | 76 | bctr |
74 | .endm | 77 | .endm |
75 | 78 | ||
79 | .macro KVM_HANDLER_ADDR ivor_nr | ||
80 | .long kvmppc_handler_\ivor_nr | ||
81 | .endm | ||
82 | |||
83 | .macro KVM_HANDLER_END | ||
84 | .long kvmppc_handlers_end | ||
85 | .endm | ||
86 | |||
76 | _GLOBAL(kvmppc_handlers_start) | 87 | _GLOBAL(kvmppc_handlers_start) |
77 | KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 | 88 | KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 |
78 | KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0 | 89 | KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0 |
@@ -93,9 +104,7 @@ KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 | |||
93 | KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 104 | KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
94 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 105 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
95 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 106 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
96 | 107 | _GLOBAL(kvmppc_handlers_end) | |
97 | _GLOBAL(kvmppc_handler_len) | ||
98 | .long kvmppc_handler_1 - kvmppc_handler_0 | ||
99 | 108 | ||
100 | /* Registers: | 109 | /* Registers: |
101 | * SPRG_SCRATCH0: guest r4 | 110 | * SPRG_SCRATCH0: guest r4 |
@@ -402,9 +411,6 @@ lightweight_exit: | |||
402 | lwz r8, kvmppc_booke_handlers@l(r8) | 411 | lwz r8, kvmppc_booke_handlers@l(r8) |
403 | mtspr SPRN_IVPR, r8 | 412 | mtspr SPRN_IVPR, r8 |
404 | 413 | ||
405 | /* Save vcpu pointer for the exception handlers. */ | ||
406 | mtspr SPRN_SPRG_WVCPU, r4 | ||
407 | |||
408 | lwz r5, VCPU_SHARED(r4) | 414 | lwz r5, VCPU_SHARED(r4) |
409 | 415 | ||
410 | /* Can't switch the stack pointer until after IVPR is switched, | 416 | /* Can't switch the stack pointer until after IVPR is switched, |
@@ -463,6 +469,31 @@ lightweight_exit: | |||
463 | lwz r4, VCPU_GPR(R4)(r4) | 469 | lwz r4, VCPU_GPR(R4)(r4) |
464 | rfi | 470 | rfi |
465 | 471 | ||
472 | .data | ||
473 | .align 4 | ||
474 | .globl kvmppc_booke_handler_addr | ||
475 | kvmppc_booke_handler_addr: | ||
476 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL | ||
477 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK | ||
478 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE | ||
479 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE | ||
480 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL | ||
481 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT | ||
482 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM | ||
483 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL | ||
484 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL | ||
485 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL | ||
486 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_DECREMENTER | ||
487 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_FIT | ||
488 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_WATCHDOG | ||
489 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_DTLB_MISS | ||
490 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_ITLB_MISS | ||
491 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_DEBUG | ||
492 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_UNAVAIL | ||
493 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_DATA | ||
494 | KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_ROUND | ||
495 | KVM_HANDLER_END /*Always keep this in end*/ | ||
496 | |||
466 | #ifdef CONFIG_SPE | 497 | #ifdef CONFIG_SPE |
467 | _GLOBAL(kvmppc_save_guest_spe) | 498 | _GLOBAL(kvmppc_save_guest_spe) |
468 | cmpi 0,r3,0 | 499 | cmpi 0,r3,0 |
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index b479ed77c515..6dd4de7802bf 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
@@ -491,6 +491,9 @@ static int __init kvmppc_e500_init(void) | |||
491 | { | 491 | { |
492 | int r, i; | 492 | int r, i; |
493 | unsigned long ivor[3]; | 493 | unsigned long ivor[3]; |
494 | /* Process remaining handlers above the generic first 16 */ | ||
495 | unsigned long *handler = &kvmppc_booke_handler_addr[16]; | ||
496 | unsigned long handler_len; | ||
494 | unsigned long max_ivor = 0; | 497 | unsigned long max_ivor = 0; |
495 | 498 | ||
496 | r = kvmppc_core_check_processor_compat(); | 499 | r = kvmppc_core_check_processor_compat(); |
@@ -506,15 +509,16 @@ static int __init kvmppc_e500_init(void) | |||
506 | ivor[1] = mfspr(SPRN_IVOR33); | 509 | ivor[1] = mfspr(SPRN_IVOR33); |
507 | ivor[2] = mfspr(SPRN_IVOR34); | 510 | ivor[2] = mfspr(SPRN_IVOR34); |
508 | for (i = 0; i < 3; i++) { | 511 | for (i = 0; i < 3; i++) { |
509 | if (ivor[i] > max_ivor) | 512 | if (ivor[i] > ivor[max_ivor]) |
510 | max_ivor = ivor[i]; | 513 | max_ivor = i; |
511 | 514 | ||
515 | handler_len = handler[i + 1] - handler[i]; | ||
512 | memcpy((void *)kvmppc_booke_handlers + ivor[i], | 516 | memcpy((void *)kvmppc_booke_handlers + ivor[i], |
513 | kvmppc_handlers_start + (i + 16) * kvmppc_handler_len, | 517 | (void *)handler[i], handler_len); |
514 | kvmppc_handler_len); | ||
515 | } | 518 | } |
516 | flush_icache_range(kvmppc_booke_handlers, | 519 | handler_len = handler[max_ivor + 1] - handler[max_ivor]; |
517 | kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); | 520 | flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + |
521 | ivor[max_ivor] + handler_len); | ||
518 | 522 | ||
519 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); | 523 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); |
520 | } | 524 | } |
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index c70d37ed770a..41cefd43655f 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h | |||
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #define E500_TLB_VALID 1 | 29 | #define E500_TLB_VALID 1 |
30 | #define E500_TLB_BITMAP 2 | 30 | #define E500_TLB_BITMAP 2 |
31 | #define E500_TLB_TLB0 (1 << 2) | ||
31 | 32 | ||
32 | struct tlbe_ref { | 33 | struct tlbe_ref { |
33 | pfn_t pfn; | 34 | pfn_t pfn; |
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_mmu.c index cf3f18012371..5c4475983f78 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_mmu.c | |||
@@ -1,10 +1,11 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. | 2 | * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * Author: Yu Liu, yu.liu@freescale.com | 4 | * Author: Yu Liu, yu.liu@freescale.com |
5 | * Scott Wood, scottwood@freescale.com | 5 | * Scott Wood, scottwood@freescale.com |
6 | * Ashish Kalra, ashish.kalra@freescale.com | 6 | * Ashish Kalra, ashish.kalra@freescale.com |
7 | * Varun Sethi, varun.sethi@freescale.com | 7 | * Varun Sethi, varun.sethi@freescale.com |
8 | * Alexander Graf, agraf@suse.de | ||
8 | * | 9 | * |
9 | * Description: | 10 | * Description: |
10 | * This file is based on arch/powerpc/kvm/44x_tlb.c, | 11 | * This file is based on arch/powerpc/kvm/44x_tlb.c, |
@@ -33,10 +34,7 @@ | |||
33 | #include "e500.h" | 34 | #include "e500.h" |
34 | #include "trace.h" | 35 | #include "trace.h" |
35 | #include "timing.h" | 36 | #include "timing.h" |
36 | 37 | #include "e500_mmu_host.h" | |
37 | #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) | ||
38 | |||
39 | static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; | ||
40 | 38 | ||
41 | static inline unsigned int gtlb0_get_next_victim( | 39 | static inline unsigned int gtlb0_get_next_victim( |
42 | struct kvmppc_vcpu_e500 *vcpu_e500) | 40 | struct kvmppc_vcpu_e500 *vcpu_e500) |
@@ -50,174 +48,6 @@ static inline unsigned int gtlb0_get_next_victim( | |||
50 | return victim; | 48 | return victim; |
51 | } | 49 | } |
52 | 50 | ||
53 | static inline unsigned int tlb1_max_shadow_size(void) | ||
54 | { | ||
55 | /* reserve one entry for magic page */ | ||
56 | return host_tlb_params[1].entries - tlbcam_index - 1; | ||
57 | } | ||
58 | |||
59 | static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) | ||
60 | { | ||
61 | return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); | ||
62 | } | ||
63 | |||
64 | static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) | ||
65 | { | ||
66 | /* Mask off reserved bits. */ | ||
67 | mas3 &= MAS3_ATTRIB_MASK; | ||
68 | |||
69 | #ifndef CONFIG_KVM_BOOKE_HV | ||
70 | if (!usermode) { | ||
71 | /* Guest is in supervisor mode, | ||
72 | * so we need to translate guest | ||
73 | * supervisor permissions into user permissions. */ | ||
74 | mas3 &= ~E500_TLB_USER_PERM_MASK; | ||
75 | mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; | ||
76 | } | ||
77 | mas3 |= E500_TLB_SUPER_PERM_MASK; | ||
78 | #endif | ||
79 | return mas3; | ||
80 | } | ||
81 | |||
82 | static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) | ||
83 | { | ||
84 | #ifdef CONFIG_SMP | ||
85 | return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; | ||
86 | #else | ||
87 | return mas2 & MAS2_ATTRIB_MASK; | ||
88 | #endif | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * writing shadow tlb entry to host TLB | ||
93 | */ | ||
94 | static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, | ||
95 | uint32_t mas0) | ||
96 | { | ||
97 | unsigned long flags; | ||
98 | |||
99 | local_irq_save(flags); | ||
100 | mtspr(SPRN_MAS0, mas0); | ||
101 | mtspr(SPRN_MAS1, stlbe->mas1); | ||
102 | mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); | ||
103 | mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); | ||
104 | mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); | ||
105 | #ifdef CONFIG_KVM_BOOKE_HV | ||
106 | mtspr(SPRN_MAS8, stlbe->mas8); | ||
107 | #endif | ||
108 | asm volatile("isync; tlbwe" : : : "memory"); | ||
109 | |||
110 | #ifdef CONFIG_KVM_BOOKE_HV | ||
111 | /* Must clear mas8 for other host tlbwe's */ | ||
112 | mtspr(SPRN_MAS8, 0); | ||
113 | isync(); | ||
114 | #endif | ||
115 | local_irq_restore(flags); | ||
116 | |||
117 | trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, | ||
118 | stlbe->mas2, stlbe->mas7_3); | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * Acquire a mas0 with victim hint, as if we just took a TLB miss. | ||
123 | * | ||
124 | * We don't care about the address we're searching for, other than that it's | ||
125 | * in the right set and is not present in the TLB. Using a zero PID and a | ||
126 | * userspace address means we don't have to set and then restore MAS5, or | ||
127 | * calculate a proper MAS6 value. | ||
128 | */ | ||
129 | static u32 get_host_mas0(unsigned long eaddr) | ||
130 | { | ||
131 | unsigned long flags; | ||
132 | u32 mas0; | ||
133 | |||
134 | local_irq_save(flags); | ||
135 | mtspr(SPRN_MAS6, 0); | ||
136 | asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); | ||
137 | mas0 = mfspr(SPRN_MAS0); | ||
138 | local_irq_restore(flags); | ||
139 | |||
140 | return mas0; | ||
141 | } | ||
142 | |||
143 | /* sesel is for tlb1 only */ | ||
144 | static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
145 | int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) | ||
146 | { | ||
147 | u32 mas0; | ||
148 | |||
149 | if (tlbsel == 0) { | ||
150 | mas0 = get_host_mas0(stlbe->mas2); | ||
151 | __write_host_tlbe(stlbe, mas0); | ||
152 | } else { | ||
153 | __write_host_tlbe(stlbe, | ||
154 | MAS0_TLBSEL(1) | | ||
155 | MAS0_ESEL(to_htlb1_esel(sesel))); | ||
156 | } | ||
157 | } | ||
158 | |||
159 | #ifdef CONFIG_KVM_E500V2 | ||
160 | void kvmppc_map_magic(struct kvm_vcpu *vcpu) | ||
161 | { | ||
162 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
163 | struct kvm_book3e_206_tlb_entry magic; | ||
164 | ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; | ||
165 | unsigned int stid; | ||
166 | pfn_t pfn; | ||
167 | |||
168 | pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; | ||
169 | get_page(pfn_to_page(pfn)); | ||
170 | |||
171 | preempt_disable(); | ||
172 | stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0); | ||
173 | |||
174 | magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | | ||
175 | MAS1_TSIZE(BOOK3E_PAGESZ_4K); | ||
176 | magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; | ||
177 | magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | | ||
178 | MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; | ||
179 | magic.mas8 = 0; | ||
180 | |||
181 | __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); | ||
182 | preempt_enable(); | ||
183 | } | ||
184 | #endif | ||
185 | |||
186 | static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
187 | int tlbsel, int esel) | ||
188 | { | ||
189 | struct kvm_book3e_206_tlb_entry *gtlbe = | ||
190 | get_entry(vcpu_e500, tlbsel, esel); | ||
191 | |||
192 | if (tlbsel == 1 && | ||
193 | vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) { | ||
194 | u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; | ||
195 | int hw_tlb_indx; | ||
196 | unsigned long flags; | ||
197 | |||
198 | local_irq_save(flags); | ||
199 | while (tmp) { | ||
200 | hw_tlb_indx = __ilog2_u64(tmp & -tmp); | ||
201 | mtspr(SPRN_MAS0, | ||
202 | MAS0_TLBSEL(1) | | ||
203 | MAS0_ESEL(to_htlb1_esel(hw_tlb_indx))); | ||
204 | mtspr(SPRN_MAS1, 0); | ||
205 | asm volatile("tlbwe"); | ||
206 | vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0; | ||
207 | tmp &= tmp - 1; | ||
208 | } | ||
209 | mb(); | ||
210 | vcpu_e500->g2h_tlb1_map[esel] = 0; | ||
211 | vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP; | ||
212 | local_irq_restore(flags); | ||
213 | |||
214 | return; | ||
215 | } | ||
216 | |||
217 | /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ | ||
218 | kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); | ||
219 | } | ||
220 | |||
221 | static int tlb0_set_base(gva_t addr, int sets, int ways) | 51 | static int tlb0_set_base(gva_t addr, int sets, int ways) |
222 | { | 52 | { |
223 | int set_base; | 53 | int set_base; |
@@ -296,70 +126,6 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
296 | return -1; | 126 | return -1; |
297 | } | 127 | } |
298 | 128 | ||
299 | static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, | ||
300 | struct kvm_book3e_206_tlb_entry *gtlbe, | ||
301 | pfn_t pfn) | ||
302 | { | ||
303 | ref->pfn = pfn; | ||
304 | ref->flags = E500_TLB_VALID; | ||
305 | |||
306 | if (tlbe_is_writable(gtlbe)) | ||
307 | kvm_set_pfn_dirty(pfn); | ||
308 | } | ||
309 | |||
310 | static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) | ||
311 | { | ||
312 | if (ref->flags & E500_TLB_VALID) { | ||
313 | trace_kvm_booke206_ref_release(ref->pfn, ref->flags); | ||
314 | ref->flags = 0; | ||
315 | } | ||
316 | } | ||
317 | |||
318 | static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
319 | { | ||
320 | if (vcpu_e500->g2h_tlb1_map) | ||
321 | memset(vcpu_e500->g2h_tlb1_map, 0, | ||
322 | sizeof(u64) * vcpu_e500->gtlb_params[1].entries); | ||
323 | if (vcpu_e500->h2g_tlb1_rmap) | ||
324 | memset(vcpu_e500->h2g_tlb1_rmap, 0, | ||
325 | sizeof(unsigned int) * host_tlb_params[1].entries); | ||
326 | } | ||
327 | |||
328 | static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
329 | { | ||
330 | int tlbsel = 0; | ||
331 | int i; | ||
332 | |||
333 | for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { | ||
334 | struct tlbe_ref *ref = | ||
335 | &vcpu_e500->gtlb_priv[tlbsel][i].ref; | ||
336 | kvmppc_e500_ref_release(ref); | ||
337 | } | ||
338 | } | ||
339 | |||
340 | static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
341 | { | ||
342 | int stlbsel = 1; | ||
343 | int i; | ||
344 | |||
345 | kvmppc_e500_tlbil_all(vcpu_e500); | ||
346 | |||
347 | for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { | ||
348 | struct tlbe_ref *ref = | ||
349 | &vcpu_e500->tlb_refs[stlbsel][i]; | ||
350 | kvmppc_e500_ref_release(ref); | ||
351 | } | ||
352 | |||
353 | clear_tlb_privs(vcpu_e500); | ||
354 | } | ||
355 | |||
356 | void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) | ||
357 | { | ||
358 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
359 | clear_tlb_refs(vcpu_e500); | ||
360 | clear_tlb1_bitmap(vcpu_e500); | ||
361 | } | ||
362 | |||
363 | static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, | 129 | static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, |
364 | unsigned int eaddr, int as) | 130 | unsigned int eaddr, int as) |
365 | { | 131 | { |
@@ -385,216 +151,6 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, | |||
385 | | (as ? MAS6_SAS : 0); | 151 | | (as ? MAS6_SAS : 0); |
386 | } | 152 | } |
387 | 153 | ||
388 | /* TID must be supplied by the caller */ | ||
389 | static inline void kvmppc_e500_setup_stlbe( | ||
390 | struct kvm_vcpu *vcpu, | ||
391 | struct kvm_book3e_206_tlb_entry *gtlbe, | ||
392 | int tsize, struct tlbe_ref *ref, u64 gvaddr, | ||
393 | struct kvm_book3e_206_tlb_entry *stlbe) | ||
394 | { | ||
395 | pfn_t pfn = ref->pfn; | ||
396 | u32 pr = vcpu->arch.shared->msr & MSR_PR; | ||
397 | |||
398 | BUG_ON(!(ref->flags & E500_TLB_VALID)); | ||
399 | |||
400 | /* Force IPROT=0 for all guest mappings. */ | ||
401 | stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; | ||
402 | stlbe->mas2 = (gvaddr & MAS2_EPN) | | ||
403 | e500_shadow_mas2_attrib(gtlbe->mas2, pr); | ||
404 | stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | | ||
405 | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); | ||
406 | |||
407 | #ifdef CONFIG_KVM_BOOKE_HV | ||
408 | stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; | ||
409 | #endif | ||
410 | } | ||
411 | |||
412 | static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
413 | u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, | ||
414 | int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, | ||
415 | struct tlbe_ref *ref) | ||
416 | { | ||
417 | struct kvm_memory_slot *slot; | ||
418 | unsigned long pfn = 0; /* silence GCC warning */ | ||
419 | unsigned long hva; | ||
420 | int pfnmap = 0; | ||
421 | int tsize = BOOK3E_PAGESZ_4K; | ||
422 | |||
423 | /* | ||
424 | * Translate guest physical to true physical, acquiring | ||
425 | * a page reference if it is normal, non-reserved memory. | ||
426 | * | ||
427 | * gfn_to_memslot() must succeed because otherwise we wouldn't | ||
428 | * have gotten this far. Eventually we should just pass the slot | ||
429 | * pointer through from the first lookup. | ||
430 | */ | ||
431 | slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); | ||
432 | hva = gfn_to_hva_memslot(slot, gfn); | ||
433 | |||
434 | if (tlbsel == 1) { | ||
435 | struct vm_area_struct *vma; | ||
436 | down_read(&current->mm->mmap_sem); | ||
437 | |||
438 | vma = find_vma(current->mm, hva); | ||
439 | if (vma && hva >= vma->vm_start && | ||
440 | (vma->vm_flags & VM_PFNMAP)) { | ||
441 | /* | ||
442 | * This VMA is a physically contiguous region (e.g. | ||
443 | * /dev/mem) that bypasses normal Linux page | ||
444 | * management. Find the overlap between the | ||
445 | * vma and the memslot. | ||
446 | */ | ||
447 | |||
448 | unsigned long start, end; | ||
449 | unsigned long slot_start, slot_end; | ||
450 | |||
451 | pfnmap = 1; | ||
452 | |||
453 | start = vma->vm_pgoff; | ||
454 | end = start + | ||
455 | ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); | ||
456 | |||
457 | pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); | ||
458 | |||
459 | slot_start = pfn - (gfn - slot->base_gfn); | ||
460 | slot_end = slot_start + slot->npages; | ||
461 | |||
462 | if (start < slot_start) | ||
463 | start = slot_start; | ||
464 | if (end > slot_end) | ||
465 | end = slot_end; | ||
466 | |||
467 | tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> | ||
468 | MAS1_TSIZE_SHIFT; | ||
469 | |||
470 | /* | ||
471 | * e500 doesn't implement the lowest tsize bit, | ||
472 | * or 1K pages. | ||
473 | */ | ||
474 | tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); | ||
475 | |||
476 | /* | ||
477 | * Now find the largest tsize (up to what the guest | ||
478 | * requested) that will cover gfn, stay within the | ||
479 | * range, and for which gfn and pfn are mutually | ||
480 | * aligned. | ||
481 | */ | ||
482 | |||
483 | for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { | ||
484 | unsigned long gfn_start, gfn_end, tsize_pages; | ||
485 | tsize_pages = 1 << (tsize - 2); | ||
486 | |||
487 | gfn_start = gfn & ~(tsize_pages - 1); | ||
488 | gfn_end = gfn_start + tsize_pages; | ||
489 | |||
490 | if (gfn_start + pfn - gfn < start) | ||
491 | continue; | ||
492 | if (gfn_end + pfn - gfn > end) | ||
493 | continue; | ||
494 | if ((gfn & (tsize_pages - 1)) != | ||
495 | (pfn & (tsize_pages - 1))) | ||
496 | continue; | ||
497 | |||
498 | gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); | ||
499 | pfn &= ~(tsize_pages - 1); | ||
500 | break; | ||
501 | } | ||
502 | } else if (vma && hva >= vma->vm_start && | ||
503 | (vma->vm_flags & VM_HUGETLB)) { | ||
504 | unsigned long psize = vma_kernel_pagesize(vma); | ||
505 | |||
506 | tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> | ||
507 | MAS1_TSIZE_SHIFT; | ||
508 | |||
509 | /* | ||
510 | * Take the largest page size that satisfies both host | ||
511 | * and guest mapping | ||
512 | */ | ||
513 | tsize = min(__ilog2(psize) - 10, tsize); | ||
514 | |||
515 | /* | ||
516 | * e500 doesn't implement the lowest tsize bit, | ||
517 | * or 1K pages. | ||
518 | */ | ||
519 | tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); | ||
520 | } | ||
521 | |||
522 | up_read(&current->mm->mmap_sem); | ||
523 | } | ||
524 | |||
525 | if (likely(!pfnmap)) { | ||
526 | unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); | ||
527 | pfn = gfn_to_pfn_memslot(slot, gfn); | ||
528 | if (is_error_noslot_pfn(pfn)) { | ||
529 | printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", | ||
530 | (long)gfn); | ||
531 | return; | ||
532 | } | ||
533 | |||
534 | /* Align guest and physical address to page map boundaries */ | ||
535 | pfn &= ~(tsize_pages - 1); | ||
536 | gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); | ||
537 | } | ||
538 | |||
539 | /* Drop old ref and setup new one. */ | ||
540 | kvmppc_e500_ref_release(ref); | ||
541 | kvmppc_e500_ref_setup(ref, gtlbe, pfn); | ||
542 | |||
543 | kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, | ||
544 | ref, gvaddr, stlbe); | ||
545 | |||
546 | /* Clear i-cache for new pages */ | ||
547 | kvmppc_mmu_flush_icache(pfn); | ||
548 | |||
549 | /* Drop refcount on page, so that mmu notifiers can clear it */ | ||
550 | kvm_release_pfn_clean(pfn); | ||
551 | } | ||
552 | |||
553 | /* XXX only map the one-one case, for now use TLB0 */ | ||
554 | static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
555 | int esel, | ||
556 | struct kvm_book3e_206_tlb_entry *stlbe) | ||
557 | { | ||
558 | struct kvm_book3e_206_tlb_entry *gtlbe; | ||
559 | struct tlbe_ref *ref; | ||
560 | |||
561 | gtlbe = get_entry(vcpu_e500, 0, esel); | ||
562 | ref = &vcpu_e500->gtlb_priv[0][esel].ref; | ||
563 | |||
564 | kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), | ||
565 | get_tlb_raddr(gtlbe) >> PAGE_SHIFT, | ||
566 | gtlbe, 0, stlbe, ref); | ||
567 | } | ||
568 | |||
569 | /* Caller must ensure that the specified guest TLB entry is safe to insert into | ||
570 | * the shadow TLB. */ | ||
571 | /* XXX for both one-one and one-to-many , for now use TLB1 */ | ||
572 | static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
573 | u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, | ||
574 | struct kvm_book3e_206_tlb_entry *stlbe, int esel) | ||
575 | { | ||
576 | struct tlbe_ref *ref; | ||
577 | unsigned int victim; | ||
578 | |||
579 | victim = vcpu_e500->host_tlb1_nv++; | ||
580 | |||
581 | if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) | ||
582 | vcpu_e500->host_tlb1_nv = 0; | ||
583 | |||
584 | ref = &vcpu_e500->tlb_refs[1][victim]; | ||
585 | kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); | ||
586 | |||
587 | vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim; | ||
588 | vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; | ||
589 | if (vcpu_e500->h2g_tlb1_rmap[victim]) { | ||
590 | unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim]; | ||
591 | vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim); | ||
592 | } | ||
593 | vcpu_e500->h2g_tlb1_rmap[victim] = esel; | ||
594 | |||
595 | return victim; | ||
596 | } | ||
597 | |||
598 | static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) | 154 | static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) |
599 | { | 155 | { |
600 | int size = vcpu_e500->gtlb_params[1].entries; | 156 | int size = vcpu_e500->gtlb_params[1].entries; |
@@ -683,8 +239,8 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) | |||
683 | for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) | 239 | for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) |
684 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); | 240 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); |
685 | 241 | ||
686 | /* Invalidate all vcpu id mappings */ | 242 | /* Invalidate all host shadow mappings */ |
687 | kvmppc_e500_tlbil_all(vcpu_e500); | 243 | kvmppc_core_flush_tlb(&vcpu_e500->vcpu); |
688 | 244 | ||
689 | return EMULATE_DONE; | 245 | return EMULATE_DONE; |
690 | } | 246 | } |
@@ -713,8 +269,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea) | |||
713 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); | 269 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); |
714 | } | 270 | } |
715 | 271 | ||
716 | /* Invalidate all vcpu id mappings */ | 272 | /* Invalidate all host shadow mappings */ |
717 | kvmppc_e500_tlbil_all(vcpu_e500); | 273 | kvmppc_core_flush_tlb(&vcpu_e500->vcpu); |
718 | 274 | ||
719 | return EMULATE_DONE; | 275 | return EMULATE_DONE; |
720 | } | 276 | } |
@@ -834,27 +390,11 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) | |||
834 | return EMULATE_DONE; | 390 | return EMULATE_DONE; |
835 | } | 391 | } |
836 | 392 | ||
837 | /* sesel is for tlb1 only */ | ||
838 | static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
839 | struct kvm_book3e_206_tlb_entry *gtlbe, | ||
840 | struct kvm_book3e_206_tlb_entry *stlbe, | ||
841 | int stlbsel, int sesel) | ||
842 | { | ||
843 | int stid; | ||
844 | |||
845 | preempt_disable(); | ||
846 | stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); | ||
847 | |||
848 | stlbe->mas1 |= MAS1_TID(stid); | ||
849 | write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); | ||
850 | preempt_enable(); | ||
851 | } | ||
852 | |||
853 | int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) | 393 | int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) |
854 | { | 394 | { |
855 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 395 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
856 | struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; | 396 | struct kvm_book3e_206_tlb_entry *gtlbe; |
857 | int tlbsel, esel, stlbsel, sesel; | 397 | int tlbsel, esel; |
858 | int recal = 0; | 398 | int recal = 0; |
859 | 399 | ||
860 | tlbsel = get_tlb_tlbsel(vcpu); | 400 | tlbsel = get_tlb_tlbsel(vcpu); |
@@ -892,40 +432,16 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) | |||
892 | 432 | ||
893 | /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ | 433 | /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ |
894 | if (tlbe_is_host_safe(vcpu, gtlbe)) { | 434 | if (tlbe_is_host_safe(vcpu, gtlbe)) { |
895 | u64 eaddr; | 435 | u64 eaddr = get_tlb_eaddr(gtlbe); |
896 | u64 raddr; | 436 | u64 raddr = get_tlb_raddr(gtlbe); |
897 | 437 | ||
898 | switch (tlbsel) { | 438 | if (tlbsel == 0) { |
899 | case 0: | ||
900 | /* TLB0 */ | ||
901 | gtlbe->mas1 &= ~MAS1_TSIZE(~0); | 439 | gtlbe->mas1 &= ~MAS1_TSIZE(~0); |
902 | gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); | 440 | gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); |
903 | |||
904 | stlbsel = 0; | ||
905 | kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); | ||
906 | sesel = 0; /* unused */ | ||
907 | |||
908 | break; | ||
909 | |||
910 | case 1: | ||
911 | /* TLB1 */ | ||
912 | eaddr = get_tlb_eaddr(gtlbe); | ||
913 | raddr = get_tlb_raddr(gtlbe); | ||
914 | |||
915 | /* Create a 4KB mapping on the host. | ||
916 | * If the guest wanted a large page, | ||
917 | * only the first 4KB is mapped here and the rest | ||
918 | * are mapped on the fly. */ | ||
919 | stlbsel = 1; | ||
920 | sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, | ||
921 | raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel); | ||
922 | break; | ||
923 | |||
924 | default: | ||
925 | BUG(); | ||
926 | } | 441 | } |
927 | 442 | ||
928 | write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); | 443 | /* Premap the faulting page */ |
444 | kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel)); | ||
929 | } | 445 | } |
930 | 446 | ||
931 | kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); | 447 | kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); |
@@ -1019,100 +535,14 @@ void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | |||
1019 | { | 535 | { |
1020 | } | 536 | } |
1021 | 537 | ||
1022 | void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, | ||
1023 | unsigned int index) | ||
1024 | { | ||
1025 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
1026 | struct tlbe_priv *priv; | ||
1027 | struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; | ||
1028 | int tlbsel = tlbsel_of(index); | ||
1029 | int esel = esel_of(index); | ||
1030 | int stlbsel, sesel; | ||
1031 | |||
1032 | gtlbe = get_entry(vcpu_e500, tlbsel, esel); | ||
1033 | |||
1034 | switch (tlbsel) { | ||
1035 | case 0: | ||
1036 | stlbsel = 0; | ||
1037 | sesel = 0; /* unused */ | ||
1038 | priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; | ||
1039 | |||
1040 | /* Only triggers after clear_tlb_refs */ | ||
1041 | if (unlikely(!(priv->ref.flags & E500_TLB_VALID))) | ||
1042 | kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); | ||
1043 | else | ||
1044 | kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, | ||
1045 | &priv->ref, eaddr, &stlbe); | ||
1046 | break; | ||
1047 | |||
1048 | case 1: { | ||
1049 | gfn_t gfn = gpaddr >> PAGE_SHIFT; | ||
1050 | |||
1051 | stlbsel = 1; | ||
1052 | sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, | ||
1053 | gtlbe, &stlbe, esel); | ||
1054 | break; | ||
1055 | } | ||
1056 | |||
1057 | default: | ||
1058 | BUG(); | ||
1059 | break; | ||
1060 | } | ||
1061 | |||
1062 | write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); | ||
1063 | } | ||
1064 | |||
1065 | /************* MMU Notifiers *************/ | ||
1066 | |||
1067 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
1068 | { | ||
1069 | trace_kvm_unmap_hva(hva); | ||
1070 | |||
1071 | /* | ||
1072 | * Flush all shadow tlb entries everywhere. This is slow, but | ||
1073 | * we are 100% sure that we catch the to be unmapped page | ||
1074 | */ | ||
1075 | kvm_flush_remote_tlbs(kvm); | ||
1076 | |||
1077 | return 0; | ||
1078 | } | ||
1079 | |||
1080 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | ||
1081 | { | ||
1082 | /* kvm_unmap_hva flushes everything anyways */ | ||
1083 | kvm_unmap_hva(kvm, start); | ||
1084 | |||
1085 | return 0; | ||
1086 | } | ||
1087 | |||
1088 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | ||
1089 | { | ||
1090 | /* XXX could be more clever ;) */ | ||
1091 | return 0; | ||
1092 | } | ||
1093 | |||
1094 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | ||
1095 | { | ||
1096 | /* XXX could be more clever ;) */ | ||
1097 | return 0; | ||
1098 | } | ||
1099 | |||
1100 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) | ||
1101 | { | ||
1102 | /* The page will get remapped properly on its next fault */ | ||
1103 | kvm_unmap_hva(kvm, hva); | ||
1104 | } | ||
1105 | |||
1106 | /*****************************************/ | 538 | /*****************************************/ |
1107 | 539 | ||
1108 | static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) | 540 | static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) |
1109 | { | 541 | { |
1110 | int i; | 542 | int i; |
1111 | 543 | ||
1112 | clear_tlb1_bitmap(vcpu_e500); | 544 | kvmppc_core_flush_tlb(&vcpu_e500->vcpu); |
1113 | kfree(vcpu_e500->g2h_tlb1_map); | 545 | kfree(vcpu_e500->g2h_tlb1_map); |
1114 | |||
1115 | clear_tlb_refs(vcpu_e500); | ||
1116 | kfree(vcpu_e500->gtlb_priv[0]); | 546 | kfree(vcpu_e500->gtlb_priv[0]); |
1117 | kfree(vcpu_e500->gtlb_priv[1]); | 547 | kfree(vcpu_e500->gtlb_priv[1]); |
1118 | 548 | ||
@@ -1303,7 +733,7 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, | |||
1303 | { | 733 | { |
1304 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 734 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
1305 | kvmppc_recalc_tlb1map_range(vcpu_e500); | 735 | kvmppc_recalc_tlb1map_range(vcpu_e500); |
1306 | clear_tlb_refs(vcpu_e500); | 736 | kvmppc_core_flush_tlb(vcpu); |
1307 | return 0; | 737 | return 0; |
1308 | } | 738 | } |
1309 | 739 | ||
@@ -1313,37 +743,8 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
1313 | int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); | 743 | int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); |
1314 | int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; | 744 | int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; |
1315 | 745 | ||
1316 | host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; | 746 | if (e500_mmu_host_init(vcpu_e500)) |
1317 | host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; | 747 | goto err; |
1318 | |||
1319 | /* | ||
1320 | * This should never happen on real e500 hardware, but is | ||
1321 | * architecturally possible -- e.g. in some weird nested | ||
1322 | * virtualization case. | ||
1323 | */ | ||
1324 | if (host_tlb_params[0].entries == 0 || | ||
1325 | host_tlb_params[1].entries == 0) { | ||
1326 | pr_err("%s: need to know host tlb size\n", __func__); | ||
1327 | return -ENODEV; | ||
1328 | } | ||
1329 | |||
1330 | host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> | ||
1331 | TLBnCFG_ASSOC_SHIFT; | ||
1332 | host_tlb_params[1].ways = host_tlb_params[1].entries; | ||
1333 | |||
1334 | if (!is_power_of_2(host_tlb_params[0].entries) || | ||
1335 | !is_power_of_2(host_tlb_params[0].ways) || | ||
1336 | host_tlb_params[0].entries < host_tlb_params[0].ways || | ||
1337 | host_tlb_params[0].ways == 0) { | ||
1338 | pr_err("%s: bad tlb0 host config: %u entries %u ways\n", | ||
1339 | __func__, host_tlb_params[0].entries, | ||
1340 | host_tlb_params[0].ways); | ||
1341 | return -ENODEV; | ||
1342 | } | ||
1343 | |||
1344 | host_tlb_params[0].sets = | ||
1345 | host_tlb_params[0].entries / host_tlb_params[0].ways; | ||
1346 | host_tlb_params[1].sets = 1; | ||
1347 | 748 | ||
1348 | vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; | 749 | vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; |
1349 | vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; | 750 | vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; |
@@ -1362,18 +763,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
1362 | vcpu_e500->gtlb_offset[0] = 0; | 763 | vcpu_e500->gtlb_offset[0] = 0; |
1363 | vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; | 764 | vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; |
1364 | 765 | ||
1365 | vcpu_e500->tlb_refs[0] = | ||
1366 | kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, | ||
1367 | GFP_KERNEL); | ||
1368 | if (!vcpu_e500->tlb_refs[0]) | ||
1369 | goto err; | ||
1370 | |||
1371 | vcpu_e500->tlb_refs[1] = | ||
1372 | kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, | ||
1373 | GFP_KERNEL); | ||
1374 | if (!vcpu_e500->tlb_refs[1]) | ||
1375 | goto err; | ||
1376 | |||
1377 | vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * | 766 | vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * |
1378 | vcpu_e500->gtlb_params[0].entries, | 767 | vcpu_e500->gtlb_params[0].entries, |
1379 | GFP_KERNEL); | 768 | GFP_KERNEL); |
@@ -1392,12 +781,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
1392 | if (!vcpu_e500->g2h_tlb1_map) | 781 | if (!vcpu_e500->g2h_tlb1_map) |
1393 | goto err; | 782 | goto err; |
1394 | 783 | ||
1395 | vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * | ||
1396 | host_tlb_params[1].entries, | ||
1397 | GFP_KERNEL); | ||
1398 | if (!vcpu_e500->h2g_tlb1_rmap) | ||
1399 | goto err; | ||
1400 | |||
1401 | /* Init TLB configuration register */ | 784 | /* Init TLB configuration register */ |
1402 | vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & | 785 | vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & |
1403 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | 786 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); |
@@ -1416,15 +799,11 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
1416 | 799 | ||
1417 | err: | 800 | err: |
1418 | free_gtlb(vcpu_e500); | 801 | free_gtlb(vcpu_e500); |
1419 | kfree(vcpu_e500->tlb_refs[0]); | ||
1420 | kfree(vcpu_e500->tlb_refs[1]); | ||
1421 | return -1; | 802 | return -1; |
1422 | } | 803 | } |
1423 | 804 | ||
1424 | void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) | 805 | void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) |
1425 | { | 806 | { |
1426 | free_gtlb(vcpu_e500); | 807 | free_gtlb(vcpu_e500); |
1427 | kfree(vcpu_e500->h2g_tlb1_rmap); | 808 | e500_mmu_host_uninit(vcpu_e500); |
1428 | kfree(vcpu_e500->tlb_refs[0]); | ||
1429 | kfree(vcpu_e500->tlb_refs[1]); | ||
1430 | } | 809 | } |
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c new file mode 100644 index 000000000000..a222edfb9a9b --- /dev/null +++ b/arch/powerpc/kvm/e500_mmu_host.c | |||
@@ -0,0 +1,699 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. | ||
3 | * | ||
4 | * Author: Yu Liu, yu.liu@freescale.com | ||
5 | * Scott Wood, scottwood@freescale.com | ||
6 | * Ashish Kalra, ashish.kalra@freescale.com | ||
7 | * Varun Sethi, varun.sethi@freescale.com | ||
8 | * Alexander Graf, agraf@suse.de | ||
9 | * | ||
10 | * Description: | ||
11 | * This file is based on arch/powerpc/kvm/44x_tlb.c, | ||
12 | * by Hollis Blanchard <hollisb@us.ibm.com>. | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify | ||
15 | * it under the terms of the GNU General Public License, version 2, as | ||
16 | * published by the Free Software Foundation. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/kvm.h> | ||
24 | #include <linux/kvm_host.h> | ||
25 | #include <linux/highmem.h> | ||
26 | #include <linux/log2.h> | ||
27 | #include <linux/uaccess.h> | ||
28 | #include <linux/sched.h> | ||
29 | #include <linux/rwsem.h> | ||
30 | #include <linux/vmalloc.h> | ||
31 | #include <linux/hugetlb.h> | ||
32 | #include <asm/kvm_ppc.h> | ||
33 | |||
34 | #include "e500.h" | ||
35 | #include "trace.h" | ||
36 | #include "timing.h" | ||
37 | #include "e500_mmu_host.h" | ||
38 | |||
39 | #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) | ||
40 | |||
41 | static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; | ||
42 | |||
43 | static inline unsigned int tlb1_max_shadow_size(void) | ||
44 | { | ||
45 | /* reserve one entry for magic page */ | ||
46 | return host_tlb_params[1].entries - tlbcam_index - 1; | ||
47 | } | ||
48 | |||
49 | static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) | ||
50 | { | ||
51 | /* Mask off reserved bits. */ | ||
52 | mas3 &= MAS3_ATTRIB_MASK; | ||
53 | |||
54 | #ifndef CONFIG_KVM_BOOKE_HV | ||
55 | if (!usermode) { | ||
56 | /* Guest is in supervisor mode, | ||
57 | * so we need to translate guest | ||
58 | * supervisor permissions into user permissions. */ | ||
59 | mas3 &= ~E500_TLB_USER_PERM_MASK; | ||
60 | mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; | ||
61 | } | ||
62 | mas3 |= E500_TLB_SUPER_PERM_MASK; | ||
63 | #endif | ||
64 | return mas3; | ||
65 | } | ||
66 | |||
67 | static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) | ||
68 | { | ||
69 | #ifdef CONFIG_SMP | ||
70 | return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; | ||
71 | #else | ||
72 | return mas2 & MAS2_ATTRIB_MASK; | ||
73 | #endif | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * writing shadow tlb entry to host TLB | ||
78 | */ | ||
79 | static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, | ||
80 | uint32_t mas0) | ||
81 | { | ||
82 | unsigned long flags; | ||
83 | |||
84 | local_irq_save(flags); | ||
85 | mtspr(SPRN_MAS0, mas0); | ||
86 | mtspr(SPRN_MAS1, stlbe->mas1); | ||
87 | mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); | ||
88 | mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); | ||
89 | mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); | ||
90 | #ifdef CONFIG_KVM_BOOKE_HV | ||
91 | mtspr(SPRN_MAS8, stlbe->mas8); | ||
92 | #endif | ||
93 | asm volatile("isync; tlbwe" : : : "memory"); | ||
94 | |||
95 | #ifdef CONFIG_KVM_BOOKE_HV | ||
96 | /* Must clear mas8 for other host tlbwe's */ | ||
97 | mtspr(SPRN_MAS8, 0); | ||
98 | isync(); | ||
99 | #endif | ||
100 | local_irq_restore(flags); | ||
101 | |||
102 | trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, | ||
103 | stlbe->mas2, stlbe->mas7_3); | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Acquire a mas0 with victim hint, as if we just took a TLB miss. | ||
108 | * | ||
109 | * We don't care about the address we're searching for, other than that it's | ||
110 | * in the right set and is not present in the TLB. Using a zero PID and a | ||
111 | * userspace address means we don't have to set and then restore MAS5, or | ||
112 | * calculate a proper MAS6 value. | ||
113 | */ | ||
114 | static u32 get_host_mas0(unsigned long eaddr) | ||
115 | { | ||
116 | unsigned long flags; | ||
117 | u32 mas0; | ||
118 | |||
119 | local_irq_save(flags); | ||
120 | mtspr(SPRN_MAS6, 0); | ||
121 | asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); | ||
122 | mas0 = mfspr(SPRN_MAS0); | ||
123 | local_irq_restore(flags); | ||
124 | |||
125 | return mas0; | ||
126 | } | ||
127 | |||
128 | /* sesel is for tlb1 only */ | ||
129 | static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
130 | int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) | ||
131 | { | ||
132 | u32 mas0; | ||
133 | |||
134 | if (tlbsel == 0) { | ||
135 | mas0 = get_host_mas0(stlbe->mas2); | ||
136 | __write_host_tlbe(stlbe, mas0); | ||
137 | } else { | ||
138 | __write_host_tlbe(stlbe, | ||
139 | MAS0_TLBSEL(1) | | ||
140 | MAS0_ESEL(to_htlb1_esel(sesel))); | ||
141 | } | ||
142 | } | ||
143 | |||
144 | /* sesel is for tlb1 only */ | ||
145 | static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
146 | struct kvm_book3e_206_tlb_entry *gtlbe, | ||
147 | struct kvm_book3e_206_tlb_entry *stlbe, | ||
148 | int stlbsel, int sesel) | ||
149 | { | ||
150 | int stid; | ||
151 | |||
152 | preempt_disable(); | ||
153 | stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); | ||
154 | |||
155 | stlbe->mas1 |= MAS1_TID(stid); | ||
156 | write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); | ||
157 | preempt_enable(); | ||
158 | } | ||
159 | |||
160 | #ifdef CONFIG_KVM_E500V2 | ||
161 | /* XXX should be a hook in the gva2hpa translation */ | ||
162 | void kvmppc_map_magic(struct kvm_vcpu *vcpu) | ||
163 | { | ||
164 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
165 | struct kvm_book3e_206_tlb_entry magic; | ||
166 | ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; | ||
167 | unsigned int stid; | ||
168 | pfn_t pfn; | ||
169 | |||
170 | pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; | ||
171 | get_page(pfn_to_page(pfn)); | ||
172 | |||
173 | preempt_disable(); | ||
174 | stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0); | ||
175 | |||
176 | magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | | ||
177 | MAS1_TSIZE(BOOK3E_PAGESZ_4K); | ||
178 | magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; | ||
179 | magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | | ||
180 | MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; | ||
181 | magic.mas8 = 0; | ||
182 | |||
183 | __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); | ||
184 | preempt_enable(); | ||
185 | } | ||
186 | #endif | ||
187 | |||
188 | void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, | ||
189 | int esel) | ||
190 | { | ||
191 | struct kvm_book3e_206_tlb_entry *gtlbe = | ||
192 | get_entry(vcpu_e500, tlbsel, esel); | ||
193 | struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; | ||
194 | |||
195 | /* Don't bother with unmapped entries */ | ||
196 | if (!(ref->flags & E500_TLB_VALID)) | ||
197 | return; | ||
198 | |||
199 | if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { | ||
200 | u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; | ||
201 | int hw_tlb_indx; | ||
202 | unsigned long flags; | ||
203 | |||
204 | local_irq_save(flags); | ||
205 | while (tmp) { | ||
206 | hw_tlb_indx = __ilog2_u64(tmp & -tmp); | ||
207 | mtspr(SPRN_MAS0, | ||
208 | MAS0_TLBSEL(1) | | ||
209 | MAS0_ESEL(to_htlb1_esel(hw_tlb_indx))); | ||
210 | mtspr(SPRN_MAS1, 0); | ||
211 | asm volatile("tlbwe"); | ||
212 | vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0; | ||
213 | tmp &= tmp - 1; | ||
214 | } | ||
215 | mb(); | ||
216 | vcpu_e500->g2h_tlb1_map[esel] = 0; | ||
217 | ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); | ||
218 | local_irq_restore(flags); | ||
219 | } | ||
220 | |||
221 | if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) { | ||
222 | /* | ||
223 | * TLB1 entry is backed by 4k pages. This should happen | ||
224 | * rarely and is not worth optimizing. Invalidate everything. | ||
225 | */ | ||
226 | kvmppc_e500_tlbil_all(vcpu_e500); | ||
227 | ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID); | ||
228 | } | ||
229 | |||
230 | /* Already invalidated in between */ | ||
231 | if (!(ref->flags & E500_TLB_VALID)) | ||
232 | return; | ||
233 | |||
234 | /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ | ||
235 | kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); | ||
236 | |||
237 | /* Mark the TLB as not backed by the host anymore */ | ||
238 | ref->flags &= ~E500_TLB_VALID; | ||
239 | } | ||
240 | |||
241 | static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) | ||
242 | { | ||
243 | return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); | ||
244 | } | ||
245 | |||
246 | static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, | ||
247 | struct kvm_book3e_206_tlb_entry *gtlbe, | ||
248 | pfn_t pfn) | ||
249 | { | ||
250 | ref->pfn = pfn; | ||
251 | ref->flags = E500_TLB_VALID; | ||
252 | |||
253 | if (tlbe_is_writable(gtlbe)) | ||
254 | kvm_set_pfn_dirty(pfn); | ||
255 | } | ||
256 | |||
257 | static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) | ||
258 | { | ||
259 | if (ref->flags & E500_TLB_VALID) { | ||
260 | trace_kvm_booke206_ref_release(ref->pfn, ref->flags); | ||
261 | ref->flags = 0; | ||
262 | } | ||
263 | } | ||
264 | |||
265 | static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
266 | { | ||
267 | if (vcpu_e500->g2h_tlb1_map) | ||
268 | memset(vcpu_e500->g2h_tlb1_map, 0, | ||
269 | sizeof(u64) * vcpu_e500->gtlb_params[1].entries); | ||
270 | if (vcpu_e500->h2g_tlb1_rmap) | ||
271 | memset(vcpu_e500->h2g_tlb1_rmap, 0, | ||
272 | sizeof(unsigned int) * host_tlb_params[1].entries); | ||
273 | } | ||
274 | |||
275 | static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
276 | { | ||
277 | int tlbsel = 0; | ||
278 | int i; | ||
279 | |||
280 | for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { | ||
281 | struct tlbe_ref *ref = | ||
282 | &vcpu_e500->gtlb_priv[tlbsel][i].ref; | ||
283 | kvmppc_e500_ref_release(ref); | ||
284 | } | ||
285 | } | ||
286 | |||
287 | static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
288 | { | ||
289 | int stlbsel = 1; | ||
290 | int i; | ||
291 | |||
292 | kvmppc_e500_tlbil_all(vcpu_e500); | ||
293 | |||
294 | for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { | ||
295 | struct tlbe_ref *ref = | ||
296 | &vcpu_e500->tlb_refs[stlbsel][i]; | ||
297 | kvmppc_e500_ref_release(ref); | ||
298 | } | ||
299 | |||
300 | clear_tlb_privs(vcpu_e500); | ||
301 | } | ||
302 | |||
303 | void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) | ||
304 | { | ||
305 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
306 | clear_tlb_refs(vcpu_e500); | ||
307 | clear_tlb1_bitmap(vcpu_e500); | ||
308 | } | ||
309 | |||
310 | /* TID must be supplied by the caller */ | ||
311 | static void kvmppc_e500_setup_stlbe( | ||
312 | struct kvm_vcpu *vcpu, | ||
313 | struct kvm_book3e_206_tlb_entry *gtlbe, | ||
314 | int tsize, struct tlbe_ref *ref, u64 gvaddr, | ||
315 | struct kvm_book3e_206_tlb_entry *stlbe) | ||
316 | { | ||
317 | pfn_t pfn = ref->pfn; | ||
318 | u32 pr = vcpu->arch.shared->msr & MSR_PR; | ||
319 | |||
320 | BUG_ON(!(ref->flags & E500_TLB_VALID)); | ||
321 | |||
322 | /* Force IPROT=0 for all guest mappings. */ | ||
323 | stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; | ||
324 | stlbe->mas2 = (gvaddr & MAS2_EPN) | | ||
325 | e500_shadow_mas2_attrib(gtlbe->mas2, pr); | ||
326 | stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | | ||
327 | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); | ||
328 | |||
329 | #ifdef CONFIG_KVM_BOOKE_HV | ||
330 | stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; | ||
331 | #endif | ||
332 | } | ||
333 | |||
334 | static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
335 | u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, | ||
336 | int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, | ||
337 | struct tlbe_ref *ref) | ||
338 | { | ||
339 | struct kvm_memory_slot *slot; | ||
340 | unsigned long pfn = 0; /* silence GCC warning */ | ||
341 | unsigned long hva; | ||
342 | int pfnmap = 0; | ||
343 | int tsize = BOOK3E_PAGESZ_4K; | ||
344 | |||
345 | /* | ||
346 | * Translate guest physical to true physical, acquiring | ||
347 | * a page reference if it is normal, non-reserved memory. | ||
348 | * | ||
349 | * gfn_to_memslot() must succeed because otherwise we wouldn't | ||
350 | * have gotten this far. Eventually we should just pass the slot | ||
351 | * pointer through from the first lookup. | ||
352 | */ | ||
353 | slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); | ||
354 | hva = gfn_to_hva_memslot(slot, gfn); | ||
355 | |||
356 | if (tlbsel == 1) { | ||
357 | struct vm_area_struct *vma; | ||
358 | down_read(&current->mm->mmap_sem); | ||
359 | |||
360 | vma = find_vma(current->mm, hva); | ||
361 | if (vma && hva >= vma->vm_start && | ||
362 | (vma->vm_flags & VM_PFNMAP)) { | ||
363 | /* | ||
364 | * This VMA is a physically contiguous region (e.g. | ||
365 | * /dev/mem) that bypasses normal Linux page | ||
366 | * management. Find the overlap between the | ||
367 | * vma and the memslot. | ||
368 | */ | ||
369 | |||
370 | unsigned long start, end; | ||
371 | unsigned long slot_start, slot_end; | ||
372 | |||
373 | pfnmap = 1; | ||
374 | |||
375 | start = vma->vm_pgoff; | ||
376 | end = start + | ||
377 | ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); | ||
378 | |||
379 | pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); | ||
380 | |||
381 | slot_start = pfn - (gfn - slot->base_gfn); | ||
382 | slot_end = slot_start + slot->npages; | ||
383 | |||
384 | if (start < slot_start) | ||
385 | start = slot_start; | ||
386 | if (end > slot_end) | ||
387 | end = slot_end; | ||
388 | |||
389 | tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> | ||
390 | MAS1_TSIZE_SHIFT; | ||
391 | |||
392 | /* | ||
393 | * e500 doesn't implement the lowest tsize bit, | ||
394 | * or 1K pages. | ||
395 | */ | ||
396 | tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); | ||
397 | |||
398 | /* | ||
399 | * Now find the largest tsize (up to what the guest | ||
400 | * requested) that will cover gfn, stay within the | ||
401 | * range, and for which gfn and pfn are mutually | ||
402 | * aligned. | ||
403 | */ | ||
404 | |||
405 | for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { | ||
406 | unsigned long gfn_start, gfn_end, tsize_pages; | ||
407 | tsize_pages = 1 << (tsize - 2); | ||
408 | |||
409 | gfn_start = gfn & ~(tsize_pages - 1); | ||
410 | gfn_end = gfn_start + tsize_pages; | ||
411 | |||
412 | if (gfn_start + pfn - gfn < start) | ||
413 | continue; | ||
414 | if (gfn_end + pfn - gfn > end) | ||
415 | continue; | ||
416 | if ((gfn & (tsize_pages - 1)) != | ||
417 | (pfn & (tsize_pages - 1))) | ||
418 | continue; | ||
419 | |||
420 | gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); | ||
421 | pfn &= ~(tsize_pages - 1); | ||
422 | break; | ||
423 | } | ||
424 | } else if (vma && hva >= vma->vm_start && | ||
425 | (vma->vm_flags & VM_HUGETLB)) { | ||
426 | unsigned long psize = vma_kernel_pagesize(vma); | ||
427 | |||
428 | tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> | ||
429 | MAS1_TSIZE_SHIFT; | ||
430 | |||
431 | /* | ||
432 | * Take the largest page size that satisfies both host | ||
433 | * and guest mapping | ||
434 | */ | ||
435 | tsize = min(__ilog2(psize) - 10, tsize); | ||
436 | |||
437 | /* | ||
438 | * e500 doesn't implement the lowest tsize bit, | ||
439 | * or 1K pages. | ||
440 | */ | ||
441 | tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); | ||
442 | } | ||
443 | |||
444 | up_read(&current->mm->mmap_sem); | ||
445 | } | ||
446 | |||
447 | if (likely(!pfnmap)) { | ||
448 | unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); | ||
449 | pfn = gfn_to_pfn_memslot(slot, gfn); | ||
450 | if (is_error_noslot_pfn(pfn)) { | ||
451 | printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", | ||
452 | (long)gfn); | ||
453 | return -EINVAL; | ||
454 | } | ||
455 | |||
456 | /* Align guest and physical address to page map boundaries */ | ||
457 | pfn &= ~(tsize_pages - 1); | ||
458 | gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); | ||
459 | } | ||
460 | |||
461 | /* Drop old ref and setup new one. */ | ||
462 | kvmppc_e500_ref_release(ref); | ||
463 | kvmppc_e500_ref_setup(ref, gtlbe, pfn); | ||
464 | |||
465 | kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, | ||
466 | ref, gvaddr, stlbe); | ||
467 | |||
468 | /* Clear i-cache for new pages */ | ||
469 | kvmppc_mmu_flush_icache(pfn); | ||
470 | |||
471 | /* Drop refcount on page, so that mmu notifiers can clear it */ | ||
472 | kvm_release_pfn_clean(pfn); | ||
473 | |||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | /* XXX only map the one-one case, for now use TLB0 */ | ||
478 | static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, | ||
479 | struct kvm_book3e_206_tlb_entry *stlbe) | ||
480 | { | ||
481 | struct kvm_book3e_206_tlb_entry *gtlbe; | ||
482 | struct tlbe_ref *ref; | ||
483 | int stlbsel = 0; | ||
484 | int sesel = 0; | ||
485 | int r; | ||
486 | |||
487 | gtlbe = get_entry(vcpu_e500, 0, esel); | ||
488 | ref = &vcpu_e500->gtlb_priv[0][esel].ref; | ||
489 | |||
490 | r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), | ||
491 | get_tlb_raddr(gtlbe) >> PAGE_SHIFT, | ||
492 | gtlbe, 0, stlbe, ref); | ||
493 | if (r) | ||
494 | return r; | ||
495 | |||
496 | write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); | ||
497 | |||
498 | return 0; | ||
499 | } | ||
500 | |||
501 | static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
502 | struct tlbe_ref *ref, | ||
503 | int esel) | ||
504 | { | ||
505 | unsigned int sesel = vcpu_e500->host_tlb1_nv++; | ||
506 | |||
507 | if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) | ||
508 | vcpu_e500->host_tlb1_nv = 0; | ||
509 | |||
510 | vcpu_e500->tlb_refs[1][sesel] = *ref; | ||
511 | vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; | ||
512 | vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; | ||
513 | if (vcpu_e500->h2g_tlb1_rmap[sesel]) { | ||
514 | unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; | ||
515 | vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); | ||
516 | } | ||
517 | vcpu_e500->h2g_tlb1_rmap[sesel] = esel; | ||
518 | |||
519 | return sesel; | ||
520 | } | ||
521 | |||
522 | /* Caller must ensure that the specified guest TLB entry is safe to insert into | ||
523 | * the shadow TLB. */ | ||
524 | /* For both one-one and one-to-many */ | ||
525 | static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
526 | u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, | ||
527 | struct kvm_book3e_206_tlb_entry *stlbe, int esel) | ||
528 | { | ||
529 | struct tlbe_ref ref; | ||
530 | int sesel; | ||
531 | int r; | ||
532 | |||
533 | ref.flags = 0; | ||
534 | r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, | ||
535 | &ref); | ||
536 | if (r) | ||
537 | return r; | ||
538 | |||
539 | /* Use TLB0 when we can only map a page with 4k */ | ||
540 | if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) { | ||
541 | vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_TLB0; | ||
542 | write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0); | ||
543 | return 0; | ||
544 | } | ||
545 | |||
546 | /* Otherwise map into TLB1 */ | ||
547 | sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel); | ||
548 | write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); | ||
549 | |||
550 | return 0; | ||
551 | } | ||
552 | |||
553 | void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, | ||
554 | unsigned int index) | ||
555 | { | ||
556 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
557 | struct tlbe_priv *priv; | ||
558 | struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; | ||
559 | int tlbsel = tlbsel_of(index); | ||
560 | int esel = esel_of(index); | ||
561 | |||
562 | gtlbe = get_entry(vcpu_e500, tlbsel, esel); | ||
563 | |||
564 | switch (tlbsel) { | ||
565 | case 0: | ||
566 | priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; | ||
567 | |||
568 | /* Triggers after clear_tlb_refs or on initial mapping */ | ||
569 | if (!(priv->ref.flags & E500_TLB_VALID)) { | ||
570 | kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); | ||
571 | } else { | ||
572 | kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, | ||
573 | &priv->ref, eaddr, &stlbe); | ||
574 | write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0); | ||
575 | } | ||
576 | break; | ||
577 | |||
578 | case 1: { | ||
579 | gfn_t gfn = gpaddr >> PAGE_SHIFT; | ||
580 | kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe, &stlbe, | ||
581 | esel); | ||
582 | break; | ||
583 | } | ||
584 | |||
585 | default: | ||
586 | BUG(); | ||
587 | break; | ||
588 | } | ||
589 | } | ||
590 | |||
591 | /************* MMU Notifiers *************/ | ||
592 | |||
593 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
594 | { | ||
595 | trace_kvm_unmap_hva(hva); | ||
596 | |||
597 | /* | ||
598 | * Flush all shadow tlb entries everywhere. This is slow, but | ||
599 | * we are 100% sure that we catch the to be unmapped page | ||
600 | */ | ||
601 | kvm_flush_remote_tlbs(kvm); | ||
602 | |||
603 | return 0; | ||
604 | } | ||
605 | |||
606 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | ||
607 | { | ||
608 | /* kvm_unmap_hva flushes everything anyways */ | ||
609 | kvm_unmap_hva(kvm, start); | ||
610 | |||
611 | return 0; | ||
612 | } | ||
613 | |||
614 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | ||
615 | { | ||
616 | /* XXX could be more clever ;) */ | ||
617 | return 0; | ||
618 | } | ||
619 | |||
620 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | ||
621 | { | ||
622 | /* XXX could be more clever ;) */ | ||
623 | return 0; | ||
624 | } | ||
625 | |||
626 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) | ||
627 | { | ||
628 | /* The page will get remapped properly on its next fault */ | ||
629 | kvm_unmap_hva(kvm, hva); | ||
630 | } | ||
631 | |||
632 | /*****************************************/ | ||
633 | |||
634 | int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
635 | { | ||
636 | host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; | ||
637 | host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; | ||
638 | |||
639 | /* | ||
640 | * This should never happen on real e500 hardware, but is | ||
641 | * architecturally possible -- e.g. in some weird nested | ||
642 | * virtualization case. | ||
643 | */ | ||
644 | if (host_tlb_params[0].entries == 0 || | ||
645 | host_tlb_params[1].entries == 0) { | ||
646 | pr_err("%s: need to know host tlb size\n", __func__); | ||
647 | return -ENODEV; | ||
648 | } | ||
649 | |||
650 | host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> | ||
651 | TLBnCFG_ASSOC_SHIFT; | ||
652 | host_tlb_params[1].ways = host_tlb_params[1].entries; | ||
653 | |||
654 | if (!is_power_of_2(host_tlb_params[0].entries) || | ||
655 | !is_power_of_2(host_tlb_params[0].ways) || | ||
656 | host_tlb_params[0].entries < host_tlb_params[0].ways || | ||
657 | host_tlb_params[0].ways == 0) { | ||
658 | pr_err("%s: bad tlb0 host config: %u entries %u ways\n", | ||
659 | __func__, host_tlb_params[0].entries, | ||
660 | host_tlb_params[0].ways); | ||
661 | return -ENODEV; | ||
662 | } | ||
663 | |||
664 | host_tlb_params[0].sets = | ||
665 | host_tlb_params[0].entries / host_tlb_params[0].ways; | ||
666 | host_tlb_params[1].sets = 1; | ||
667 | |||
668 | vcpu_e500->tlb_refs[0] = | ||
669 | kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, | ||
670 | GFP_KERNEL); | ||
671 | if (!vcpu_e500->tlb_refs[0]) | ||
672 | goto err; | ||
673 | |||
674 | vcpu_e500->tlb_refs[1] = | ||
675 | kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, | ||
676 | GFP_KERNEL); | ||
677 | if (!vcpu_e500->tlb_refs[1]) | ||
678 | goto err; | ||
679 | |||
680 | vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * | ||
681 | host_tlb_params[1].entries, | ||
682 | GFP_KERNEL); | ||
683 | if (!vcpu_e500->h2g_tlb1_rmap) | ||
684 | goto err; | ||
685 | |||
686 | return 0; | ||
687 | |||
688 | err: | ||
689 | kfree(vcpu_e500->tlb_refs[0]); | ||
690 | kfree(vcpu_e500->tlb_refs[1]); | ||
691 | return -EINVAL; | ||
692 | } | ||
693 | |||
694 | void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
695 | { | ||
696 | kfree(vcpu_e500->h2g_tlb1_rmap); | ||
697 | kfree(vcpu_e500->tlb_refs[0]); | ||
698 | kfree(vcpu_e500->tlb_refs[1]); | ||
699 | } | ||
diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h new file mode 100644 index 000000000000..7624835b76c7 --- /dev/null +++ b/arch/powerpc/kvm/e500_mmu_host.h | |||
@@ -0,0 +1,18 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License, version 2, as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #ifndef KVM_E500_MMU_HOST_H | ||
10 | #define KVM_E500_MMU_HOST_H | ||
11 | |||
12 | void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, | ||
13 | int esel); | ||
14 | |||
15 | int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500); | ||
16 | void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); | ||
17 | |||
18 | #endif /* KVM_E500_MMU_HOST_H */ | ||
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 9d9cddc5b346..7a73b6f72a8b 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -150,8 +150,6 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
150 | case SPRN_TBWL: break; | 150 | case SPRN_TBWL: break; |
151 | case SPRN_TBWU: break; | 151 | case SPRN_TBWU: break; |
152 | 152 | ||
153 | case SPRN_MSSSR0: break; | ||
154 | |||
155 | case SPRN_DEC: | 153 | case SPRN_DEC: |
156 | vcpu->arch.dec = spr_val; | 154 | vcpu->arch.dec = spr_val; |
157 | kvmppc_emulate_dec(vcpu); | 155 | kvmppc_emulate_dec(vcpu); |
@@ -202,9 +200,6 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
202 | case SPRN_PIR: | 200 | case SPRN_PIR: |
203 | spr_val = vcpu->vcpu_id; | 201 | spr_val = vcpu->vcpu_id; |
204 | break; | 202 | break; |
205 | case SPRN_MSSSR0: | ||
206 | spr_val = 0; | ||
207 | break; | ||
208 | 203 | ||
209 | /* Note: mftb and TBRL/TBWL are user-accessible, so | 204 | /* Note: mftb and TBRL/TBWL are user-accessible, so |
210 | * the guest can always access the real TB anyways. | 205 | * the guest can always access the real TB anyways. |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 70739a089560..934413cd3a1b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -237,7 +237,8 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
237 | r = RESUME_HOST; | 237 | r = RESUME_HOST; |
238 | break; | 238 | break; |
239 | default: | 239 | default: |
240 | BUG(); | 240 | WARN_ON(1); |
241 | r = RESUME_GUEST; | ||
241 | } | 242 | } |
242 | 243 | ||
243 | return r; | 244 | return r; |
@@ -305,6 +306,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
305 | #ifdef CONFIG_BOOKE | 306 | #ifdef CONFIG_BOOKE |
306 | case KVM_CAP_PPC_BOOKE_SREGS: | 307 | case KVM_CAP_PPC_BOOKE_SREGS: |
307 | case KVM_CAP_PPC_BOOKE_WATCHDOG: | 308 | case KVM_CAP_PPC_BOOKE_WATCHDOG: |
309 | case KVM_CAP_PPC_EPR: | ||
308 | #else | 310 | #else |
309 | case KVM_CAP_PPC_SEGSTATE: | 311 | case KVM_CAP_PPC_SEGSTATE: |
310 | case KVM_CAP_PPC_HIOR: | 312 | case KVM_CAP_PPC_HIOR: |
@@ -412,7 +414,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
412 | struct kvm_memory_slot *memslot, | 414 | struct kvm_memory_slot *memslot, |
413 | struct kvm_memory_slot old, | 415 | struct kvm_memory_slot old, |
414 | struct kvm_userspace_memory_region *mem, | 416 | struct kvm_userspace_memory_region *mem, |
415 | int user_alloc) | 417 | bool user_alloc) |
416 | { | 418 | { |
417 | return kvmppc_core_prepare_memory_region(kvm, memslot, mem); | 419 | return kvmppc_core_prepare_memory_region(kvm, memslot, mem); |
418 | } | 420 | } |
@@ -420,7 +422,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
420 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 422 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
421 | struct kvm_userspace_memory_region *mem, | 423 | struct kvm_userspace_memory_region *mem, |
422 | struct kvm_memory_slot old, | 424 | struct kvm_memory_slot old, |
423 | int user_alloc) | 425 | bool user_alloc) |
424 | { | 426 | { |
425 | kvmppc_core_commit_memory_region(kvm, mem, old); | 427 | kvmppc_core_commit_memory_region(kvm, mem, old); |
426 | } | 428 | } |
@@ -720,6 +722,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
720 | for (i = 0; i < 9; ++i) | 722 | for (i = 0; i < 9; ++i) |
721 | kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); | 723 | kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); |
722 | vcpu->arch.hcall_needed = 0; | 724 | vcpu->arch.hcall_needed = 0; |
725 | #ifdef CONFIG_BOOKE | ||
726 | } else if (vcpu->arch.epr_needed) { | ||
727 | kvmppc_set_epr(vcpu, run->epr.epr); | ||
728 | vcpu->arch.epr_needed = 0; | ||
729 | #endif | ||
723 | } | 730 | } |
724 | 731 | ||
725 | r = kvmppc_vcpu_run(run, vcpu); | 732 | r = kvmppc_vcpu_run(run, vcpu); |
@@ -761,6 +768,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
761 | r = 0; | 768 | r = 0; |
762 | vcpu->arch.papr_enabled = true; | 769 | vcpu->arch.papr_enabled = true; |
763 | break; | 770 | break; |
771 | case KVM_CAP_PPC_EPR: | ||
772 | r = 0; | ||
773 | vcpu->arch.epr_enabled = cap->args[0]; | ||
774 | break; | ||
764 | #ifdef CONFIG_BOOKE | 775 | #ifdef CONFIG_BOOKE |
765 | case KVM_CAP_PPC_BOOKE_WATCHDOG: | 776 | case KVM_CAP_PPC_BOOKE_WATCHDOG: |
766 | r = 0; | 777 | r = 0; |
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 7def77302d63..87c17bfb2968 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h | |||
@@ -41,6 +41,7 @@ enum interruption_class { | |||
41 | IRQIO_CSC, | 41 | IRQIO_CSC, |
42 | IRQIO_PCI, | 42 | IRQIO_PCI, |
43 | IRQIO_MSI, | 43 | IRQIO_MSI, |
44 | IRQIO_VIR, | ||
44 | NMI_NMI, | 45 | NMI_NMI, |
45 | CPU_RST, | 46 | CPU_RST, |
46 | NR_ARCH_IRQS | 47 | NR_ARCH_IRQS |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b7841546991f..16bd5d169cdb 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -20,9 +20,7 @@ | |||
20 | #include <asm/cpu.h> | 20 | #include <asm/cpu.h> |
21 | 21 | ||
22 | #define KVM_MAX_VCPUS 64 | 22 | #define KVM_MAX_VCPUS 64 |
23 | #define KVM_MEMORY_SLOTS 32 | 23 | #define KVM_USER_MEM_SLOTS 32 |
24 | /* memory slots that does not exposed to userspace */ | ||
25 | #define KVM_PRIVATE_MEM_SLOTS 4 | ||
26 | 24 | ||
27 | struct sca_entry { | 25 | struct sca_entry { |
28 | atomic_t scn; | 26 | atomic_t scn; |
@@ -76,8 +74,11 @@ struct kvm_s390_sie_block { | |||
76 | __u64 epoch; /* 0x0038 */ | 74 | __u64 epoch; /* 0x0038 */ |
77 | __u8 reserved40[4]; /* 0x0040 */ | 75 | __u8 reserved40[4]; /* 0x0040 */ |
78 | #define LCTL_CR0 0x8000 | 76 | #define LCTL_CR0 0x8000 |
77 | #define LCTL_CR6 0x0200 | ||
78 | #define LCTL_CR14 0x0002 | ||
79 | __u16 lctl; /* 0x0044 */ | 79 | __u16 lctl; /* 0x0044 */ |
80 | __s16 icpua; /* 0x0046 */ | 80 | __s16 icpua; /* 0x0046 */ |
81 | #define ICTL_LPSW 0x00400000 | ||
81 | __u32 ictl; /* 0x0048 */ | 82 | __u32 ictl; /* 0x0048 */ |
82 | __u32 eca; /* 0x004c */ | 83 | __u32 eca; /* 0x004c */ |
83 | __u8 icptcode; /* 0x0050 */ | 84 | __u8 icptcode; /* 0x0050 */ |
@@ -127,6 +128,7 @@ struct kvm_vcpu_stat { | |||
127 | u32 deliver_prefix_signal; | 128 | u32 deliver_prefix_signal; |
128 | u32 deliver_restart_signal; | 129 | u32 deliver_restart_signal; |
129 | u32 deliver_program_int; | 130 | u32 deliver_program_int; |
131 | u32 deliver_io_int; | ||
130 | u32 exit_wait_state; | 132 | u32 exit_wait_state; |
131 | u32 instruction_stidp; | 133 | u32 instruction_stidp; |
132 | u32 instruction_spx; | 134 | u32 instruction_spx; |
@@ -187,6 +189,11 @@ struct kvm_s390_emerg_info { | |||
187 | __u16 code; | 189 | __u16 code; |
188 | }; | 190 | }; |
189 | 191 | ||
192 | struct kvm_s390_mchk_info { | ||
193 | __u64 cr14; | ||
194 | __u64 mcic; | ||
195 | }; | ||
196 | |||
190 | struct kvm_s390_interrupt_info { | 197 | struct kvm_s390_interrupt_info { |
191 | struct list_head list; | 198 | struct list_head list; |
192 | u64 type; | 199 | u64 type; |
@@ -197,6 +204,7 @@ struct kvm_s390_interrupt_info { | |||
197 | struct kvm_s390_emerg_info emerg; | 204 | struct kvm_s390_emerg_info emerg; |
198 | struct kvm_s390_extcall_info extcall; | 205 | struct kvm_s390_extcall_info extcall; |
199 | struct kvm_s390_prefix_info prefix; | 206 | struct kvm_s390_prefix_info prefix; |
207 | struct kvm_s390_mchk_info mchk; | ||
200 | }; | 208 | }; |
201 | }; | 209 | }; |
202 | 210 | ||
@@ -254,6 +262,7 @@ struct kvm_arch{ | |||
254 | debug_info_t *dbf; | 262 | debug_info_t *dbf; |
255 | struct kvm_s390_float_interrupt float_int; | 263 | struct kvm_s390_float_interrupt float_int; |
256 | struct gmap *gmap; | 264 | struct gmap *gmap; |
265 | int css_support; | ||
257 | }; | 266 | }; |
258 | 267 | ||
259 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); | 268 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); |
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 9df824ea1667..1630f439cd2a 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c | |||
@@ -81,6 +81,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { | |||
81 | [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, | 81 | [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, |
82 | [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, | 82 | [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, |
83 | [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, | 83 | [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, |
84 | [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, | ||
84 | [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, | 85 | [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, |
85 | [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, | 86 | [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, |
86 | }; | 87 | }; |
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 22798ec33fd1..f26ff1e31bdb 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -26,27 +26,20 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) | |||
26 | { | 26 | { |
27 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | 27 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; |
28 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; | 28 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; |
29 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
30 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + | ||
31 | ((vcpu->arch.sie_block->ipb & 0xff00) << 4); | ||
32 | u64 useraddr; | 29 | u64 useraddr; |
33 | int reg, rc; | 30 | int reg, rc; |
34 | 31 | ||
35 | vcpu->stat.instruction_lctlg++; | 32 | vcpu->stat.instruction_lctlg++; |
36 | if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f) | ||
37 | return -EOPNOTSUPP; | ||
38 | 33 | ||
39 | useraddr = disp2; | 34 | useraddr = kvm_s390_get_base_disp_rsy(vcpu); |
40 | if (base2) | ||
41 | useraddr += vcpu->run->s.regs.gprs[base2]; | ||
42 | 35 | ||
43 | if (useraddr & 7) | 36 | if (useraddr & 7) |
44 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 37 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
45 | 38 | ||
46 | reg = reg1; | 39 | reg = reg1; |
47 | 40 | ||
48 | VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, | 41 | VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, |
49 | disp2); | 42 | useraddr); |
50 | trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); | 43 | trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); |
51 | 44 | ||
52 | do { | 45 | do { |
@@ -68,23 +61,19 @@ static int handle_lctl(struct kvm_vcpu *vcpu) | |||
68 | { | 61 | { |
69 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | 62 | int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; |
70 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; | 63 | int reg3 = vcpu->arch.sie_block->ipa & 0x000f; |
71 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
72 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
73 | u64 useraddr; | 64 | u64 useraddr; |
74 | u32 val = 0; | 65 | u32 val = 0; |
75 | int reg, rc; | 66 | int reg, rc; |
76 | 67 | ||
77 | vcpu->stat.instruction_lctl++; | 68 | vcpu->stat.instruction_lctl++; |
78 | 69 | ||
79 | useraddr = disp2; | 70 | useraddr = kvm_s390_get_base_disp_rs(vcpu); |
80 | if (base2) | ||
81 | useraddr += vcpu->run->s.regs.gprs[base2]; | ||
82 | 71 | ||
83 | if (useraddr & 3) | 72 | if (useraddr & 3) |
84 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 73 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
85 | 74 | ||
86 | VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, | 75 | VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, |
87 | disp2); | 76 | useraddr); |
88 | trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); | 77 | trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); |
89 | 78 | ||
90 | reg = reg1; | 79 | reg = reg1; |
@@ -104,14 +93,31 @@ static int handle_lctl(struct kvm_vcpu *vcpu) | |||
104 | return 0; | 93 | return 0; |
105 | } | 94 | } |
106 | 95 | ||
107 | static intercept_handler_t instruction_handlers[256] = { | 96 | static const intercept_handler_t eb_handlers[256] = { |
97 | [0x2f] = handle_lctlg, | ||
98 | [0x8a] = kvm_s390_handle_priv_eb, | ||
99 | }; | ||
100 | |||
101 | static int handle_eb(struct kvm_vcpu *vcpu) | ||
102 | { | ||
103 | intercept_handler_t handler; | ||
104 | |||
105 | handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; | ||
106 | if (handler) | ||
107 | return handler(vcpu); | ||
108 | return -EOPNOTSUPP; | ||
109 | } | ||
110 | |||
111 | static const intercept_handler_t instruction_handlers[256] = { | ||
108 | [0x01] = kvm_s390_handle_01, | 112 | [0x01] = kvm_s390_handle_01, |
113 | [0x82] = kvm_s390_handle_lpsw, | ||
109 | [0x83] = kvm_s390_handle_diag, | 114 | [0x83] = kvm_s390_handle_diag, |
110 | [0xae] = kvm_s390_handle_sigp, | 115 | [0xae] = kvm_s390_handle_sigp, |
111 | [0xb2] = kvm_s390_handle_b2, | 116 | [0xb2] = kvm_s390_handle_b2, |
112 | [0xb7] = handle_lctl, | 117 | [0xb7] = handle_lctl, |
118 | [0xb9] = kvm_s390_handle_b9, | ||
113 | [0xe5] = kvm_s390_handle_e5, | 119 | [0xe5] = kvm_s390_handle_e5, |
114 | [0xeb] = handle_lctlg, | 120 | [0xeb] = handle_eb, |
115 | }; | 121 | }; |
116 | 122 | ||
117 | static int handle_noop(struct kvm_vcpu *vcpu) | 123 | static int handle_noop(struct kvm_vcpu *vcpu) |
@@ -258,6 +264,7 @@ static const intercept_handler_t intercept_funcs[] = { | |||
258 | [0x0C >> 2] = handle_instruction_and_prog, | 264 | [0x0C >> 2] = handle_instruction_and_prog, |
259 | [0x10 >> 2] = handle_noop, | 265 | [0x10 >> 2] = handle_noop, |
260 | [0x14 >> 2] = handle_noop, | 266 | [0x14 >> 2] = handle_noop, |
267 | [0x18 >> 2] = handle_noop, | ||
261 | [0x1C >> 2] = kvm_s390_handle_wait, | 268 | [0x1C >> 2] = kvm_s390_handle_wait, |
262 | [0x20 >> 2] = handle_validity, | 269 | [0x20 >> 2] = handle_validity, |
263 | [0x28 >> 2] = handle_stop, | 270 | [0x28 >> 2] = handle_stop, |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 87418b50f21c..37116a77cb4b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -21,11 +21,31 @@ | |||
21 | #include "gaccess.h" | 21 | #include "gaccess.h" |
22 | #include "trace-s390.h" | 22 | #include "trace-s390.h" |
23 | 23 | ||
24 | #define IOINT_SCHID_MASK 0x0000ffff | ||
25 | #define IOINT_SSID_MASK 0x00030000 | ||
26 | #define IOINT_CSSID_MASK 0x03fc0000 | ||
27 | #define IOINT_AI_MASK 0x04000000 | ||
28 | |||
29 | static int is_ioint(u64 type) | ||
30 | { | ||
31 | return ((type & 0xfffe0000u) != 0xfffe0000u); | ||
32 | } | ||
33 | |||
24 | static int psw_extint_disabled(struct kvm_vcpu *vcpu) | 34 | static int psw_extint_disabled(struct kvm_vcpu *vcpu) |
25 | { | 35 | { |
26 | return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); | 36 | return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); |
27 | } | 37 | } |
28 | 38 | ||
39 | static int psw_ioint_disabled(struct kvm_vcpu *vcpu) | ||
40 | { | ||
41 | return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO); | ||
42 | } | ||
43 | |||
44 | static int psw_mchk_disabled(struct kvm_vcpu *vcpu) | ||
45 | { | ||
46 | return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK); | ||
47 | } | ||
48 | |||
29 | static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) | 49 | static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) |
30 | { | 50 | { |
31 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || | 51 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || |
@@ -35,6 +55,13 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) | |||
35 | return 1; | 55 | return 1; |
36 | } | 56 | } |
37 | 57 | ||
58 | static u64 int_word_to_isc_bits(u32 int_word) | ||
59 | { | ||
60 | u8 isc = (int_word & 0x38000000) >> 27; | ||
61 | |||
62 | return (0x80 >> isc) << 24; | ||
63 | } | ||
64 | |||
38 | static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, | 65 | static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, |
39 | struct kvm_s390_interrupt_info *inti) | 66 | struct kvm_s390_interrupt_info *inti) |
40 | { | 67 | { |
@@ -67,7 +94,22 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, | |||
67 | case KVM_S390_SIGP_SET_PREFIX: | 94 | case KVM_S390_SIGP_SET_PREFIX: |
68 | case KVM_S390_RESTART: | 95 | case KVM_S390_RESTART: |
69 | return 1; | 96 | return 1; |
97 | case KVM_S390_MCHK: | ||
98 | if (psw_mchk_disabled(vcpu)) | ||
99 | return 0; | ||
100 | if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14) | ||
101 | return 1; | ||
102 | return 0; | ||
103 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | ||
104 | if (psw_ioint_disabled(vcpu)) | ||
105 | return 0; | ||
106 | if (vcpu->arch.sie_block->gcr[6] & | ||
107 | int_word_to_isc_bits(inti->io.io_int_word)) | ||
108 | return 1; | ||
109 | return 0; | ||
70 | default: | 110 | default: |
111 | printk(KERN_WARNING "illegal interrupt type %llx\n", | ||
112 | inti->type); | ||
71 | BUG(); | 113 | BUG(); |
72 | } | 114 | } |
73 | return 0; | 115 | return 0; |
@@ -93,6 +135,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) | |||
93 | CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, | 135 | CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, |
94 | &vcpu->arch.sie_block->cpuflags); | 136 | &vcpu->arch.sie_block->cpuflags); |
95 | vcpu->arch.sie_block->lctl = 0x0000; | 137 | vcpu->arch.sie_block->lctl = 0x0000; |
138 | vcpu->arch.sie_block->ictl &= ~ICTL_LPSW; | ||
96 | } | 139 | } |
97 | 140 | ||
98 | static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) | 141 | static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) |
@@ -116,6 +159,18 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, | |||
116 | case KVM_S390_SIGP_STOP: | 159 | case KVM_S390_SIGP_STOP: |
117 | __set_cpuflag(vcpu, CPUSTAT_STOP_INT); | 160 | __set_cpuflag(vcpu, CPUSTAT_STOP_INT); |
118 | break; | 161 | break; |
162 | case KVM_S390_MCHK: | ||
163 | if (psw_mchk_disabled(vcpu)) | ||
164 | vcpu->arch.sie_block->ictl |= ICTL_LPSW; | ||
165 | else | ||
166 | vcpu->arch.sie_block->lctl |= LCTL_CR14; | ||
167 | break; | ||
168 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | ||
169 | if (psw_ioint_disabled(vcpu)) | ||
170 | __set_cpuflag(vcpu, CPUSTAT_IO_INT); | ||
171 | else | ||
172 | vcpu->arch.sie_block->lctl |= LCTL_CR6; | ||
173 | break; | ||
119 | default: | 174 | default: |
120 | BUG(); | 175 | BUG(); |
121 | } | 176 | } |
@@ -297,6 +352,73 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
297 | exception = 1; | 352 | exception = 1; |
298 | break; | 353 | break; |
299 | 354 | ||
355 | case KVM_S390_MCHK: | ||
356 | VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", | ||
357 | inti->mchk.mcic); | ||
358 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | ||
359 | inti->mchk.cr14, | ||
360 | inti->mchk.mcic); | ||
361 | rc = kvm_s390_vcpu_store_status(vcpu, | ||
362 | KVM_S390_STORE_STATUS_PREFIXED); | ||
363 | if (rc == -EFAULT) | ||
364 | exception = 1; | ||
365 | |||
366 | rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); | ||
367 | if (rc == -EFAULT) | ||
368 | exception = 1; | ||
369 | |||
370 | rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, | ||
371 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
372 | if (rc == -EFAULT) | ||
373 | exception = 1; | ||
374 | |||
375 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
376 | __LC_MCK_NEW_PSW, sizeof(psw_t)); | ||
377 | if (rc == -EFAULT) | ||
378 | exception = 1; | ||
379 | break; | ||
380 | |||
381 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | ||
382 | { | ||
383 | __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | | ||
384 | inti->io.subchannel_nr; | ||
385 | __u64 param1 = ((__u64)inti->io.io_int_parm << 32) | | ||
386 | inti->io.io_int_word; | ||
387 | VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); | ||
388 | vcpu->stat.deliver_io_int++; | ||
389 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | ||
390 | param0, param1); | ||
391 | rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, | ||
392 | inti->io.subchannel_id); | ||
393 | if (rc == -EFAULT) | ||
394 | exception = 1; | ||
395 | |||
396 | rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, | ||
397 | inti->io.subchannel_nr); | ||
398 | if (rc == -EFAULT) | ||
399 | exception = 1; | ||
400 | |||
401 | rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, | ||
402 | inti->io.io_int_parm); | ||
403 | if (rc == -EFAULT) | ||
404 | exception = 1; | ||
405 | |||
406 | rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, | ||
407 | inti->io.io_int_word); | ||
408 | if (rc == -EFAULT) | ||
409 | exception = 1; | ||
410 | |||
411 | rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, | ||
412 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
413 | if (rc == -EFAULT) | ||
414 | exception = 1; | ||
415 | |||
416 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
417 | __LC_IO_NEW_PSW, sizeof(psw_t)); | ||
418 | if (rc == -EFAULT) | ||
419 | exception = 1; | ||
420 | break; | ||
421 | } | ||
300 | default: | 422 | default: |
301 | BUG(); | 423 | BUG(); |
302 | } | 424 | } |
@@ -518,6 +640,61 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | |||
518 | } | 640 | } |
519 | } | 641 | } |
520 | 642 | ||
643 | void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) | ||
644 | { | ||
645 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | ||
646 | struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; | ||
647 | struct kvm_s390_interrupt_info *n, *inti = NULL; | ||
648 | int deliver; | ||
649 | |||
650 | __reset_intercept_indicators(vcpu); | ||
651 | if (atomic_read(&li->active)) { | ||
652 | do { | ||
653 | deliver = 0; | ||
654 | spin_lock_bh(&li->lock); | ||
655 | list_for_each_entry_safe(inti, n, &li->list, list) { | ||
656 | if ((inti->type == KVM_S390_MCHK) && | ||
657 | __interrupt_is_deliverable(vcpu, inti)) { | ||
658 | list_del(&inti->list); | ||
659 | deliver = 1; | ||
660 | break; | ||
661 | } | ||
662 | __set_intercept_indicator(vcpu, inti); | ||
663 | } | ||
664 | if (list_empty(&li->list)) | ||
665 | atomic_set(&li->active, 0); | ||
666 | spin_unlock_bh(&li->lock); | ||
667 | if (deliver) { | ||
668 | __do_deliver_interrupt(vcpu, inti); | ||
669 | kfree(inti); | ||
670 | } | ||
671 | } while (deliver); | ||
672 | } | ||
673 | |||
674 | if (atomic_read(&fi->active)) { | ||
675 | do { | ||
676 | deliver = 0; | ||
677 | spin_lock(&fi->lock); | ||
678 | list_for_each_entry_safe(inti, n, &fi->list, list) { | ||
679 | if ((inti->type == KVM_S390_MCHK) && | ||
680 | __interrupt_is_deliverable(vcpu, inti)) { | ||
681 | list_del(&inti->list); | ||
682 | deliver = 1; | ||
683 | break; | ||
684 | } | ||
685 | __set_intercept_indicator(vcpu, inti); | ||
686 | } | ||
687 | if (list_empty(&fi->list)) | ||
688 | atomic_set(&fi->active, 0); | ||
689 | spin_unlock(&fi->lock); | ||
690 | if (deliver) { | ||
691 | __do_deliver_interrupt(vcpu, inti); | ||
692 | kfree(inti); | ||
693 | } | ||
694 | } while (deliver); | ||
695 | } | ||
696 | } | ||
697 | |||
521 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) | 698 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) |
522 | { | 699 | { |
523 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 700 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
@@ -540,12 +717,50 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) | |||
540 | return 0; | 717 | return 0; |
541 | } | 718 | } |
542 | 719 | ||
720 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | ||
721 | u64 cr6, u64 schid) | ||
722 | { | ||
723 | struct kvm_s390_float_interrupt *fi; | ||
724 | struct kvm_s390_interrupt_info *inti, *iter; | ||
725 | |||
726 | if ((!schid && !cr6) || (schid && cr6)) | ||
727 | return NULL; | ||
728 | mutex_lock(&kvm->lock); | ||
729 | fi = &kvm->arch.float_int; | ||
730 | spin_lock(&fi->lock); | ||
731 | inti = NULL; | ||
732 | list_for_each_entry(iter, &fi->list, list) { | ||
733 | if (!is_ioint(iter->type)) | ||
734 | continue; | ||
735 | if (cr6 && | ||
736 | ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0)) | ||
737 | continue; | ||
738 | if (schid) { | ||
739 | if (((schid & 0x00000000ffff0000) >> 16) != | ||
740 | iter->io.subchannel_id) | ||
741 | continue; | ||
742 | if ((schid & 0x000000000000ffff) != | ||
743 | iter->io.subchannel_nr) | ||
744 | continue; | ||
745 | } | ||
746 | inti = iter; | ||
747 | break; | ||
748 | } | ||
749 | if (inti) | ||
750 | list_del_init(&inti->list); | ||
751 | if (list_empty(&fi->list)) | ||
752 | atomic_set(&fi->active, 0); | ||
753 | spin_unlock(&fi->lock); | ||
754 | mutex_unlock(&kvm->lock); | ||
755 | return inti; | ||
756 | } | ||
757 | |||
543 | int kvm_s390_inject_vm(struct kvm *kvm, | 758 | int kvm_s390_inject_vm(struct kvm *kvm, |
544 | struct kvm_s390_interrupt *s390int) | 759 | struct kvm_s390_interrupt *s390int) |
545 | { | 760 | { |
546 | struct kvm_s390_local_interrupt *li; | 761 | struct kvm_s390_local_interrupt *li; |
547 | struct kvm_s390_float_interrupt *fi; | 762 | struct kvm_s390_float_interrupt *fi; |
548 | struct kvm_s390_interrupt_info *inti; | 763 | struct kvm_s390_interrupt_info *inti, *iter; |
549 | int sigcpu; | 764 | int sigcpu; |
550 | 765 | ||
551 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); | 766 | inti = kzalloc(sizeof(*inti), GFP_KERNEL); |
@@ -569,6 +784,29 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
569 | case KVM_S390_SIGP_STOP: | 784 | case KVM_S390_SIGP_STOP: |
570 | case KVM_S390_INT_EXTERNAL_CALL: | 785 | case KVM_S390_INT_EXTERNAL_CALL: |
571 | case KVM_S390_INT_EMERGENCY: | 786 | case KVM_S390_INT_EMERGENCY: |
787 | kfree(inti); | ||
788 | return -EINVAL; | ||
789 | case KVM_S390_MCHK: | ||
790 | VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", | ||
791 | s390int->parm64); | ||
792 | inti->type = s390int->type; | ||
793 | inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ | ||
794 | inti->mchk.mcic = s390int->parm64; | ||
795 | break; | ||
796 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | ||
797 | if (s390int->type & IOINT_AI_MASK) | ||
798 | VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); | ||
799 | else | ||
800 | VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", | ||
801 | s390int->type & IOINT_CSSID_MASK, | ||
802 | s390int->type & IOINT_SSID_MASK, | ||
803 | s390int->type & IOINT_SCHID_MASK); | ||
804 | inti->type = s390int->type; | ||
805 | inti->io.subchannel_id = s390int->parm >> 16; | ||
806 | inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; | ||
807 | inti->io.io_int_parm = s390int->parm64 >> 32; | ||
808 | inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull; | ||
809 | break; | ||
572 | default: | 810 | default: |
573 | kfree(inti); | 811 | kfree(inti); |
574 | return -EINVAL; | 812 | return -EINVAL; |
@@ -579,7 +817,22 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
579 | mutex_lock(&kvm->lock); | 817 | mutex_lock(&kvm->lock); |
580 | fi = &kvm->arch.float_int; | 818 | fi = &kvm->arch.float_int; |
581 | spin_lock(&fi->lock); | 819 | spin_lock(&fi->lock); |
582 | list_add_tail(&inti->list, &fi->list); | 820 | if (!is_ioint(inti->type)) |
821 | list_add_tail(&inti->list, &fi->list); | ||
822 | else { | ||
823 | u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); | ||
824 | |||
825 | /* Keep I/O interrupts sorted in isc order. */ | ||
826 | list_for_each_entry(iter, &fi->list, list) { | ||
827 | if (!is_ioint(iter->type)) | ||
828 | continue; | ||
829 | if (int_word_to_isc_bits(iter->io.io_int_word) | ||
830 | <= isc_bits) | ||
831 | continue; | ||
832 | break; | ||
833 | } | ||
834 | list_add_tail(&inti->list, &iter->list); | ||
835 | } | ||
583 | atomic_set(&fi->active, 1); | 836 | atomic_set(&fi->active, 1); |
584 | sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); | 837 | sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); |
585 | if (sigcpu == KVM_MAX_VCPUS) { | 838 | if (sigcpu == KVM_MAX_VCPUS) { |
@@ -651,8 +904,15 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | |||
651 | inti->type = s390int->type; | 904 | inti->type = s390int->type; |
652 | inti->emerg.code = s390int->parm; | 905 | inti->emerg.code = s390int->parm; |
653 | break; | 906 | break; |
907 | case KVM_S390_MCHK: | ||
908 | VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", | ||
909 | s390int->parm64); | ||
910 | inti->type = s390int->type; | ||
911 | inti->mchk.mcic = s390int->parm64; | ||
912 | break; | ||
654 | case KVM_S390_INT_VIRTIO: | 913 | case KVM_S390_INT_VIRTIO: |
655 | case KVM_S390_INT_SERVICE: | 914 | case KVM_S390_INT_SERVICE: |
915 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | ||
656 | default: | 916 | default: |
657 | kfree(inti); | 917 | kfree(inti); |
658 | return -EINVAL; | 918 | return -EINVAL; |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2923781590a6..4cf35a0a79e7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -140,6 +140,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
140 | #endif | 140 | #endif |
141 | case KVM_CAP_SYNC_REGS: | 141 | case KVM_CAP_SYNC_REGS: |
142 | case KVM_CAP_ONE_REG: | 142 | case KVM_CAP_ONE_REG: |
143 | case KVM_CAP_ENABLE_CAP: | ||
144 | case KVM_CAP_S390_CSS_SUPPORT: | ||
143 | r = 1; | 145 | r = 1; |
144 | break; | 146 | break; |
145 | case KVM_CAP_NR_VCPUS: | 147 | case KVM_CAP_NR_VCPUS: |
@@ -234,6 +236,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
234 | if (!kvm->arch.gmap) | 236 | if (!kvm->arch.gmap) |
235 | goto out_nogmap; | 237 | goto out_nogmap; |
236 | } | 238 | } |
239 | |||
240 | kvm->arch.css_support = 0; | ||
241 | |||
237 | return 0; | 242 | return 0; |
238 | out_nogmap: | 243 | out_nogmap: |
239 | debug_unregister(kvm->arch.dbf); | 244 | debug_unregister(kvm->arch.dbf); |
@@ -659,6 +664,7 @@ rerun_vcpu: | |||
659 | case KVM_EXIT_INTR: | 664 | case KVM_EXIT_INTR: |
660 | case KVM_EXIT_S390_RESET: | 665 | case KVM_EXIT_S390_RESET: |
661 | case KVM_EXIT_S390_UCONTROL: | 666 | case KVM_EXIT_S390_UCONTROL: |
667 | case KVM_EXIT_S390_TSCH: | ||
662 | break; | 668 | break; |
663 | default: | 669 | default: |
664 | BUG(); | 670 | BUG(); |
@@ -766,6 +772,14 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | |||
766 | } else | 772 | } else |
767 | prefix = 0; | 773 | prefix = 0; |
768 | 774 | ||
775 | /* | ||
776 | * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy | ||
777 | * copying in vcpu load/put. Lets update our copies before we save | ||
778 | * it into the save area | ||
779 | */ | ||
780 | save_fp_regs(&vcpu->arch.guest_fpregs); | ||
781 | save_access_regs(vcpu->run->s.regs.acrs); | ||
782 | |||
769 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), | 783 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), |
770 | vcpu->arch.guest_fpregs.fprs, 128, prefix)) | 784 | vcpu->arch.guest_fpregs.fprs, 128, prefix)) |
771 | return -EFAULT; | 785 | return -EFAULT; |
@@ -810,6 +824,29 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | |||
810 | return 0; | 824 | return 0; |
811 | } | 825 | } |
812 | 826 | ||
827 | static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | ||
828 | struct kvm_enable_cap *cap) | ||
829 | { | ||
830 | int r; | ||
831 | |||
832 | if (cap->flags) | ||
833 | return -EINVAL; | ||
834 | |||
835 | switch (cap->cap) { | ||
836 | case KVM_CAP_S390_CSS_SUPPORT: | ||
837 | if (!vcpu->kvm->arch.css_support) { | ||
838 | vcpu->kvm->arch.css_support = 1; | ||
839 | trace_kvm_s390_enable_css(vcpu->kvm); | ||
840 | } | ||
841 | r = 0; | ||
842 | break; | ||
843 | default: | ||
844 | r = -EINVAL; | ||
845 | break; | ||
846 | } | ||
847 | return r; | ||
848 | } | ||
849 | |||
813 | long kvm_arch_vcpu_ioctl(struct file *filp, | 850 | long kvm_arch_vcpu_ioctl(struct file *filp, |
814 | unsigned int ioctl, unsigned long arg) | 851 | unsigned int ioctl, unsigned long arg) |
815 | { | 852 | { |
@@ -896,6 +933,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
896 | r = 0; | 933 | r = 0; |
897 | break; | 934 | break; |
898 | } | 935 | } |
936 | case KVM_ENABLE_CAP: | ||
937 | { | ||
938 | struct kvm_enable_cap cap; | ||
939 | r = -EFAULT; | ||
940 | if (copy_from_user(&cap, argp, sizeof(cap))) | ||
941 | break; | ||
942 | r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); | ||
943 | break; | ||
944 | } | ||
899 | default: | 945 | default: |
900 | r = -ENOTTY; | 946 | r = -ENOTTY; |
901 | } | 947 | } |
@@ -930,7 +976,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
930 | struct kvm_memory_slot *memslot, | 976 | struct kvm_memory_slot *memslot, |
931 | struct kvm_memory_slot old, | 977 | struct kvm_memory_slot old, |
932 | struct kvm_userspace_memory_region *mem, | 978 | struct kvm_userspace_memory_region *mem, |
933 | int user_alloc) | 979 | bool user_alloc) |
934 | { | 980 | { |
935 | /* A few sanity checks. We can have exactly one memory slot which has | 981 | /* A few sanity checks. We can have exactly one memory slot which has |
936 | to start at guest virtual zero and which has to be located at a | 982 | to start at guest virtual zero and which has to be located at a |
@@ -960,7 +1006,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
960 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 1006 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
961 | struct kvm_userspace_memory_region *mem, | 1007 | struct kvm_userspace_memory_region *mem, |
962 | struct kvm_memory_slot old, | 1008 | struct kvm_memory_slot old, |
963 | int user_alloc) | 1009 | bool user_alloc) |
964 | { | 1010 | { |
965 | int rc; | 1011 | int rc; |
966 | 1012 | ||
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index d75bc5e92c5b..4d89d64a8161 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -65,21 +65,67 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) | |||
65 | vcpu->arch.sie_block->ihcpu = 0xffff; | 65 | vcpu->arch.sie_block->ihcpu = 0xffff; |
66 | } | 66 | } |
67 | 67 | ||
68 | static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) | ||
69 | { /* Return the operand address formed from the base and displacement fields of sie_block->ipb. */ | ||
70 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; /* base register number: ipb shifted right by 28 */ | ||
71 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); /* displacement: ipb bits 16-27 */ | ||
72 | |||
73 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; /* base register number 0 contributes 0, not gprs[0] */ | ||
74 | } | ||
75 | |||
76 | static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, | ||
77 | u64 *address1, u64 *address2) | ||
78 | { /* Decode two base/displacement pairs from sie_block->ipb and store both operand addresses through address1/address2. */ | ||
79 | u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; /* first base register number: ipb bits 28-31 */ | ||
80 | u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; /* first displacement: ipb bits 16-27 */ | ||
81 | u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; /* second base register number: ipb bits 12-15 */ | ||
82 | u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff; /* second displacement: ipb bits 0-11 */ | ||
83 | |||
84 | *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; /* base register number 0 contributes 0, not gprs[0] */ | ||
85 | *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; | ||
86 | } | ||
87 | |||
88 | static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) | ||
89 | { /* Return the operand address formed from the base field and the signed 20-bit displacement in sie_block->ipb. */ | ||
90 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; /* base register number: ipb shifted right by 28 */ | ||
91 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + /* low 12 displacement bits: ipb bits 16-27 */ | ||
92 | ((vcpu->arch.sie_block->ipb & 0xff00) << 4); /* high 8 displacement bits: ipb bits 8-15 moved to bits 12-19 */ | ||
93 | /* The displacement is a 20bit _SIGNED_ value */ | ||
94 | if (disp2 & 0x80000) /* bit 19 is the sign bit of the 20-bit value */ | ||
95 | disp2+=0xfff00000; /* fill bits 20-31 so the u32 holds the two's-complement value */ | ||
96 | |||
97 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; /* the (long)(int) cast sign-extends the displacement before the add */ | ||
98 | } | ||
99 | |||
100 | static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) | ||
101 | { /* Return the operand address formed from the base and displacement fields of sie_block->ipb. */ | ||
102 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; /* base register number: ipb shifted right by 28 */ | ||
103 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); /* displacement: ipb bits 16-27 */ | ||
104 | |||
105 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; /* base register number 0 contributes 0, not gprs[0] */ | ||
106 | } | ||
107 | |||
68 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); | 108 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); |
69 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); | 109 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); |
70 | void kvm_s390_tasklet(unsigned long parm); | 110 | void kvm_s390_tasklet(unsigned long parm); |
71 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); | 111 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); |
112 | void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); | ||
72 | int kvm_s390_inject_vm(struct kvm *kvm, | 113 | int kvm_s390_inject_vm(struct kvm *kvm, |
73 | struct kvm_s390_interrupt *s390int); | 114 | struct kvm_s390_interrupt *s390int); |
74 | int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | 115 | int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, |
75 | struct kvm_s390_interrupt *s390int); | 116 | struct kvm_s390_interrupt *s390int); |
76 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); | 117 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); |
77 | int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); | 118 | int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); |
119 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | ||
120 | u64 cr6, u64 schid); | ||
78 | 121 | ||
79 | /* implemented in priv.c */ | 122 | /* implemented in priv.c */ |
80 | int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); | 123 | int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); |
81 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); | 124 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); |
82 | int kvm_s390_handle_01(struct kvm_vcpu *vcpu); | 125 | int kvm_s390_handle_01(struct kvm_vcpu *vcpu); |
126 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); | ||
127 | int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); | ||
128 | int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu); | ||
83 | 129 | ||
84 | /* implemented in sigp.c */ | 130 | /* implemented in sigp.c */ |
85 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); | 131 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d768906f15c8..0ef9894606e5 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -18,23 +18,21 @@ | |||
18 | #include <asm/debug.h> | 18 | #include <asm/debug.h> |
19 | #include <asm/ebcdic.h> | 19 | #include <asm/ebcdic.h> |
20 | #include <asm/sysinfo.h> | 20 | #include <asm/sysinfo.h> |
21 | #include <asm/ptrace.h> | ||
22 | #include <asm/compat.h> | ||
21 | #include "gaccess.h" | 23 | #include "gaccess.h" |
22 | #include "kvm-s390.h" | 24 | #include "kvm-s390.h" |
23 | #include "trace.h" | 25 | #include "trace.h" |
24 | 26 | ||
25 | static int handle_set_prefix(struct kvm_vcpu *vcpu) | 27 | static int handle_set_prefix(struct kvm_vcpu *vcpu) |
26 | { | 28 | { |
27 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
28 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
29 | u64 operand2; | 29 | u64 operand2; |
30 | u32 address = 0; | 30 | u32 address = 0; |
31 | u8 tmp; | 31 | u8 tmp; |
32 | 32 | ||
33 | vcpu->stat.instruction_spx++; | 33 | vcpu->stat.instruction_spx++; |
34 | 34 | ||
35 | operand2 = disp2; | 35 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
36 | if (base2) | ||
37 | operand2 += vcpu->run->s.regs.gprs[base2]; | ||
38 | 36 | ||
39 | /* must be word boundary */ | 37 | /* must be word boundary */ |
40 | if (operand2 & 3) { | 38 | if (operand2 & 3) { |
@@ -67,15 +65,12 @@ out: | |||
67 | 65 | ||
68 | static int handle_store_prefix(struct kvm_vcpu *vcpu) | 66 | static int handle_store_prefix(struct kvm_vcpu *vcpu) |
69 | { | 67 | { |
70 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
71 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
72 | u64 operand2; | 68 | u64 operand2; |
73 | u32 address; | 69 | u32 address; |
74 | 70 | ||
75 | vcpu->stat.instruction_stpx++; | 71 | vcpu->stat.instruction_stpx++; |
76 | operand2 = disp2; | 72 | |
77 | if (base2) | 73 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
78 | operand2 += vcpu->run->s.regs.gprs[base2]; | ||
79 | 74 | ||
80 | /* must be word boundary */ | 75 | /* must be word boundary */ |
81 | if (operand2 & 3) { | 76 | if (operand2 & 3) { |
@@ -100,15 +95,12 @@ out: | |||
100 | 95 | ||
101 | static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | 96 | static int handle_store_cpu_address(struct kvm_vcpu *vcpu) |
102 | { | 97 | { |
103 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
104 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
105 | u64 useraddr; | 98 | u64 useraddr; |
106 | int rc; | 99 | int rc; |
107 | 100 | ||
108 | vcpu->stat.instruction_stap++; | 101 | vcpu->stat.instruction_stap++; |
109 | useraddr = disp2; | 102 | |
110 | if (base2) | 103 | useraddr = kvm_s390_get_base_disp_s(vcpu); |
111 | useraddr += vcpu->run->s.regs.gprs[base2]; | ||
112 | 104 | ||
113 | if (useraddr & 1) { | 105 | if (useraddr & 1) { |
114 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 106 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -135,24 +127,96 @@ static int handle_skey(struct kvm_vcpu *vcpu) | |||
135 | return 0; | 127 | return 0; |
136 | } | 128 | } |
137 | 129 | ||
138 | static int handle_stsch(struct kvm_vcpu *vcpu) | 130 | static int handle_tpi(struct kvm_vcpu *vcpu) |
139 | { | 131 | { |
140 | vcpu->stat.instruction_stsch++; | 132 | u64 addr; |
141 | VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3"); | 133 | struct kvm_s390_interrupt_info *inti; |
142 | /* condition code 3 */ | 134 | int cc; |
135 | |||
136 | addr = kvm_s390_get_base_disp_s(vcpu); | ||
137 | |||
138 | inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); | ||
139 | if (inti) { | ||
140 | if (addr) { | ||
141 | /* | ||
142 | * Store the two-word I/O interruption code into the | ||
143 | * provided area. | ||
144 | */ | ||
145 | put_guest_u16(vcpu, addr, inti->io.subchannel_id); | ||
146 | put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); | ||
147 | put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); | ||
148 | } else { | ||
149 | /* | ||
150 | * Store the three-word I/O interruption code into | ||
151 | * the appropriate lowcore area. | ||
152 | */ | ||
153 | put_guest_u16(vcpu, 184, inti->io.subchannel_id); | ||
154 | put_guest_u16(vcpu, 186, inti->io.subchannel_nr); | ||
155 | put_guest_u32(vcpu, 188, inti->io.io_int_parm); | ||
156 | put_guest_u32(vcpu, 192, inti->io.io_int_word); | ||
157 | } | ||
158 | cc = 1; | ||
159 | } else | ||
160 | cc = 0; | ||
161 | kfree(inti); | ||
162 | /* Set condition code and we're done. */ | ||
143 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 163 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
144 | vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; | 164 | vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; |
145 | return 0; | 165 | return 0; |
146 | } | 166 | } |
147 | 167 | ||
148 | static int handle_chsc(struct kvm_vcpu *vcpu) | 168 | static int handle_tsch(struct kvm_vcpu *vcpu) |
149 | { | 169 | { |
150 | vcpu->stat.instruction_chsc++; | 170 | struct kvm_s390_interrupt_info *inti; |
151 | VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3"); | 171 | |
152 | /* condition code 3 */ | 172 | inti = kvm_s390_get_io_int(vcpu->kvm, 0, |
153 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 173 | vcpu->run->s.regs.gprs[1]); |
154 | vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; | 174 | |
155 | return 0; | 175 | /* |
176 | * Prepare exit to userspace. | ||
177 | * We indicate whether we dequeued a pending I/O interrupt | ||
178 | * so that userspace can re-inject it if the instruction gets | ||
179 | * a program check. While this may re-order the pending I/O | ||
180 | * interrupts, this is no problem since the priority is kept | ||
181 | * intact. | ||
182 | */ | ||
183 | vcpu->run->exit_reason = KVM_EXIT_S390_TSCH; | ||
184 | vcpu->run->s390_tsch.dequeued = !!inti; | ||
185 | if (inti) { | ||
186 | vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id; | ||
187 | vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr; | ||
188 | vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm; | ||
189 | vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word; | ||
190 | } | ||
191 | vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb; | ||
192 | kfree(inti); | ||
193 | return -EREMOTE; | ||
194 | } | ||
195 | |||
196 | static int handle_io_inst(struct kvm_vcpu *vcpu) | ||
197 | { /* Common handler for intercepted channel I/O instructions; behaviour depends on vcpu->kvm->arch.css_support. */ | ||
198 | VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); | ||
199 | |||
200 | if (vcpu->kvm->arch.css_support) { | ||
201 | /* | ||
202 | * Most I/O instructions will be handled by userspace. | ||
203 | * Exceptions are tpi and the interrupt portion of tsch. | ||
204 | */ | ||
205 | if (vcpu->arch.sie_block->ipa == 0xb236) | ||
206 | return handle_tpi(vcpu); | ||
207 | if (vcpu->arch.sie_block->ipa == 0xb235) | ||
208 | return handle_tsch(vcpu); | ||
209 | /* Handle in userspace. */ | ||
210 | return -EOPNOTSUPP; | ||
211 | } else { | ||
212 | /* | ||
213 | * Set condition code 3 to stop the guest from issuing channel | ||
214 | * I/O instructions. | ||
215 | */ | ||
216 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* clear the two condition-code bits of the guest PSW mask */ | ||
217 | vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; /* then store the value 3 into them */ | ||
218 | return 0; | ||
219 | } | ||
156 | } | 220 | } |
157 | 221 | ||
158 | static int handle_stfl(struct kvm_vcpu *vcpu) | 222 | static int handle_stfl(struct kvm_vcpu *vcpu) |
@@ -176,17 +240,107 @@ static int handle_stfl(struct kvm_vcpu *vcpu) | |||
176 | return 0; | 240 | return 0; |
177 | } | 241 | } |
178 | 242 | ||
243 | static void handle_new_psw(struct kvm_vcpu *vcpu) | ||
244 | { /* Follow-up work after a new guest PSW has been stored in sie_block->gpsw. */ | ||
245 | /* Check whether the new psw is enabled for machine checks. */ | ||
246 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) | ||
247 | kvm_s390_deliver_pending_machine_checks(vcpu); /* only called when PSW_MASK_MCHECK is set in the new mask */ | ||
248 | } | ||
250 | #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) /* both addressing-mode bits of the PSW mask */ | ||
251 | #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL /* mask bits that must be zero in a loaded PSW */ | ||
252 | #define PSW_ADDR_24 0x0000000000ffffffUL /* 24 address bits (was 0xfffff, i.e. only 20 bits) */ | ||
253 | #define PSW_ADDR_31 0x000000007fffffffUL /* 31 address bits */ | ||
254 | |||
255 | int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) | ||
256 | { /* Load a short (psw_compat_t) PSW from guest memory into the guest PSW; problems are reported to the guest as program interrupts. */ | ||
257 | u64 addr; | ||
258 | psw_compat_t new_psw; | ||
259 | |||
260 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) /* rejected when PSW_MASK_PSTATE is set in the current PSW */ | ||
261 | return kvm_s390_inject_program_int(vcpu, | ||
262 | PGM_PRIVILEGED_OPERATION); | ||
263 | |||
264 | addr = kvm_s390_get_base_disp_s(vcpu); | ||
265 | |||
266 | if (addr & 7) { /* operand must be 8-byte aligned */ | ||
267 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
268 | goto out; | ||
269 | } | ||
270 | |||
271 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { /* a failed guest read becomes an addressing exception */ | ||
272 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
273 | goto out; | ||
274 | } | ||
275 | |||
276 | if (!(new_psw.mask & PSW32_MASK_BASE)) { /* the short PSW must have PSW32_MASK_BASE set */ | ||
277 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
278 | goto out; | ||
279 | } | ||
280 | |||
281 | vcpu->arch.sie_block->gpsw.mask = | ||
282 | (new_psw.mask & ~PSW32_MASK_BASE) << 32; /* short mask becomes the upper word of the 64-bit mask */ | ||
283 | vcpu->arch.sie_block->gpsw.mask |= new_psw.addr & PSW32_ADDR_AMODE; /* move the addressing-mode bit from the address word into the mask */ | ||
284 | vcpu->arch.sie_block->gpsw.addr = new_psw.addr & ~PSW32_ADDR_AMODE; /* the address itself must not carry the mode bit */ | ||
285 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || | ||
286 | (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && | ||
287 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || | ||
288 | ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
289 | PSW_MASK_EA)) { /* unassigned bits, 24-bit mode with a wider address, or EA without BA */ | ||
290 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
291 | goto out; | ||
292 | } | ||
293 | |||
294 | handle_new_psw(vcpu); | ||
295 | out: | ||
296 | return 0; /* every path after the privilege check returns 0 */ | ||
297 | } | ||
298 | |||
299 | static int handle_lpswe(struct kvm_vcpu *vcpu) | ||
300 | { /* Load a full psw_t from guest memory into the guest PSW; always returns 0, problems are reported to the guest as program interrupts. */ | ||
301 | u64 addr; | ||
302 | psw_t new_psw; | ||
303 | |||
304 | addr = kvm_s390_get_base_disp_s(vcpu); | ||
305 | |||
306 | if (addr & 7) { /* operand must be 8-byte aligned */ | ||
307 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
308 | goto out; | ||
309 | } | ||
310 | |||
311 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { /* a failed guest read becomes an addressing exception */ | ||
312 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
313 | goto out; | ||
314 | } | ||
315 | |||
316 | vcpu->arch.sie_block->gpsw.mask = new_psw.mask; /* note: the new PSW is stored before it is validated below */ | ||
317 | vcpu->arch.sie_block->gpsw.addr = new_psw.addr; | ||
318 | |||
319 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || | ||
320 | (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
321 | PSW_MASK_BA) && | ||
322 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || /* BA only: address must fit PSW_ADDR_31 */ | ||
323 | (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && | ||
324 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || /* neither EA nor BA: address must fit PSW_ADDR_24 */ | ||
325 | ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
326 | PSW_MASK_EA)) { /* EA without BA is rejected */ | ||
327 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
328 | goto out; | ||
329 | } | ||
330 | |||
331 | handle_new_psw(vcpu); | ||
332 | out: | ||
333 | return 0; | ||
334 | } | ||
335 | |||
179 | static int handle_stidp(struct kvm_vcpu *vcpu) | 336 | static int handle_stidp(struct kvm_vcpu *vcpu) |
180 | { | 337 | { |
181 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
182 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
183 | u64 operand2; | 338 | u64 operand2; |
184 | int rc; | 339 | int rc; |
185 | 340 | ||
186 | vcpu->stat.instruction_stidp++; | 341 | vcpu->stat.instruction_stidp++; |
187 | operand2 = disp2; | 342 | |
188 | if (base2) | 343 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
189 | operand2 += vcpu->run->s.regs.gprs[base2]; | ||
190 | 344 | ||
191 | if (operand2 & 7) { | 345 | if (operand2 & 7) { |
192 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 346 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -240,17 +394,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
240 | int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; | 394 | int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; |
241 | int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; | 395 | int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; |
242 | int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; | 396 | int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; |
243 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
244 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
245 | u64 operand2; | 397 | u64 operand2; |
246 | unsigned long mem; | 398 | unsigned long mem; |
247 | 399 | ||
248 | vcpu->stat.instruction_stsi++; | 400 | vcpu->stat.instruction_stsi++; |
249 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); | 401 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); |
250 | 402 | ||
251 | operand2 = disp2; | 403 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
252 | if (base2) | ||
253 | operand2 += vcpu->run->s.regs.gprs[base2]; | ||
254 | 404 | ||
255 | if (operand2 & 0xfff && fc > 0) | 405 | if (operand2 & 0xfff && fc > 0) |
256 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 406 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -297,7 +447,7 @@ out_fail: | |||
297 | return 0; | 447 | return 0; |
298 | } | 448 | } |
299 | 449 | ||
300 | static intercept_handler_t priv_handlers[256] = { | 450 | static const intercept_handler_t b2_handlers[256] = { |
301 | [0x02] = handle_stidp, | 451 | [0x02] = handle_stidp, |
302 | [0x10] = handle_set_prefix, | 452 | [0x10] = handle_set_prefix, |
303 | [0x11] = handle_store_prefix, | 453 | [0x11] = handle_store_prefix, |
@@ -305,10 +455,25 @@ static intercept_handler_t priv_handlers[256] = { | |||
305 | [0x29] = handle_skey, | 455 | [0x29] = handle_skey, |
306 | [0x2a] = handle_skey, | 456 | [0x2a] = handle_skey, |
307 | [0x2b] = handle_skey, | 457 | [0x2b] = handle_skey, |
308 | [0x34] = handle_stsch, | 458 | [0x30] = handle_io_inst, |
309 | [0x5f] = handle_chsc, | 459 | [0x31] = handle_io_inst, |
460 | [0x32] = handle_io_inst, | ||
461 | [0x33] = handle_io_inst, | ||
462 | [0x34] = handle_io_inst, | ||
463 | [0x35] = handle_io_inst, | ||
464 | [0x36] = handle_io_inst, | ||
465 | [0x37] = handle_io_inst, | ||
466 | [0x38] = handle_io_inst, | ||
467 | [0x39] = handle_io_inst, | ||
468 | [0x3a] = handle_io_inst, | ||
469 | [0x3b] = handle_io_inst, | ||
470 | [0x3c] = handle_io_inst, | ||
471 | [0x5f] = handle_io_inst, | ||
472 | [0x74] = handle_io_inst, | ||
473 | [0x76] = handle_io_inst, | ||
310 | [0x7d] = handle_stsi, | 474 | [0x7d] = handle_stsi, |
311 | [0xb1] = handle_stfl, | 475 | [0xb1] = handle_stfl, |
476 | [0xb2] = handle_lpswe, | ||
312 | }; | 477 | }; |
313 | 478 | ||
314 | int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) | 479 | int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) |
@@ -322,7 +487,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) | |||
322 | * state bit and (a) handle the instruction or (b) send a code 2 | 487 | * state bit and (a) handle the instruction or (b) send a code 2 |
323 | * program check. | 488 | * program check. |
324 | * Anything else goes to userspace.*/ | 489 | * Anything else goes to userspace.*/ |
325 | handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; | 490 | handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; |
326 | if (handler) { | 491 | if (handler) { |
327 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 492 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
328 | return kvm_s390_inject_program_int(vcpu, | 493 | return kvm_s390_inject_program_int(vcpu, |
@@ -333,19 +498,74 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) | |||
333 | return -EOPNOTSUPP; | 498 | return -EOPNOTSUPP; |
334 | } | 499 | } |
335 | 500 | ||
501 | static int handle_epsw(struct kvm_vcpu *vcpu) | ||
502 | { /* Copy the two 32-bit halves of the guest PSW mask into the low words of the registers named in ipb; always returns 0. */ | ||
503 | int reg1, reg2; | ||
504 | |||
505 | reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; /* shift must match the mask; ">> 24" always yielded 0 */ | ||
506 | reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; | ||
507 | |||
508 | /* This basically extracts the mask half of the psw. */ | ||
509 | vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; /* keep the upper word of the register */ | ||
510 | vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; /* low word = upper half of the PSW mask */ | ||
511 | if (reg2) { /* register number 0 means the second half is not stored */ | ||
512 | vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; | ||
513 | vcpu->run->s.regs.gprs[reg2] |= | ||
514 | vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; /* low word = lower half of the PSW mask */ | ||
515 | } | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | static const intercept_handler_t b9_handlers[256] = { /* indexed by the low byte of sie_block->ipa; unset entries are NULL */ | ||
520 | [0x8d] = handle_epsw, | ||
521 | [0x9c] = handle_io_inst, | ||
522 | }; | ||
523 | |||
524 | int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) | ||
525 | { /* Dispatch via b9_handlers; returns the handler's result, or -EOPNOTSUPP when no handler is registered. */ | ||
526 | intercept_handler_t handler; | ||
527 | |||
528 | /* This is handled just as for the B2 instructions. */ | ||
529 | handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; | ||
530 | if (handler) { | ||
531 | if ((handler != handle_epsw) && /* handle_epsw is the one handler that also runs with PSW_MASK_PSTATE set */ | ||
532 | (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)) | ||
533 | return kvm_s390_inject_program_int(vcpu, | ||
534 | PGM_PRIVILEGED_OPERATION); | ||
535 | else | ||
536 | return handler(vcpu); | ||
537 | } | ||
538 | return -EOPNOTSUPP; | ||
539 | } | ||
540 | |||
541 | static const intercept_handler_t eb_handlers[256] = { /* indexed by the low byte of sie_block->ipb; unset entries are NULL */ | ||
542 | [0x8a] = handle_io_inst, | ||
543 | }; | ||
544 | |||
545 | int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu) | ||
546 | { /* Dispatch via eb_handlers; returns the handler's result, or -EOPNOTSUPP when no handler is registered. */ | ||
547 | intercept_handler_t handler; | ||
548 | |||
549 | /* All eb instructions that end up here are privileged. */ | ||
550 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) /* checked before the table lookup, so it also covers entries without a handler */ | ||
551 | return kvm_s390_inject_program_int(vcpu, | ||
552 | PGM_PRIVILEGED_OPERATION); | ||
553 | handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; | ||
554 | if (handler) | ||
555 | return handler(vcpu); | ||
556 | return -EOPNOTSUPP; | ||
557 | } | ||
558 | |||
336 | static int handle_tprot(struct kvm_vcpu *vcpu) | 559 | static int handle_tprot(struct kvm_vcpu *vcpu) |
337 | { | 560 | { |
338 | int base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; | 561 | u64 address1, address2; |
339 | int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; | ||
340 | int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; | ||
341 | int disp2 = vcpu->arch.sie_block->ipb & 0x0fff; | ||
342 | u64 address1 = disp1 + base1 ? vcpu->run->s.regs.gprs[base1] : 0; | ||
343 | u64 address2 = disp2 + base2 ? vcpu->run->s.regs.gprs[base2] : 0; | ||
344 | struct vm_area_struct *vma; | 562 | struct vm_area_struct *vma; |
345 | unsigned long user_address; | 563 | unsigned long user_address; |
346 | 564 | ||
347 | vcpu->stat.instruction_tprot++; | 565 | vcpu->stat.instruction_tprot++; |
348 | 566 | ||
567 | kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); | ||
568 | |||
349 | /* we only handle the Linux memory detection case: | 569 | /* we only handle the Linux memory detection case: |
350 | * access key == 0 | 570 | * access key == 0 |
351 | * guest DAT == off | 571 | * guest DAT == off |
@@ -405,7 +625,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) | |||
405 | return 0; | 625 | return 0; |
406 | } | 626 | } |
407 | 627 | ||
408 | static intercept_handler_t x01_handlers[256] = { | 628 | static const intercept_handler_t x01_handlers[256] = { |
409 | [0x07] = handle_sckpf, | 629 | [0x07] = handle_sckpf, |
410 | }; | 630 | }; |
411 | 631 | ||
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 566ddf6e8dfb..1c48ab2845e0 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
@@ -137,8 +137,10 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) | |||
137 | inti->type = KVM_S390_SIGP_STOP; | 137 | inti->type = KVM_S390_SIGP_STOP; |
138 | 138 | ||
139 | spin_lock_bh(&li->lock); | 139 | spin_lock_bh(&li->lock); |
140 | if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) | 140 | if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { |
141 | kfree(inti); | ||
141 | goto out; | 142 | goto out; |
143 | } | ||
142 | list_add_tail(&inti->list, &li->list); | 144 | list_add_tail(&inti->list, &li->list); |
143 | atomic_set(&li->active, 1); | 145 | atomic_set(&li->active, 1); |
144 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); | 146 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); |
@@ -324,8 +326,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
324 | { | 326 | { |
325 | int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | 327 | int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; |
326 | int r3 = vcpu->arch.sie_block->ipa & 0x000f; | 328 | int r3 = vcpu->arch.sie_block->ipa & 0x000f; |
327 | int base2 = vcpu->arch.sie_block->ipb >> 28; | ||
328 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | ||
329 | u32 parameter; | 329 | u32 parameter; |
330 | u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; | 330 | u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; |
331 | u8 order_code; | 331 | u8 order_code; |
@@ -336,9 +336,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
336 | return kvm_s390_inject_program_int(vcpu, | 336 | return kvm_s390_inject_program_int(vcpu, |
337 | PGM_PRIVILEGED_OPERATION); | 337 | PGM_PRIVILEGED_OPERATION); |
338 | 338 | ||
339 | order_code = disp2; | 339 | order_code = kvm_s390_get_base_disp_rs(vcpu); |
340 | if (base2) | ||
341 | order_code += vcpu->run->s.regs.gprs[base2]; | ||
342 | 340 | ||
343 | if (r1 % 2) | 341 | if (r1 % 2) |
344 | parameter = vcpu->run->s.regs.gprs[r1]; | 342 | parameter = vcpu->run->s.regs.gprs[r1]; |
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 90fdf85b5ff7..13f30f58a2df 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h | |||
@@ -141,13 +141,13 @@ TRACE_EVENT(kvm_s390_inject_vcpu, | |||
141 | * Trace point for the actual delivery of interrupts. | 141 | * Trace point for the actual delivery of interrupts. |
142 | */ | 142 | */ |
143 | TRACE_EVENT(kvm_s390_deliver_interrupt, | 143 | TRACE_EVENT(kvm_s390_deliver_interrupt, |
144 | TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1), | 144 | TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1), |
145 | TP_ARGS(id, type, data0, data1), | 145 | TP_ARGS(id, type, data0, data1), |
146 | 146 | ||
147 | TP_STRUCT__entry( | 147 | TP_STRUCT__entry( |
148 | __field(int, id) | 148 | __field(int, id) |
149 | __field(__u32, inttype) | 149 | __field(__u32, inttype) |
150 | __field(__u32, data0) | 150 | __field(__u64, data0) |
151 | __field(__u64, data1) | 151 | __field(__u64, data1) |
152 | ), | 152 | ), |
153 | 153 | ||
@@ -159,7 +159,7 @@ TRACE_EVENT(kvm_s390_deliver_interrupt, | |||
159 | ), | 159 | ), |
160 | 160 | ||
161 | TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ | 161 | TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ |
162 | "data:%08x %016llx", | 162 | "data:%08llx %016llx", |
163 | __entry->id, __entry->inttype, | 163 | __entry->id, __entry->inttype, |
164 | __print_symbolic(__entry->inttype, kvm_s390_int_type), | 164 | __print_symbolic(__entry->inttype, kvm_s390_int_type), |
165 | __entry->data0, __entry->data1) | 165 | __entry->data0, __entry->data1) |
@@ -204,6 +204,26 @@ TRACE_EVENT(kvm_s390_stop_request, | |||
204 | ); | 204 | ); |
205 | 205 | ||
206 | 206 | ||
207 | /* | ||
208 | * Trace point for enabling channel I/O instruction support; records the kvm pointer it is given. | ||
209 | */ | ||
210 | TRACE_EVENT(kvm_s390_enable_css, | ||
211 | TP_PROTO(void *kvm), | ||
212 | TP_ARGS(kvm), | ||
213 | |||
214 | TP_STRUCT__entry( | ||
215 | __field(void *, kvm) | ||
216 | ), | ||
217 | |||
218 | TP_fast_assign( | ||
219 | __entry->kvm = kvm; | ||
220 | ), | ||
221 | |||
222 | TP_printk("enabling channel I/O support (kvm @ %p)", /* no trailing newline, matching the other events in this file */ | ||
223 | __entry->kvm) | ||
224 | ); | ||
225 | |||
226 | |||
207 | #endif /* _TRACE_KVMS390_H */ | 227 | #endif /* _TRACE_KVMS390_H */ |
208 | 228 | ||
209 | /* This part must be outside protection */ | 229 | /* This part must be outside protection */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dc87b65e9c3a..635a74d22409 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -33,10 +33,10 @@ | |||
33 | 33 | ||
34 | #define KVM_MAX_VCPUS 254 | 34 | #define KVM_MAX_VCPUS 254 |
35 | #define KVM_SOFT_MAX_VCPUS 160 | 35 | #define KVM_SOFT_MAX_VCPUS 160 |
36 | #define KVM_MEMORY_SLOTS 32 | 36 | #define KVM_USER_MEM_SLOTS 125 |
37 | /* memory slots that does not exposed to userspace */ | 37 | /* memory slots that are not exposed to userspace */ |
38 | #define KVM_PRIVATE_MEM_SLOTS 4 | 38 | #define KVM_PRIVATE_MEM_SLOTS 3 |
39 | #define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | 39 | #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) |
40 | 40 | ||
41 | #define KVM_MMIO_SIZE 16 | 41 | #define KVM_MMIO_SIZE 16 |
42 | 42 | ||
@@ -219,11 +219,6 @@ struct kvm_mmu_page { | |||
219 | u64 *spt; | 219 | u64 *spt; |
220 | /* hold the gfn of each spte inside spt */ | 220 | /* hold the gfn of each spte inside spt */ |
221 | gfn_t *gfns; | 221 | gfn_t *gfns; |
222 | /* | ||
223 | * One bit set per slot which has memory | ||
224 | * in this shadow page. | ||
225 | */ | ||
226 | DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM); | ||
227 | bool unsync; | 222 | bool unsync; |
228 | int root_count; /* Currently serving as active root */ | 223 | int root_count; /* Currently serving as active root */ |
229 | unsigned int unsync_children; | 224 | unsigned int unsync_children; |
@@ -502,6 +497,13 @@ struct kvm_vcpu_arch { | |||
502 | u64 msr_val; | 497 | u64 msr_val; |
503 | struct gfn_to_hva_cache data; | 498 | struct gfn_to_hva_cache data; |
504 | } pv_eoi; | 499 | } pv_eoi; |
500 | |||
501 | /* | ||
502 | * Indicates whether the access faulted on the guest's own page | ||
503 | * table. It is set while fixing a page fault and is used to detect | ||
504 | * an unhandleable instruction. | ||
505 | */ | ||
506 | bool write_fault_to_shadow_pgtable; | ||
505 | }; | 507 | }; |
506 | 508 | ||
507 | struct kvm_lpage_info { | 509 | struct kvm_lpage_info { |
@@ -697,6 +699,11 @@ struct kvm_x86_ops { | |||
697 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 699 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
698 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 700 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
699 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 701 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
702 | int (*vm_has_apicv)(struct kvm *kvm); | ||
703 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); | ||
704 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); | ||
705 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); | ||
706 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | ||
700 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 707 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
701 | int (*get_tdp_level)(void); | 708 | int (*get_tdp_level)(void); |
702 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 709 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
@@ -991,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva); | |||
991 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | 998 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); |
992 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 999 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
993 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | 1000 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); |
1001 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); | ||
994 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 1002 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
995 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 1003 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
996 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 1004 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 65231e173baf..695399f2d5eb 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -27,7 +27,7 @@ static inline bool kvm_check_and_clear_guest_paused(void) | |||
27 | * | 27 | * |
28 | * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. | 28 | * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. |
29 | * The hypercall number should be placed in rax and the return value will be | 29 | * The hypercall number should be placed in rax and the return value will be |
30 | * placed in rax. No other registers will be clobbered unless explicited | 30 | * placed in rax. No other registers will be clobbered unless explicitly |
31 | * noted by the particular hypercall. | 31 | * noted by the particular hypercall. |
32 | */ | 32 | */ |
33 | 33 | ||
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 235b49fa554b..b6fbf860e398 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -57,9 +57,12 @@ | |||
57 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | 57 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 |
58 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 | 58 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 |
59 | #define SECONDARY_EXEC_RDTSCP 0x00000008 | 59 | #define SECONDARY_EXEC_RDTSCP 0x00000008 |
60 | #define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 | ||
60 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 61 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
61 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 62 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
62 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 63 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
64 | #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 | ||
65 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 | ||
63 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 66 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
64 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | 67 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 |
65 | 68 | ||
@@ -97,6 +100,7 @@ enum vmcs_field { | |||
97 | GUEST_GS_SELECTOR = 0x0000080a, | 100 | GUEST_GS_SELECTOR = 0x0000080a, |
98 | GUEST_LDTR_SELECTOR = 0x0000080c, | 101 | GUEST_LDTR_SELECTOR = 0x0000080c, |
99 | GUEST_TR_SELECTOR = 0x0000080e, | 102 | GUEST_TR_SELECTOR = 0x0000080e, |
103 | GUEST_INTR_STATUS = 0x00000810, | ||
100 | HOST_ES_SELECTOR = 0x00000c00, | 104 | HOST_ES_SELECTOR = 0x00000c00, |
101 | HOST_CS_SELECTOR = 0x00000c02, | 105 | HOST_CS_SELECTOR = 0x00000c02, |
102 | HOST_SS_SELECTOR = 0x00000c04, | 106 | HOST_SS_SELECTOR = 0x00000c04, |
@@ -124,6 +128,14 @@ enum vmcs_field { | |||
124 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | 128 | APIC_ACCESS_ADDR_HIGH = 0x00002015, |
125 | EPT_POINTER = 0x0000201a, | 129 | EPT_POINTER = 0x0000201a, |
126 | EPT_POINTER_HIGH = 0x0000201b, | 130 | EPT_POINTER_HIGH = 0x0000201b, |
131 | EOI_EXIT_BITMAP0 = 0x0000201c, | ||
132 | EOI_EXIT_BITMAP0_HIGH = 0x0000201d, | ||
133 | EOI_EXIT_BITMAP1 = 0x0000201e, | ||
134 | EOI_EXIT_BITMAP1_HIGH = 0x0000201f, | ||
135 | EOI_EXIT_BITMAP2 = 0x00002020, | ||
136 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, | ||
137 | EOI_EXIT_BITMAP3 = 0x00002022, | ||
138 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, | ||
127 | GUEST_PHYSICAL_ADDRESS = 0x00002400, | 139 | GUEST_PHYSICAL_ADDRESS = 0x00002400, |
128 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, | 140 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, |
129 | VMCS_LINK_POINTER = 0x00002800, | 141 | VMCS_LINK_POINTER = 0x00002800, |
@@ -346,9 +358,9 @@ enum vmcs_field { | |||
346 | 358 | ||
347 | #define AR_RESERVD_MASK 0xfffe0f00 | 359 | #define AR_RESERVD_MASK 0xfffe0f00 |
348 | 360 | ||
349 | #define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) | 361 | #define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0) |
350 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) | 362 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1) |
351 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) | 363 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2) |
352 | 364 | ||
353 | #define VMX_NR_VPIDS (1 << 16) | 365 | #define VMX_NR_VPIDS (1 << 16) |
354 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 | 366 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 |
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 979d03bce135..2871fccfee68 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -62,10 +62,12 @@ | |||
62 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | 62 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 |
63 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 63 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
64 | #define EXIT_REASON_APIC_ACCESS 44 | 64 | #define EXIT_REASON_APIC_ACCESS 44 |
65 | #define EXIT_REASON_EOI_INDUCED 45 | ||
65 | #define EXIT_REASON_EPT_VIOLATION 48 | 66 | #define EXIT_REASON_EPT_VIOLATION 48 |
66 | #define EXIT_REASON_EPT_MISCONFIG 49 | 67 | #define EXIT_REASON_EPT_MISCONFIG 49 |
67 | #define EXIT_REASON_WBINVD 54 | 68 | #define EXIT_REASON_WBINVD 54 |
68 | #define EXIT_REASON_XSETBV 55 | 69 | #define EXIT_REASON_XSETBV 55 |
70 | #define EXIT_REASON_APIC_WRITE 56 | ||
69 | #define EXIT_REASON_INVPCID 58 | 71 | #define EXIT_REASON_INVPCID 58 |
70 | 72 | ||
71 | #define VMX_EXIT_REASONS \ | 73 | #define VMX_EXIT_REASONS \ |
@@ -103,7 +105,12 @@ | |||
103 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ | 105 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ |
104 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ | 106 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ |
105 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ | 107 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ |
106 | { EXIT_REASON_WBINVD, "WBINVD" } | 108 | { EXIT_REASON_WBINVD, "WBINVD" }, \ |
109 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ | ||
110 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | ||
111 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | ||
112 | { EXIT_REASON_INVD, "INVD" }, \ | ||
113 | { EXIT_REASON_INVPCID, "INVPCID" } | ||
107 | 114 | ||
108 | 115 | ||
109 | #endif /* _UAPIVMX_H */ | 116 | #endif /* _UAPIVMX_H */ |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 9f966dc0b9e4..0732f0089a3d 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -218,6 +218,9 @@ static void kvm_shutdown(void) | |||
218 | void __init kvmclock_init(void) | 218 | void __init kvmclock_init(void) |
219 | { | 219 | { |
220 | unsigned long mem; | 220 | unsigned long mem; |
221 | int size; | ||
222 | |||
223 | size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); | ||
221 | 224 | ||
222 | if (!kvm_para_available()) | 225 | if (!kvm_para_available()) |
223 | return; | 226 | return; |
@@ -231,16 +234,14 @@ void __init kvmclock_init(void) | |||
231 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x", | 234 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x", |
232 | msr_kvm_system_time, msr_kvm_wall_clock); | 235 | msr_kvm_system_time, msr_kvm_wall_clock); |
233 | 236 | ||
234 | mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS, | 237 | mem = memblock_alloc(size, PAGE_SIZE); |
235 | PAGE_SIZE); | ||
236 | if (!mem) | 238 | if (!mem) |
237 | return; | 239 | return; |
238 | hv_clock = __va(mem); | 240 | hv_clock = __va(mem); |
239 | 241 | ||
240 | if (kvm_register_clock("boot clock")) { | 242 | if (kvm_register_clock("boot clock")) { |
241 | hv_clock = NULL; | 243 | hv_clock = NULL; |
242 | memblock_free(mem, | 244 | memblock_free(mem, size); |
243 | sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); | ||
244 | return; | 245 | return; |
245 | } | 246 | } |
246 | pv_time_ops.sched_clock = kvm_clock_read; | 247 | pv_time_ops.sched_clock = kvm_clock_read; |
@@ -275,7 +276,7 @@ int __init kvm_setup_vsyscall_timeinfo(void) | |||
275 | struct pvclock_vcpu_time_info *vcpu_time; | 276 | struct pvclock_vcpu_time_info *vcpu_time; |
276 | unsigned int size; | 277 | unsigned int size; |
277 | 278 | ||
278 | size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS; | 279 | size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); |
279 | 280 | ||
280 | preempt_disable(); | 281 | preempt_disable(); |
281 | cpu = smp_processor_id(); | 282 | cpu = smp_processor_id(); |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a27e76371108..a335cc6cde72 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "kvm_cache_regs.h" | 24 | #include "kvm_cache_regs.h" |
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <asm/kvm_emulate.h> | 26 | #include <asm/kvm_emulate.h> |
27 | #include <linux/stringify.h> | ||
27 | 28 | ||
28 | #include "x86.h" | 29 | #include "x86.h" |
29 | #include "tss.h" | 30 | #include "tss.h" |
@@ -43,7 +44,7 @@ | |||
43 | #define OpCL 9ull /* CL register (for shifts) */ | 44 | #define OpCL 9ull /* CL register (for shifts) */ |
44 | #define OpImmByte 10ull /* 8-bit sign extended immediate */ | 45 | #define OpImmByte 10ull /* 8-bit sign extended immediate */ |
45 | #define OpOne 11ull /* Implied 1 */ | 46 | #define OpOne 11ull /* Implied 1 */ |
46 | #define OpImm 12ull /* Sign extended immediate */ | 47 | #define OpImm 12ull /* Sign extended up to 32-bit immediate */ |
47 | #define OpMem16 13ull /* Memory operand (16-bit). */ | 48 | #define OpMem16 13ull /* Memory operand (16-bit). */ |
48 | #define OpMem32 14ull /* Memory operand (32-bit). */ | 49 | #define OpMem32 14ull /* Memory operand (32-bit). */ |
49 | #define OpImmU 15ull /* Immediate operand, zero extended */ | 50 | #define OpImmU 15ull /* Immediate operand, zero extended */ |
@@ -58,6 +59,7 @@ | |||
58 | #define OpFS 24ull /* FS */ | 59 | #define OpFS 24ull /* FS */ |
59 | #define OpGS 25ull /* GS */ | 60 | #define OpGS 25ull /* GS */ |
60 | #define OpMem8 26ull /* 8-bit zero extended memory operand */ | 61 | #define OpMem8 26ull /* 8-bit zero extended memory operand */ |
62 | #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */ | ||
61 | 63 | ||
62 | #define OpBits 5 /* Width of operand field */ | 64 | #define OpBits 5 /* Width of operand field */ |
63 | #define OpMask ((1ull << OpBits) - 1) | 65 | #define OpMask ((1ull << OpBits) - 1) |
@@ -101,6 +103,7 @@ | |||
101 | #define SrcMemFAddr (OpMemFAddr << SrcShift) | 103 | #define SrcMemFAddr (OpMemFAddr << SrcShift) |
102 | #define SrcAcc (OpAcc << SrcShift) | 104 | #define SrcAcc (OpAcc << SrcShift) |
103 | #define SrcImmU16 (OpImmU16 << SrcShift) | 105 | #define SrcImmU16 (OpImmU16 << SrcShift) |
106 | #define SrcImm64 (OpImm64 << SrcShift) | ||
104 | #define SrcDX (OpDX << SrcShift) | 107 | #define SrcDX (OpDX << SrcShift) |
105 | #define SrcMem8 (OpMem8 << SrcShift) | 108 | #define SrcMem8 (OpMem8 << SrcShift) |
106 | #define SrcMask (OpMask << SrcShift) | 109 | #define SrcMask (OpMask << SrcShift) |
@@ -113,6 +116,7 @@ | |||
113 | #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ | 116 | #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ |
114 | #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ | 117 | #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ |
115 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ | 118 | #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ |
119 | #define Escape (5<<15) /* Escape to coprocessor instruction */ | ||
116 | #define Sse (1<<18) /* SSE Vector instruction */ | 120 | #define Sse (1<<18) /* SSE Vector instruction */ |
117 | /* Generic ModRM decode. */ | 121 | /* Generic ModRM decode. */ |
118 | #define ModRM (1<<19) | 122 | #define ModRM (1<<19) |
@@ -146,6 +150,8 @@ | |||
146 | #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ | 150 | #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ |
147 | #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ | 151 | #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ |
148 | #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ | 152 | #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ |
153 | #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ | ||
154 | #define NoWrite ((u64)1 << 45) /* No writeback */ | ||
149 | 155 | ||
150 | #define X2(x...) x, x | 156 | #define X2(x...) x, x |
151 | #define X3(x...) X2(x), x | 157 | #define X3(x...) X2(x), x |
@@ -156,6 +162,27 @@ | |||
156 | #define X8(x...) X4(x), X4(x) | 162 | #define X8(x...) X4(x), X4(x) |
157 | #define X16(x...) X8(x), X8(x) | 163 | #define X16(x...) X8(x), X8(x) |
158 | 164 | ||
165 | #define NR_FASTOP (ilog2(sizeof(ulong)) + 1) | ||
166 | #define FASTOP_SIZE 8 | ||
167 | |||
168 | /* | ||
169 | * fastop functions have a special calling convention: | ||
170 | * | ||
171 | * dst: [rdx]:rax (in/out) | ||
172 | * src: rbx (in/out) | ||
173 | * src2: rcx (in) | ||
174 | * flags: rflags (in/out) | ||
175 | * | ||
176 | * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for | ||
177 | * different operand sizes can be reached by calculation, rather than a jump | ||
178 | * table (which would be bigger than the code). | ||
179 | * | ||
180 | * fastop functions are declared as taking a never-defined fastop parameter, | ||
181 | * so they can't be called from C directly. | ||
182 | */ | ||
183 | |||
184 | struct fastop; | ||
185 | |||
159 | struct opcode { | 186 | struct opcode { |
160 | u64 flags : 56; | 187 | u64 flags : 56; |
161 | u64 intercept : 8; | 188 | u64 intercept : 8; |
@@ -164,6 +191,8 @@ struct opcode { | |||
164 | const struct opcode *group; | 191 | const struct opcode *group; |
165 | const struct group_dual *gdual; | 192 | const struct group_dual *gdual; |
166 | const struct gprefix *gprefix; | 193 | const struct gprefix *gprefix; |
194 | const struct escape *esc; | ||
195 | void (*fastop)(struct fastop *fake); | ||
167 | } u; | 196 | } u; |
168 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | 197 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); |
169 | }; | 198 | }; |
@@ -180,6 +209,11 @@ struct gprefix { | |||
180 | struct opcode pfx_f3; | 209 | struct opcode pfx_f3; |
181 | }; | 210 | }; |
182 | 211 | ||
212 | struct escape { | ||
213 | struct opcode op[8]; | ||
214 | struct opcode high[64]; | ||
215 | }; | ||
216 | |||
183 | /* EFLAGS bit definitions. */ | 217 | /* EFLAGS bit definitions. */ |
184 | #define EFLG_ID (1<<21) | 218 | #define EFLG_ID (1<<21) |
185 | #define EFLG_VIP (1<<20) | 219 | #define EFLG_VIP (1<<20) |
@@ -407,6 +441,97 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) | |||
407 | } \ | 441 | } \ |
408 | } while (0) | 442 | } while (0) |
409 | 443 | ||
444 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); | ||
445 | |||
446 | #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" | ||
447 | #define FOP_RET "ret \n\t" | ||
448 | |||
449 | #define FOP_START(op) \ | ||
450 | extern void em_##op(struct fastop *fake); \ | ||
451 | asm(".pushsection .text, \"ax\" \n\t" \ | ||
452 | ".global em_" #op " \n\t" \ | ||
453 | FOP_ALIGN \ | ||
454 | "em_" #op ": \n\t" | ||
455 | |||
456 | #define FOP_END \ | ||
457 | ".popsection") | ||
458 | |||
459 | #define FOPNOP() FOP_ALIGN FOP_RET | ||
460 | |||
461 | #define FOP1E(op, dst) \ | ||
462 | FOP_ALIGN #op " %" #dst " \n\t" FOP_RET | ||
463 | |||
464 | #define FASTOP1(op) \ | ||
465 | FOP_START(op) \ | ||
466 | FOP1E(op##b, al) \ | ||
467 | FOP1E(op##w, ax) \ | ||
468 | FOP1E(op##l, eax) \ | ||
469 | ON64(FOP1E(op##q, rax)) \ | ||
470 | FOP_END | ||
471 | |||
472 | #define FOP2E(op, dst, src) \ | ||
473 | FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET | ||
474 | |||
475 | #define FASTOP2(op) \ | ||
476 | FOP_START(op) \ | ||
477 | FOP2E(op##b, al, bl) \ | ||
478 | FOP2E(op##w, ax, bx) \ | ||
479 | FOP2E(op##l, eax, ebx) \ | ||
480 | ON64(FOP2E(op##q, rax, rbx)) \ | ||
481 | FOP_END | ||
482 | |||
483 | /* 2 operand, word only */ | ||
484 | #define FASTOP2W(op) \ | ||
485 | FOP_START(op) \ | ||
486 | FOPNOP() \ | ||
487 | FOP2E(op##w, ax, bx) \ | ||
488 | FOP2E(op##l, eax, ebx) \ | ||
489 | ON64(FOP2E(op##q, rax, rbx)) \ | ||
490 | FOP_END | ||
491 | |||
492 | /* 2 operand, src is CL */ | ||
493 | #define FASTOP2CL(op) \ | ||
494 | FOP_START(op) \ | ||
495 | FOP2E(op##b, al, cl) \ | ||
496 | FOP2E(op##w, ax, cl) \ | ||
497 | FOP2E(op##l, eax, cl) \ | ||
498 | ON64(FOP2E(op##q, rax, cl)) \ | ||
499 | FOP_END | ||
500 | |||
501 | #define FOP3E(op, dst, src, src2) \ | ||
502 | FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET | ||
503 | |||
504 | /* 3-operand, word-only, src2=cl */ | ||
505 | #define FASTOP3WCL(op) \ | ||
506 | FOP_START(op) \ | ||
507 | FOPNOP() \ | ||
508 | FOP3E(op##w, ax, bx, cl) \ | ||
509 | FOP3E(op##l, eax, ebx, cl) \ | ||
510 | ON64(FOP3E(op##q, rax, rbx, cl)) \ | ||
511 | FOP_END | ||
512 | |||
513 | /* Special case for SETcc - 1 instruction per cc */ | ||
514 | #define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" | ||
515 | |||
516 | FOP_START(setcc) | ||
517 | FOP_SETCC(seto) | ||
518 | FOP_SETCC(setno) | ||
519 | FOP_SETCC(setc) | ||
520 | FOP_SETCC(setnc) | ||
521 | FOP_SETCC(setz) | ||
522 | FOP_SETCC(setnz) | ||
523 | FOP_SETCC(setbe) | ||
524 | FOP_SETCC(setnbe) | ||
525 | FOP_SETCC(sets) | ||
526 | FOP_SETCC(setns) | ||
527 | FOP_SETCC(setp) | ||
528 | FOP_SETCC(setnp) | ||
529 | FOP_SETCC(setl) | ||
530 | FOP_SETCC(setnl) | ||
531 | FOP_SETCC(setle) | ||
532 | FOP_SETCC(setnle) | ||
533 | FOP_END; | ||
534 | |||
410 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ | 535 | #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ |
411 | do { \ | 536 | do { \ |
412 | unsigned long _tmp; \ | 537 | unsigned long _tmp; \ |
@@ -663,7 +788,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
663 | ulong la; | 788 | ulong la; |
664 | u32 lim; | 789 | u32 lim; |
665 | u16 sel; | 790 | u16 sel; |
666 | unsigned cpl, rpl; | 791 | unsigned cpl; |
667 | 792 | ||
668 | la = seg_base(ctxt, addr.seg) + addr.ea; | 793 | la = seg_base(ctxt, addr.seg) + addr.ea; |
669 | switch (ctxt->mode) { | 794 | switch (ctxt->mode) { |
@@ -697,11 +822,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
697 | goto bad; | 822 | goto bad; |
698 | } | 823 | } |
699 | cpl = ctxt->ops->cpl(ctxt); | 824 | cpl = ctxt->ops->cpl(ctxt); |
700 | if (ctxt->mode == X86EMUL_MODE_REAL) | ||
701 | rpl = 0; | ||
702 | else | ||
703 | rpl = sel & 3; | ||
704 | cpl = max(cpl, rpl); | ||
705 | if (!(desc.type & 8)) { | 825 | if (!(desc.type & 8)) { |
706 | /* data segment */ | 826 | /* data segment */ |
707 | if (cpl > desc.dpl) | 827 | if (cpl > desc.dpl) |
@@ -852,39 +972,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
852 | return rc; | 972 | return rc; |
853 | } | 973 | } |
854 | 974 | ||
855 | static int test_cc(unsigned int condition, unsigned int flags) | 975 | FASTOP2(add); |
856 | { | 976 | FASTOP2(or); |
857 | int rc = 0; | 977 | FASTOP2(adc); |
858 | 978 | FASTOP2(sbb); | |
859 | switch ((condition & 15) >> 1) { | 979 | FASTOP2(and); |
860 | case 0: /* o */ | 980 | FASTOP2(sub); |
861 | rc |= (flags & EFLG_OF); | 981 | FASTOP2(xor); |
862 | break; | 982 | FASTOP2(cmp); |
863 | case 1: /* b/c/nae */ | 983 | FASTOP2(test); |
864 | rc |= (flags & EFLG_CF); | 984 | |
865 | break; | 985 | FASTOP3WCL(shld); |
866 | case 2: /* z/e */ | 986 | FASTOP3WCL(shrd); |
867 | rc |= (flags & EFLG_ZF); | 987 | |
868 | break; | 988 | FASTOP2W(imul); |
869 | case 3: /* be/na */ | 989 | |
870 | rc |= (flags & (EFLG_CF|EFLG_ZF)); | 990 | FASTOP1(not); |
871 | break; | 991 | FASTOP1(neg); |
872 | case 4: /* s */ | 992 | FASTOP1(inc); |
873 | rc |= (flags & EFLG_SF); | 993 | FASTOP1(dec); |
874 | break; | 994 | |
875 | case 5: /* p/pe */ | 995 | FASTOP2CL(rol); |
876 | rc |= (flags & EFLG_PF); | 996 | FASTOP2CL(ror); |
877 | break; | 997 | FASTOP2CL(rcl); |
878 | case 7: /* le/ng */ | 998 | FASTOP2CL(rcr); |
879 | rc |= (flags & EFLG_ZF); | 999 | FASTOP2CL(shl); |
880 | /* fall through */ | 1000 | FASTOP2CL(shr); |
881 | case 6: /* l/nge */ | 1001 | FASTOP2CL(sar); |
882 | rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); | 1002 | |
883 | break; | 1003 | FASTOP2W(bsf); |
884 | } | 1004 | FASTOP2W(bsr); |
885 | 1005 | FASTOP2W(bt); | |
886 | /* Odd condition identifiers (lsb == 1) have inverted sense. */ | 1006 | FASTOP2W(bts); |
887 | return (!!rc ^ (condition & 1)); | 1007 | FASTOP2W(btr); |
1008 | FASTOP2W(btc); | ||
1009 | |||
1010 | static u8 test_cc(unsigned int condition, unsigned long flags) | ||
1011 | { | ||
1012 | u8 rc; | ||
1013 | void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); | ||
1014 | |||
1015 | flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; | ||
1016 | asm("push %[flags]; popf; call *%[fastop]" | ||
1017 | : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); | ||
1018 | return rc; | ||
888 | } | 1019 | } |
889 | 1020 | ||
890 | static void fetch_register_operand(struct operand *op) | 1021 | static void fetch_register_operand(struct operand *op) |
@@ -994,6 +1125,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) | |||
994 | ctxt->ops->put_fpu(ctxt); | 1125 | ctxt->ops->put_fpu(ctxt); |
995 | } | 1126 | } |
996 | 1127 | ||
1128 | static int em_fninit(struct x86_emulate_ctxt *ctxt) | ||
1129 | { | ||
1130 | if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) | ||
1131 | return emulate_nm(ctxt); | ||
1132 | |||
1133 | ctxt->ops->get_fpu(ctxt); | ||
1134 | asm volatile("fninit"); | ||
1135 | ctxt->ops->put_fpu(ctxt); | ||
1136 | return X86EMUL_CONTINUE; | ||
1137 | } | ||
1138 | |||
1139 | static int em_fnstcw(struct x86_emulate_ctxt *ctxt) | ||
1140 | { | ||
1141 | u16 fcw; | ||
1142 | |||
1143 | if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) | ||
1144 | return emulate_nm(ctxt); | ||
1145 | |||
1146 | ctxt->ops->get_fpu(ctxt); | ||
1147 | asm volatile("fnstcw %0": "+m"(fcw)); | ||
1148 | ctxt->ops->put_fpu(ctxt); | ||
1149 | |||
1150 | /* force 2 byte destination */ | ||
1151 | ctxt->dst.bytes = 2; | ||
1152 | ctxt->dst.val = fcw; | ||
1153 | |||
1154 | return X86EMUL_CONTINUE; | ||
1155 | } | ||
1156 | |||
1157 | static int em_fnstsw(struct x86_emulate_ctxt *ctxt) | ||
1158 | { | ||
1159 | u16 fsw; | ||
1160 | |||
1161 | if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) | ||
1162 | return emulate_nm(ctxt); | ||
1163 | |||
1164 | ctxt->ops->get_fpu(ctxt); | ||
1165 | asm volatile("fnstsw %0": "+m"(fsw)); | ||
1166 | ctxt->ops->put_fpu(ctxt); | ||
1167 | |||
1168 | /* force 2 byte destination */ | ||
1169 | ctxt->dst.bytes = 2; | ||
1170 | ctxt->dst.val = fsw; | ||
1171 | |||
1172 | return X86EMUL_CONTINUE; | ||
1173 | } | ||
1174 | |||
997 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | 1175 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, |
998 | struct operand *op) | 1176 | struct operand *op) |
999 | { | 1177 | { |
@@ -1534,6 +1712,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt) | |||
1534 | { | 1712 | { |
1535 | int rc; | 1713 | int rc; |
1536 | 1714 | ||
1715 | if (ctxt->d & NoWrite) | ||
1716 | return X86EMUL_CONTINUE; | ||
1717 | |||
1537 | switch (ctxt->dst.type) { | 1718 | switch (ctxt->dst.type) { |
1538 | case OP_REG: | 1719 | case OP_REG: |
1539 | write_register_operand(&ctxt->dst); | 1720 | write_register_operand(&ctxt->dst); |
@@ -1918,47 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | |||
1918 | return X86EMUL_CONTINUE; | 2099 | return X86EMUL_CONTINUE; |
1919 | } | 2100 | } |
1920 | 2101 | ||
1921 | static int em_grp2(struct x86_emulate_ctxt *ctxt) | ||
1922 | { | ||
1923 | switch (ctxt->modrm_reg) { | ||
1924 | case 0: /* rol */ | ||
1925 | emulate_2op_SrcB(ctxt, "rol"); | ||
1926 | break; | ||
1927 | case 1: /* ror */ | ||
1928 | emulate_2op_SrcB(ctxt, "ror"); | ||
1929 | break; | ||
1930 | case 2: /* rcl */ | ||
1931 | emulate_2op_SrcB(ctxt, "rcl"); | ||
1932 | break; | ||
1933 | case 3: /* rcr */ | ||
1934 | emulate_2op_SrcB(ctxt, "rcr"); | ||
1935 | break; | ||
1936 | case 4: /* sal/shl */ | ||
1937 | case 6: /* sal/shl */ | ||
1938 | emulate_2op_SrcB(ctxt, "sal"); | ||
1939 | break; | ||
1940 | case 5: /* shr */ | ||
1941 | emulate_2op_SrcB(ctxt, "shr"); | ||
1942 | break; | ||
1943 | case 7: /* sar */ | ||
1944 | emulate_2op_SrcB(ctxt, "sar"); | ||
1945 | break; | ||
1946 | } | ||
1947 | return X86EMUL_CONTINUE; | ||
1948 | } | ||
1949 | |||
1950 | static int em_not(struct x86_emulate_ctxt *ctxt) | ||
1951 | { | ||
1952 | ctxt->dst.val = ~ctxt->dst.val; | ||
1953 | return X86EMUL_CONTINUE; | ||
1954 | } | ||
1955 | |||
1956 | static int em_neg(struct x86_emulate_ctxt *ctxt) | ||
1957 | { | ||
1958 | emulate_1op(ctxt, "neg"); | ||
1959 | return X86EMUL_CONTINUE; | ||
1960 | } | ||
1961 | |||
1962 | static int em_mul_ex(struct x86_emulate_ctxt *ctxt) | 2102 | static int em_mul_ex(struct x86_emulate_ctxt *ctxt) |
1963 | { | 2103 | { |
1964 | u8 ex = 0; | 2104 | u8 ex = 0; |
@@ -2000,12 +2140,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) | |||
2000 | int rc = X86EMUL_CONTINUE; | 2140 | int rc = X86EMUL_CONTINUE; |
2001 | 2141 | ||
2002 | switch (ctxt->modrm_reg) { | 2142 | switch (ctxt->modrm_reg) { |
2003 | case 0: /* inc */ | ||
2004 | emulate_1op(ctxt, "inc"); | ||
2005 | break; | ||
2006 | case 1: /* dec */ | ||
2007 | emulate_1op(ctxt, "dec"); | ||
2008 | break; | ||
2009 | case 2: /* call near abs */ { | 2143 | case 2: /* call near abs */ { |
2010 | long int old_eip; | 2144 | long int old_eip; |
2011 | old_eip = ctxt->_eip; | 2145 | old_eip = ctxt->_eip; |
@@ -2075,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) | |||
2075 | /* Save real source value, then compare EAX against destination. */ | 2209 | /* Save real source value, then compare EAX against destination. */ |
2076 | ctxt->src.orig_val = ctxt->src.val; | 2210 | ctxt->src.orig_val = ctxt->src.val; |
2077 | ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); | 2211 | ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); |
2078 | emulate_2op_SrcV(ctxt, "cmp"); | 2212 | fastop(ctxt, em_cmp); |
2079 | 2213 | ||
2080 | if (ctxt->eflags & EFLG_ZF) { | 2214 | if (ctxt->eflags & EFLG_ZF) { |
2081 | /* Success: write back to memory. */ | 2215 | /* Success: write back to memory. */ |
@@ -2843,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
2843 | ctxt->src.type = OP_IMM; | 2977 | ctxt->src.type = OP_IMM; |
2844 | ctxt->src.val = 0; | 2978 | ctxt->src.val = 0; |
2845 | ctxt->src.bytes = 1; | 2979 | ctxt->src.bytes = 1; |
2846 | emulate_2op_SrcV(ctxt, "or"); | 2980 | fastop(ctxt, em_or); |
2847 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); | 2981 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); |
2848 | if (cf) | 2982 | if (cf) |
2849 | ctxt->eflags |= X86_EFLAGS_CF; | 2983 | ctxt->eflags |= X86_EFLAGS_CF; |
@@ -2852,6 +2986,24 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
2852 | return X86EMUL_CONTINUE; | 2986 | return X86EMUL_CONTINUE; |
2853 | } | 2987 | } |
2854 | 2988 | ||
2989 | static int em_aad(struct x86_emulate_ctxt *ctxt) | ||
2990 | { | ||
2991 | u8 al = ctxt->dst.val & 0xff; | ||
2992 | u8 ah = (ctxt->dst.val >> 8) & 0xff; | ||
2993 | |||
2994 | al = (al + (ah * ctxt->src.val)) & 0xff; | ||
2995 | |||
2996 | ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al; | ||
2997 | |||
2998 | /* Set PF, ZF, SF */ | ||
2999 | ctxt->src.type = OP_IMM; | ||
3000 | ctxt->src.val = 0; | ||
3001 | ctxt->src.bytes = 1; | ||
3002 | fastop(ctxt, em_or); | ||
3003 | |||
3004 | return X86EMUL_CONTINUE; | ||
3005 | } | ||
3006 | |||
2855 | static int em_call(struct x86_emulate_ctxt *ctxt) | 3007 | static int em_call(struct x86_emulate_ctxt *ctxt) |
2856 | { | 3008 | { |
2857 | long rel = ctxt->src.val; | 3009 | long rel = ctxt->src.val; |
@@ -2900,64 +3052,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | |||
2900 | return X86EMUL_CONTINUE; | 3052 | return X86EMUL_CONTINUE; |
2901 | } | 3053 | } |
2902 | 3054 | ||
2903 | static int em_add(struct x86_emulate_ctxt *ctxt) | ||
2904 | { | ||
2905 | emulate_2op_SrcV(ctxt, "add"); | ||
2906 | return X86EMUL_CONTINUE; | ||
2907 | } | ||
2908 | |||
2909 | static int em_or(struct x86_emulate_ctxt *ctxt) | ||
2910 | { | ||
2911 | emulate_2op_SrcV(ctxt, "or"); | ||
2912 | return X86EMUL_CONTINUE; | ||
2913 | } | ||
2914 | |||
2915 | static int em_adc(struct x86_emulate_ctxt *ctxt) | ||
2916 | { | ||
2917 | emulate_2op_SrcV(ctxt, "adc"); | ||
2918 | return X86EMUL_CONTINUE; | ||
2919 | } | ||
2920 | |||
2921 | static int em_sbb(struct x86_emulate_ctxt *ctxt) | ||
2922 | { | ||
2923 | emulate_2op_SrcV(ctxt, "sbb"); | ||
2924 | return X86EMUL_CONTINUE; | ||
2925 | } | ||
2926 | |||
2927 | static int em_and(struct x86_emulate_ctxt *ctxt) | ||
2928 | { | ||
2929 | emulate_2op_SrcV(ctxt, "and"); | ||
2930 | return X86EMUL_CONTINUE; | ||
2931 | } | ||
2932 | |||
2933 | static int em_sub(struct x86_emulate_ctxt *ctxt) | ||
2934 | { | ||
2935 | emulate_2op_SrcV(ctxt, "sub"); | ||
2936 | return X86EMUL_CONTINUE; | ||
2937 | } | ||
2938 | |||
2939 | static int em_xor(struct x86_emulate_ctxt *ctxt) | ||
2940 | { | ||
2941 | emulate_2op_SrcV(ctxt, "xor"); | ||
2942 | return X86EMUL_CONTINUE; | ||
2943 | } | ||
2944 | |||
2945 | static int em_cmp(struct x86_emulate_ctxt *ctxt) | ||
2946 | { | ||
2947 | emulate_2op_SrcV(ctxt, "cmp"); | ||
2948 | /* Disable writeback. */ | ||
2949 | ctxt->dst.type = OP_NONE; | ||
2950 | return X86EMUL_CONTINUE; | ||
2951 | } | ||
2952 | |||
2953 | static int em_test(struct x86_emulate_ctxt *ctxt) | ||
2954 | { | ||
2955 | emulate_2op_SrcV(ctxt, "test"); | ||
2956 | /* Disable writeback. */ | ||
2957 | ctxt->dst.type = OP_NONE; | ||
2958 | return X86EMUL_CONTINUE; | ||
2959 | } | ||
2960 | |||
2961 | static int em_xchg(struct x86_emulate_ctxt *ctxt) | 3055 | static int em_xchg(struct x86_emulate_ctxt *ctxt) |
2962 | { | 3056 | { |
2963 | /* Write back the register source. */ | 3057 | /* Write back the register source. */ |
@@ -2970,16 +3064,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) | |||
2970 | return X86EMUL_CONTINUE; | 3064 | return X86EMUL_CONTINUE; |
2971 | } | 3065 | } |
2972 | 3066 | ||
2973 | static int em_imul(struct x86_emulate_ctxt *ctxt) | ||
2974 | { | ||
2975 | emulate_2op_SrcV_nobyte(ctxt, "imul"); | ||
2976 | return X86EMUL_CONTINUE; | ||
2977 | } | ||
2978 | |||
2979 | static int em_imul_3op(struct x86_emulate_ctxt *ctxt) | 3067 | static int em_imul_3op(struct x86_emulate_ctxt *ctxt) |
2980 | { | 3068 | { |
2981 | ctxt->dst.val = ctxt->src2.val; | 3069 | ctxt->dst.val = ctxt->src2.val; |
2982 | return em_imul(ctxt); | 3070 | return fastop(ctxt, em_imul); |
2983 | } | 3071 | } |
2984 | 3072 | ||
2985 | static int em_cwd(struct x86_emulate_ctxt *ctxt) | 3073 | static int em_cwd(struct x86_emulate_ctxt *ctxt) |
@@ -3300,47 +3388,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt) | |||
3300 | return X86EMUL_CONTINUE; | 3388 | return X86EMUL_CONTINUE; |
3301 | } | 3389 | } |
3302 | 3390 | ||
3303 | static int em_bt(struct x86_emulate_ctxt *ctxt) | ||
3304 | { | ||
3305 | /* Disable writeback. */ | ||
3306 | ctxt->dst.type = OP_NONE; | ||
3307 | /* only subword offset */ | ||
3308 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; | ||
3309 | |||
3310 | emulate_2op_SrcV_nobyte(ctxt, "bt"); | ||
3311 | return X86EMUL_CONTINUE; | ||
3312 | } | ||
3313 | |||
3314 | static int em_bts(struct x86_emulate_ctxt *ctxt) | ||
3315 | { | ||
3316 | emulate_2op_SrcV_nobyte(ctxt, "bts"); | ||
3317 | return X86EMUL_CONTINUE; | ||
3318 | } | ||
3319 | |||
3320 | static int em_btr(struct x86_emulate_ctxt *ctxt) | ||
3321 | { | ||
3322 | emulate_2op_SrcV_nobyte(ctxt, "btr"); | ||
3323 | return X86EMUL_CONTINUE; | ||
3324 | } | ||
3325 | |||
3326 | static int em_btc(struct x86_emulate_ctxt *ctxt) | ||
3327 | { | ||
3328 | emulate_2op_SrcV_nobyte(ctxt, "btc"); | ||
3329 | return X86EMUL_CONTINUE; | ||
3330 | } | ||
3331 | |||
3332 | static int em_bsf(struct x86_emulate_ctxt *ctxt) | ||
3333 | { | ||
3334 | emulate_2op_SrcV_nobyte(ctxt, "bsf"); | ||
3335 | return X86EMUL_CONTINUE; | ||
3336 | } | ||
3337 | |||
3338 | static int em_bsr(struct x86_emulate_ctxt *ctxt) | ||
3339 | { | ||
3340 | emulate_2op_SrcV_nobyte(ctxt, "bsr"); | ||
3341 | return X86EMUL_CONTINUE; | ||
3342 | } | ||
3343 | |||
3344 | static int em_cpuid(struct x86_emulate_ctxt *ctxt) | 3391 | static int em_cpuid(struct x86_emulate_ctxt *ctxt) |
3345 | { | 3392 | { |
3346 | u32 eax, ebx, ecx, edx; | 3393 | u32 eax, ebx, ecx, edx; |
@@ -3572,7 +3619,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3572 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } | 3619 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } |
3573 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } | 3620 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } |
3574 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } | 3621 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } |
3622 | #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } | ||
3575 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } | 3623 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } |
3624 | #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } | ||
3576 | #define II(_f, _e, _i) \ | 3625 | #define II(_f, _e, _i) \ |
3577 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } | 3626 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } |
3578 | #define IIP(_f, _e, _i, _p) \ | 3627 | #define IIP(_f, _e, _i, _p) \ |
@@ -3583,12 +3632,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3583 | #define D2bv(_f) D((_f) | ByteOp), D(_f) | 3632 | #define D2bv(_f) D((_f) | ByteOp), D(_f) |
3584 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) | 3633 | #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) |
3585 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) | 3634 | #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) |
3635 | #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e) | ||
3586 | #define I2bvIP(_f, _e, _i, _p) \ | 3636 | #define I2bvIP(_f, _e, _i, _p) \ |
3587 | IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) | 3637 | IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) |
3588 | 3638 | ||
3589 | #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ | 3639 | #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \ |
3590 | I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ | 3640 | F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ |
3591 | I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) | 3641 | F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) |
3592 | 3642 | ||
3593 | static const struct opcode group7_rm1[] = { | 3643 | static const struct opcode group7_rm1[] = { |
3594 | DI(SrcNone | Priv, monitor), | 3644 | DI(SrcNone | Priv, monitor), |
@@ -3614,25 +3664,36 @@ static const struct opcode group7_rm7[] = { | |||
3614 | }; | 3664 | }; |
3615 | 3665 | ||
3616 | static const struct opcode group1[] = { | 3666 | static const struct opcode group1[] = { |
3617 | I(Lock, em_add), | 3667 | F(Lock, em_add), |
3618 | I(Lock | PageTable, em_or), | 3668 | F(Lock | PageTable, em_or), |
3619 | I(Lock, em_adc), | 3669 | F(Lock, em_adc), |
3620 | I(Lock, em_sbb), | 3670 | F(Lock, em_sbb), |
3621 | I(Lock | PageTable, em_and), | 3671 | F(Lock | PageTable, em_and), |
3622 | I(Lock, em_sub), | 3672 | F(Lock, em_sub), |
3623 | I(Lock, em_xor), | 3673 | F(Lock, em_xor), |
3624 | I(0, em_cmp), | 3674 | F(NoWrite, em_cmp), |
3625 | }; | 3675 | }; |
3626 | 3676 | ||
3627 | static const struct opcode group1A[] = { | 3677 | static const struct opcode group1A[] = { |
3628 | I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, | 3678 | I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, |
3629 | }; | 3679 | }; |
3630 | 3680 | ||
3681 | static const struct opcode group2[] = { | ||
3682 | F(DstMem | ModRM, em_rol), | ||
3683 | F(DstMem | ModRM, em_ror), | ||
3684 | F(DstMem | ModRM, em_rcl), | ||
3685 | F(DstMem | ModRM, em_rcr), | ||
3686 | F(DstMem | ModRM, em_shl), | ||
3687 | F(DstMem | ModRM, em_shr), | ||
3688 | F(DstMem | ModRM, em_shl), | ||
3689 | F(DstMem | ModRM, em_sar), | ||
3690 | }; | ||
3691 | |||
3631 | static const struct opcode group3[] = { | 3692 | static const struct opcode group3[] = { |
3632 | I(DstMem | SrcImm, em_test), | 3693 | F(DstMem | SrcImm | NoWrite, em_test), |
3633 | I(DstMem | SrcImm, em_test), | 3694 | F(DstMem | SrcImm | NoWrite, em_test), |
3634 | I(DstMem | SrcNone | Lock, em_not), | 3695 | F(DstMem | SrcNone | Lock, em_not), |
3635 | I(DstMem | SrcNone | Lock, em_neg), | 3696 | F(DstMem | SrcNone | Lock, em_neg), |
3636 | I(SrcMem, em_mul_ex), | 3697 | I(SrcMem, em_mul_ex), |
3637 | I(SrcMem, em_imul_ex), | 3698 | I(SrcMem, em_imul_ex), |
3638 | I(SrcMem, em_div_ex), | 3699 | I(SrcMem, em_div_ex), |
@@ -3640,14 +3701,14 @@ static const struct opcode group3[] = { | |||
3640 | }; | 3701 | }; |
3641 | 3702 | ||
3642 | static const struct opcode group4[] = { | 3703 | static const struct opcode group4[] = { |
3643 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), | 3704 | F(ByteOp | DstMem | SrcNone | Lock, em_inc), |
3644 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), | 3705 | F(ByteOp | DstMem | SrcNone | Lock, em_dec), |
3645 | N, N, N, N, N, N, | 3706 | N, N, N, N, N, N, |
3646 | }; | 3707 | }; |
3647 | 3708 | ||
3648 | static const struct opcode group5[] = { | 3709 | static const struct opcode group5[] = { |
3649 | I(DstMem | SrcNone | Lock, em_grp45), | 3710 | F(DstMem | SrcNone | Lock, em_inc), |
3650 | I(DstMem | SrcNone | Lock, em_grp45), | 3711 | F(DstMem | SrcNone | Lock, em_dec), |
3651 | I(SrcMem | Stack, em_grp45), | 3712 | I(SrcMem | Stack, em_grp45), |
3652 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), | 3713 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), |
3653 | I(SrcMem | Stack, em_grp45), | 3714 | I(SrcMem | Stack, em_grp45), |
@@ -3682,10 +3743,10 @@ static const struct group_dual group7 = { { | |||
3682 | 3743 | ||
3683 | static const struct opcode group8[] = { | 3744 | static const struct opcode group8[] = { |
3684 | N, N, N, N, | 3745 | N, N, N, N, |
3685 | I(DstMem | SrcImmByte, em_bt), | 3746 | F(DstMem | SrcImmByte | NoWrite, em_bt), |
3686 | I(DstMem | SrcImmByte | Lock | PageTable, em_bts), | 3747 | F(DstMem | SrcImmByte | Lock | PageTable, em_bts), |
3687 | I(DstMem | SrcImmByte | Lock, em_btr), | 3748 | F(DstMem | SrcImmByte | Lock, em_btr), |
3688 | I(DstMem | SrcImmByte | Lock | PageTable, em_btc), | 3749 | F(DstMem | SrcImmByte | Lock | PageTable, em_btc), |
3689 | }; | 3750 | }; |
3690 | 3751 | ||
3691 | static const struct group_dual group9 = { { | 3752 | static const struct group_dual group9 = { { |
@@ -3707,33 +3768,96 @@ static const struct gprefix pfx_vmovntpx = { | |||
3707 | I(0, em_mov), N, N, N, | 3768 | I(0, em_mov), N, N, N, |
3708 | }; | 3769 | }; |
3709 | 3770 | ||
3771 | static const struct escape escape_d9 = { { | ||
3772 | N, N, N, N, N, N, N, I(DstMem, em_fnstcw), | ||
3773 | }, { | ||
3774 | /* 0xC0 - 0xC7 */ | ||
3775 | N, N, N, N, N, N, N, N, | ||
3776 | /* 0xC8 - 0xCF */ | ||
3777 | N, N, N, N, N, N, N, N, | ||
3778 | /* 0xD0 - 0xD7 */ | ||
3779 | N, N, N, N, N, N, N, N, | ||
3780 | /* 0xD8 - 0xDF */ | ||
3781 | N, N, N, N, N, N, N, N, | ||
3782 | /* 0xE0 - 0xE7 */ | ||
3783 | N, N, N, N, N, N, N, N, | ||
3784 | /* 0xE8 - 0xEF */ | ||
3785 | N, N, N, N, N, N, N, N, | ||
3786 | /* 0xF0 - 0xF7 */ | ||
3787 | N, N, N, N, N, N, N, N, | ||
3788 | /* 0xF8 - 0xFF */ | ||
3789 | N, N, N, N, N, N, N, N, | ||
3790 | } }; | ||
3791 | |||
3792 | static const struct escape escape_db = { { | ||
3793 | N, N, N, N, N, N, N, N, | ||
3794 | }, { | ||
3795 | /* 0xC0 - 0xC7 */ | ||
3796 | N, N, N, N, N, N, N, N, | ||
3797 | /* 0xC8 - 0xCF */ | ||
3798 | N, N, N, N, N, N, N, N, | ||
3799 | /* 0xD0 - 0xD7 */ | ||
3800 | N, N, N, N, N, N, N, N, | ||
3801 | /* 0xD8 - 0xDF */ | ||
3802 | N, N, N, N, N, N, N, N, | ||
3803 | /* 0xE0 - 0xE7 */ | ||
3804 | N, N, N, I(ImplicitOps, em_fninit), N, N, N, N, | ||
3805 | /* 0xE8 - 0xEF */ | ||
3806 | N, N, N, N, N, N, N, N, | ||
3807 | /* 0xF0 - 0xF7 */ | ||
3808 | N, N, N, N, N, N, N, N, | ||
3809 | /* 0xF8 - 0xFF */ | ||
3810 | N, N, N, N, N, N, N, N, | ||
3811 | } }; | ||
3812 | |||
3813 | static const struct escape escape_dd = { { | ||
3814 | N, N, N, N, N, N, N, I(DstMem, em_fnstsw), | ||
3815 | }, { | ||
3816 | /* 0xC0 - 0xC7 */ | ||
3817 | N, N, N, N, N, N, N, N, | ||
3818 | /* 0xC8 - 0xCF */ | ||
3819 | N, N, N, N, N, N, N, N, | ||
3820 | /* 0xD0 - 0xD7 */ | ||
3821 | N, N, N, N, N, N, N, N, | ||
3822 | /* 0xD8 - 0xDF */ | ||
3823 | N, N, N, N, N, N, N, N, | ||
3824 | /* 0xE0 - 0xE7 */ | ||
3825 | N, N, N, N, N, N, N, N, | ||
3826 | /* 0xE8 - 0xEF */ | ||
3827 | N, N, N, N, N, N, N, N, | ||
3828 | /* 0xF0 - 0xF7 */ | ||
3829 | N, N, N, N, N, N, N, N, | ||
3830 | /* 0xF8 - 0xFF */ | ||
3831 | N, N, N, N, N, N, N, N, | ||
3832 | } }; | ||
3833 | |||
3710 | static const struct opcode opcode_table[256] = { | 3834 | static const struct opcode opcode_table[256] = { |
3711 | /* 0x00 - 0x07 */ | 3835 | /* 0x00 - 0x07 */ |
3712 | I6ALU(Lock, em_add), | 3836 | F6ALU(Lock, em_add), |
3713 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), | 3837 | I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), |
3714 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), | 3838 | I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), |
3715 | /* 0x08 - 0x0F */ | 3839 | /* 0x08 - 0x0F */ |
3716 | I6ALU(Lock | PageTable, em_or), | 3840 | F6ALU(Lock | PageTable, em_or), |
3717 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), | 3841 | I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), |
3718 | N, | 3842 | N, |
3719 | /* 0x10 - 0x17 */ | 3843 | /* 0x10 - 0x17 */ |
3720 | I6ALU(Lock, em_adc), | 3844 | F6ALU(Lock, em_adc), |
3721 | I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), | 3845 | I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), |
3722 | I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), | 3846 | I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), |
3723 | /* 0x18 - 0x1F */ | 3847 | /* 0x18 - 0x1F */ |
3724 | I6ALU(Lock, em_sbb), | 3848 | F6ALU(Lock, em_sbb), |
3725 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), | 3849 | I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), |
3726 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), | 3850 | I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), |
3727 | /* 0x20 - 0x27 */ | 3851 | /* 0x20 - 0x27 */ |
3728 | I6ALU(Lock | PageTable, em_and), N, N, | 3852 | F6ALU(Lock | PageTable, em_and), N, N, |
3729 | /* 0x28 - 0x2F */ | 3853 | /* 0x28 - 0x2F */ |
3730 | I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), | 3854 | F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), |
3731 | /* 0x30 - 0x37 */ | 3855 | /* 0x30 - 0x37 */ |
3732 | I6ALU(Lock, em_xor), N, N, | 3856 | F6ALU(Lock, em_xor), N, N, |
3733 | /* 0x38 - 0x3F */ | 3857 | /* 0x38 - 0x3F */ |
3734 | I6ALU(0, em_cmp), N, N, | 3858 | F6ALU(NoWrite, em_cmp), N, N, |
3735 | /* 0x40 - 0x4F */ | 3859 | /* 0x40 - 0x4F */ |
3736 | X16(D(DstReg)), | 3860 | X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)), |
3737 | /* 0x50 - 0x57 */ | 3861 | /* 0x50 - 0x57 */ |
3738 | X8(I(SrcReg | Stack, em_push)), | 3862 | X8(I(SrcReg | Stack, em_push)), |
3739 | /* 0x58 - 0x5F */ | 3863 | /* 0x58 - 0x5F */ |
@@ -3757,7 +3881,7 @@ static const struct opcode opcode_table[256] = { | |||
3757 | G(DstMem | SrcImm, group1), | 3881 | G(DstMem | SrcImm, group1), |
3758 | G(ByteOp | DstMem | SrcImm | No64, group1), | 3882 | G(ByteOp | DstMem | SrcImm | No64, group1), |
3759 | G(DstMem | SrcImmByte, group1), | 3883 | G(DstMem | SrcImmByte, group1), |
3760 | I2bv(DstMem | SrcReg | ModRM, em_test), | 3884 | F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), |
3761 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), | 3885 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), |
3762 | /* 0x88 - 0x8F */ | 3886 | /* 0x88 - 0x8F */ |
3763 | I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), | 3887 | I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), |
@@ -3777,18 +3901,18 @@ static const struct opcode opcode_table[256] = { | |||
3777 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3901 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
3778 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), | 3902 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), |
3779 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 3903 | I2bv(SrcSI | DstDI | Mov | String, em_mov), |
3780 | I2bv(SrcSI | DstDI | String, em_cmp), | 3904 | F2bv(SrcSI | DstDI | String | NoWrite, em_cmp), |
3781 | /* 0xA8 - 0xAF */ | 3905 | /* 0xA8 - 0xAF */ |
3782 | I2bv(DstAcc | SrcImm, em_test), | 3906 | F2bv(DstAcc | SrcImm | NoWrite, em_test), |
3783 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), | 3907 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), |
3784 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), | 3908 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), |
3785 | I2bv(SrcAcc | DstDI | String, em_cmp), | 3909 | F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), |
3786 | /* 0xB0 - 0xB7 */ | 3910 | /* 0xB0 - 0xB7 */ |
3787 | X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), | 3911 | X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), |
3788 | /* 0xB8 - 0xBF */ | 3912 | /* 0xB8 - 0xBF */ |
3789 | X8(I(DstReg | SrcImm | Mov, em_mov)), | 3913 | X8(I(DstReg | SrcImm64 | Mov, em_mov)), |
3790 | /* 0xC0 - 0xC7 */ | 3914 | /* 0xC0 - 0xC7 */ |
3791 | D2bv(DstMem | SrcImmByte | ModRM), | 3915 | G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), |
3792 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), | 3916 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), |
3793 | I(ImplicitOps | Stack, em_ret), | 3917 | I(ImplicitOps | Stack, em_ret), |
3794 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), | 3918 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), |
@@ -3800,10 +3924,11 @@ static const struct opcode opcode_table[256] = { | |||
3800 | D(ImplicitOps), DI(SrcImmByte, intn), | 3924 | D(ImplicitOps), DI(SrcImmByte, intn), |
3801 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), | 3925 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), |
3802 | /* 0xD0 - 0xD7 */ | 3926 | /* 0xD0 - 0xD7 */ |
3803 | D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), | 3927 | G(Src2One | ByteOp, group2), G(Src2One, group2), |
3804 | N, N, N, N, | 3928 | G(Src2CL | ByteOp, group2), G(Src2CL, group2), |
3929 | N, I(DstAcc | SrcImmByte | No64, em_aad), N, N, | ||
3805 | /* 0xD8 - 0xDF */ | 3930 | /* 0xD8 - 0xDF */ |
3806 | N, N, N, N, N, N, N, N, | 3931 | N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, |
3807 | /* 0xE0 - 0xE7 */ | 3932 | /* 0xE0 - 0xE7 */ |
3808 | X3(I(SrcImmByte, em_loop)), | 3933 | X3(I(SrcImmByte, em_loop)), |
3809 | I(SrcImmByte, em_jcxz), | 3934 | I(SrcImmByte, em_jcxz), |
@@ -3870,28 +3995,29 @@ static const struct opcode twobyte_table[256] = { | |||
3870 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3995 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
3871 | /* 0xA0 - 0xA7 */ | 3996 | /* 0xA0 - 0xA7 */ |
3872 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), | 3997 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
3873 | II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), | 3998 | II(ImplicitOps, em_cpuid, cpuid), |
3874 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3999 | F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), |
3875 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 4000 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), |
4001 | F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, | ||
3876 | /* 0xA8 - 0xAF */ | 4002 | /* 0xA8 - 0xAF */ |
3877 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), | 4003 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
3878 | DI(ImplicitOps, rsm), | 4004 | DI(ImplicitOps, rsm), |
3879 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), | 4005 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), |
3880 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 4006 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), |
3881 | D(DstMem | SrcReg | Src2CL | ModRM), | 4007 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), |
3882 | D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), | 4008 | D(ModRM), F(DstReg | SrcMem | ModRM, em_imul), |
3883 | /* 0xB0 - 0xB7 */ | 4009 | /* 0xB0 - 0xB7 */ |
3884 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), | 4010 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), |
3885 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), | 4011 | I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), |
3886 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), | 4012 | F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), |
3887 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | 4013 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), |
3888 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | 4014 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), |
3889 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 4015 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3890 | /* 0xB8 - 0xBF */ | 4016 | /* 0xB8 - 0xBF */ |
3891 | N, N, | 4017 | N, N, |
3892 | G(BitOp, group8), | 4018 | G(BitOp, group8), |
3893 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), | 4019 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
3894 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | 4020 | F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), |
3895 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 4021 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3896 | /* 0xC0 - 0xC7 */ | 4022 | /* 0xC0 - 0xC7 */ |
3897 | D2bv(DstMem | SrcReg | ModRM | Lock), | 4023 | D2bv(DstMem | SrcReg | ModRM | Lock), |
@@ -3950,6 +4076,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3950 | case 4: | 4076 | case 4: |
3951 | op->val = insn_fetch(s32, ctxt); | 4077 | op->val = insn_fetch(s32, ctxt); |
3952 | break; | 4078 | break; |
4079 | case 8: | ||
4080 | op->val = insn_fetch(s64, ctxt); | ||
4081 | break; | ||
3953 | } | 4082 | } |
3954 | if (!sign_extension) { | 4083 | if (!sign_extension) { |
3955 | switch (op->bytes) { | 4084 | switch (op->bytes) { |
@@ -4028,6 +4157,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
4028 | case OpImm: | 4157 | case OpImm: |
4029 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); | 4158 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); |
4030 | break; | 4159 | break; |
4160 | case OpImm64: | ||
4161 | rc = decode_imm(ctxt, op, ctxt->op_bytes, true); | ||
4162 | break; | ||
4031 | case OpMem8: | 4163 | case OpMem8: |
4032 | ctxt->memop.bytes = 1; | 4164 | ctxt->memop.bytes = 1; |
4033 | goto mem_common; | 4165 | goto mem_common; |
@@ -4222,6 +4354,12 @@ done_prefixes: | |||
4222 | case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; | 4354 | case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; |
4223 | } | 4355 | } |
4224 | break; | 4356 | break; |
4357 | case Escape: | ||
4358 | if (ctxt->modrm > 0xbf) | ||
4359 | opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; | ||
4360 | else | ||
4361 | opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; | ||
4362 | break; | ||
4225 | default: | 4363 | default: |
4226 | return EMULATION_FAILED; | 4364 | return EMULATION_FAILED; |
4227 | } | 4365 | } |
@@ -4354,6 +4492,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, | |||
4354 | read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); | 4492 | read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); |
4355 | } | 4493 | } |
4356 | 4494 | ||
4495 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) | ||
4496 | { | ||
4497 | ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; | ||
4498 | fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; | ||
4499 | asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" | ||
4500 | : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) | ||
4501 | : "c"(ctxt->src2.val), [fastop]"S"(fop)); | ||
4502 | ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); | ||
4503 | return X86EMUL_CONTINUE; | ||
4504 | } | ||
4357 | 4505 | ||
4358 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | 4506 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) |
4359 | { | 4507 | { |
@@ -4483,6 +4631,13 @@ special_insn: | |||
4483 | } | 4631 | } |
4484 | 4632 | ||
4485 | if (ctxt->execute) { | 4633 | if (ctxt->execute) { |
4634 | if (ctxt->d & Fastop) { | ||
4635 | void (*fop)(struct fastop *) = (void *)ctxt->execute; | ||
4636 | rc = fastop(ctxt, fop); | ||
4637 | if (rc != X86EMUL_CONTINUE) | ||
4638 | goto done; | ||
4639 | goto writeback; | ||
4640 | } | ||
4486 | rc = ctxt->execute(ctxt); | 4641 | rc = ctxt->execute(ctxt); |
4487 | if (rc != X86EMUL_CONTINUE) | 4642 | if (rc != X86EMUL_CONTINUE) |
4488 | goto done; | 4643 | goto done; |
@@ -4493,12 +4648,6 @@ special_insn: | |||
4493 | goto twobyte_insn; | 4648 | goto twobyte_insn; |
4494 | 4649 | ||
4495 | switch (ctxt->b) { | 4650 | switch (ctxt->b) { |
4496 | case 0x40 ... 0x47: /* inc r16/r32 */ | ||
4497 | emulate_1op(ctxt, "inc"); | ||
4498 | break; | ||
4499 | case 0x48 ... 0x4f: /* dec r16/r32 */ | ||
4500 | emulate_1op(ctxt, "dec"); | ||
4501 | break; | ||
4502 | case 0x63: /* movsxd */ | 4651 | case 0x63: /* movsxd */ |
4503 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 4652 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
4504 | goto cannot_emulate; | 4653 | goto cannot_emulate; |
@@ -4523,9 +4672,6 @@ special_insn: | |||
4523 | case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; | 4672 | case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; |
4524 | } | 4673 | } |
4525 | break; | 4674 | break; |
4526 | case 0xc0 ... 0xc1: | ||
4527 | rc = em_grp2(ctxt); | ||
4528 | break; | ||
4529 | case 0xcc: /* int3 */ | 4675 | case 0xcc: /* int3 */ |
4530 | rc = emulate_int(ctxt, 3); | 4676 | rc = emulate_int(ctxt, 3); |
4531 | break; | 4677 | break; |
@@ -4536,13 +4682,6 @@ special_insn: | |||
4536 | if (ctxt->eflags & EFLG_OF) | 4682 | if (ctxt->eflags & EFLG_OF) |
4537 | rc = emulate_int(ctxt, 4); | 4683 | rc = emulate_int(ctxt, 4); |
4538 | break; | 4684 | break; |
4539 | case 0xd0 ... 0xd1: /* Grp2 */ | ||
4540 | rc = em_grp2(ctxt); | ||
4541 | break; | ||
4542 | case 0xd2 ... 0xd3: /* Grp2 */ | ||
4543 | ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX); | ||
4544 | rc = em_grp2(ctxt); | ||
4545 | break; | ||
4546 | case 0xe9: /* jmp rel */ | 4685 | case 0xe9: /* jmp rel */ |
4547 | case 0xeb: /* jmp rel short */ | 4686 | case 0xeb: /* jmp rel short */ |
4548 | jmp_rel(ctxt, ctxt->src.val); | 4687 | jmp_rel(ctxt, ctxt->src.val); |
@@ -4661,14 +4800,6 @@ twobyte_insn: | |||
4661 | case 0x90 ... 0x9f: /* setcc r/m8 */ | 4800 | case 0x90 ... 0x9f: /* setcc r/m8 */ |
4662 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); | 4801 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); |
4663 | break; | 4802 | break; |
4664 | case 0xa4: /* shld imm8, r, r/m */ | ||
4665 | case 0xa5: /* shld cl, r, r/m */ | ||
4666 | emulate_2op_cl(ctxt, "shld"); | ||
4667 | break; | ||
4668 | case 0xac: /* shrd imm8, r, r/m */ | ||
4669 | case 0xad: /* shrd cl, r, r/m */ | ||
4670 | emulate_2op_cl(ctxt, "shrd"); | ||
4671 | break; | ||
4672 | case 0xae: /* clflush */ | 4803 | case 0xae: /* clflush */ |
4673 | break; | 4804 | break; |
4674 | case 0xb6 ... 0xb7: /* movzx */ | 4805 | case 0xb6 ... 0xb7: /* movzx */ |
@@ -4682,7 +4813,7 @@ twobyte_insn: | |||
4682 | (s16) ctxt->src.val; | 4813 | (s16) ctxt->src.val; |
4683 | break; | 4814 | break; |
4684 | case 0xc0 ... 0xc1: /* xadd */ | 4815 | case 0xc0 ... 0xc1: /* xadd */ |
4685 | emulate_2op_SrcV(ctxt, "add"); | 4816 | fastop(ctxt, em_add); |
4686 | /* Write back the register source. */ | 4817 | /* Write back the register source. */ |
4687 | ctxt->src.val = ctxt->dst.orig_val; | 4818 | ctxt->src.val = ctxt->dst.orig_val; |
4688 | write_register_operand(&ctxt->src); | 4819 | write_register_operand(&ctxt->src); |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 11300d2fa714..c1d30b2fc9bb 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm) | |||
122 | */ | 122 | */ |
123 | remaining = hrtimer_get_remaining(&ps->timer); | 123 | remaining = hrtimer_get_remaining(&ps->timer); |
124 | elapsed = ps->period - ktime_to_ns(remaining); | 124 | elapsed = ps->period - ktime_to_ns(remaining); |
125 | elapsed = mod_64(elapsed, ps->period); | ||
126 | 125 | ||
127 | return elapsed; | 126 | return elapsed; |
128 | } | 127 | } |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 848206df0967..cc31f7c06d3d 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
241 | int irq, irq2, intno; | 241 | int irq, irq2, intno; |
242 | struct kvm_pic *s = pic_irqchip(kvm); | 242 | struct kvm_pic *s = pic_irqchip(kvm); |
243 | 243 | ||
244 | s->output = 0; | ||
245 | |||
244 | pic_lock(s); | 246 | pic_lock(s); |
245 | irq = pic_get_irq(&s->pics[0]); | 247 | irq = pic_get_irq(&s->pics[0]); |
246 | if (irq >= 0) { | 248 | if (irq >= 0) { |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 7e06ba1618bd..484bc874688b 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -38,49 +38,81 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); | 38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * check if there is pending interrupt from | ||
42 | * non-APIC source without intack. | ||
43 | */ | ||
44 | static int kvm_cpu_has_extint(struct kvm_vcpu *v) | ||
45 | { | ||
46 | if (kvm_apic_accept_pic_intr(v)) | ||
47 | return pic_irqchip(v->kvm)->output; /* PIC */ | ||
48 | else | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * check if there is an injectable interrupt: | ||
54 | * when virtual interrupt delivery is enabled, | ||
55 | * interrupts from the APIC will be handled by hardware, | ||
56 | * so we don't need to check for them here. | ||
57 | */ | ||
58 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | ||
59 | { | ||
60 | if (!irqchip_in_kernel(v->kvm)) | ||
61 | return v->arch.interrupt.pending; | ||
62 | |||
63 | if (kvm_cpu_has_extint(v)) | ||
64 | return 1; | ||
65 | |||
66 | if (kvm_apic_vid_enabled(v->kvm)) | ||
67 | return 0; | ||
68 | |||
69 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ | ||
70 | } | ||
71 | |||
72 | /* | ||
41 | * check if there is pending interrupt without | 73 | * check if there is pending interrupt without |
42 | * intack. | 74 | * intack. |
43 | */ | 75 | */ |
44 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | 76 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) |
45 | { | 77 | { |
46 | struct kvm_pic *s; | ||
47 | |||
48 | if (!irqchip_in_kernel(v->kvm)) | 78 | if (!irqchip_in_kernel(v->kvm)) |
49 | return v->arch.interrupt.pending; | 79 | return v->arch.interrupt.pending; |
50 | 80 | ||
51 | if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ | 81 | if (kvm_cpu_has_extint(v)) |
52 | if (kvm_apic_accept_pic_intr(v)) { | 82 | return 1; |
53 | s = pic_irqchip(v->kvm); /* PIC */ | 83 | |
54 | return s->output; | 84 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ |
55 | } else | ||
56 | return 0; | ||
57 | } | ||
58 | return 1; | ||
59 | } | 85 | } |
60 | EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); | 86 | EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); |
61 | 87 | ||
62 | /* | 88 | /* |
89 | * Read pending interrupt (from non-APIC source) | ||
90 | * vector and intack. | ||
91 | */ | ||
92 | static int kvm_cpu_get_extint(struct kvm_vcpu *v) | ||
93 | { | ||
94 | if (kvm_cpu_has_extint(v)) | ||
95 | return kvm_pic_read_irq(v->kvm); /* PIC */ | ||
96 | return -1; | ||
97 | } | ||
98 | |||
99 | /* | ||
63 | * Read pending interrupt vector and intack. | 100 | * Read pending interrupt vector and intack. |
64 | */ | 101 | */ |
65 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | 102 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v) |
66 | { | 103 | { |
67 | struct kvm_pic *s; | ||
68 | int vector; | 104 | int vector; |
69 | 105 | ||
70 | if (!irqchip_in_kernel(v->kvm)) | 106 | if (!irqchip_in_kernel(v->kvm)) |
71 | return v->arch.interrupt.nr; | 107 | return v->arch.interrupt.nr; |
72 | 108 | ||
73 | vector = kvm_get_apic_interrupt(v); /* APIC */ | 109 | vector = kvm_cpu_get_extint(v); |
74 | if (vector == -1) { | 110 | |
75 | if (kvm_apic_accept_pic_intr(v)) { | 111 | if (kvm_apic_vid_enabled(v->kvm) || vector != -1) |
76 | s = pic_irqchip(v->kvm); | 112 | return vector; /* PIC */ |
77 | s->output = 0; /* PIC */ | 113 | |
78 | vector = kvm_pic_read_irq(v->kvm); | 114 | return kvm_get_apic_interrupt(v); /* APIC */ |
79 | } | ||
80 | } | ||
81 | return vector; | ||
82 | } | 115 | } |
83 | EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | ||
84 | 116 | ||
85 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | 117 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) |
86 | { | 118 | { |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9392f527f107..02b51dd4e4ad 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic) | |||
140 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ | 140 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ |
141 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) | 141 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) |
142 | 142 | ||
143 | static inline int apic_x2apic_mode(struct kvm_lapic *apic) | ||
144 | { | ||
145 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | ||
146 | } | ||
147 | |||
148 | static inline int kvm_apic_id(struct kvm_lapic *apic) | 143 | static inline int kvm_apic_id(struct kvm_lapic *apic) |
149 | { | 144 | { |
150 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; | 145 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; |
151 | } | 146 | } |
152 | 147 | ||
153 | static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) | 148 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, |
149 | struct kvm_lapic_irq *irq, | ||
150 | u64 *eoi_exit_bitmap) | ||
154 | { | 151 | { |
155 | u16 cid; | 152 | struct kvm_lapic **dst; |
156 | ldr >>= 32 - map->ldr_bits; | 153 | struct kvm_apic_map *map; |
157 | cid = (ldr >> map->cid_shift) & map->cid_mask; | 154 | unsigned long bitmap = 1; |
155 | int i; | ||
158 | 156 | ||
159 | BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); | 157 | rcu_read_lock(); |
158 | map = rcu_dereference(vcpu->kvm->arch.apic_map); | ||
160 | 159 | ||
161 | return cid; | 160 | if (unlikely(!map)) { |
162 | } | 161 | __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); |
162 | goto out; | ||
163 | } | ||
163 | 164 | ||
164 | static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | 165 | if (irq->dest_mode == 0) { /* physical mode */ |
165 | { | 166 | if (irq->delivery_mode == APIC_DM_LOWEST || |
166 | ldr >>= (32 - map->ldr_bits); | 167 | irq->dest_id == 0xff) { |
167 | return ldr & map->lid_mask; | 168 | __set_bit(irq->vector, |
169 | (unsigned long *)eoi_exit_bitmap); | ||
170 | goto out; | ||
171 | } | ||
172 | dst = &map->phys_map[irq->dest_id & 0xff]; | ||
173 | } else { | ||
174 | u32 mda = irq->dest_id << (32 - map->ldr_bits); | ||
175 | |||
176 | dst = map->logical_map[apic_cluster_id(map, mda)]; | ||
177 | |||
178 | bitmap = apic_logical_id(map, mda); | ||
179 | } | ||
180 | |||
181 | for_each_set_bit(i, &bitmap, 16) { | ||
182 | if (!dst[i]) | ||
183 | continue; | ||
184 | if (dst[i]->vcpu == vcpu) { | ||
185 | __set_bit(irq->vector, | ||
186 | (unsigned long *)eoi_exit_bitmap); | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | out: | ||
192 | rcu_read_unlock(); | ||
168 | } | 193 | } |
169 | 194 | ||
170 | static void recalculate_apic_map(struct kvm *kvm) | 195 | static void recalculate_apic_map(struct kvm *kvm) |
@@ -230,6 +255,8 @@ out: | |||
230 | 255 | ||
231 | if (old) | 256 | if (old) |
232 | kfree_rcu(old, rcu); | 257 | kfree_rcu(old, rcu); |
258 | |||
259 | kvm_ioapic_make_eoibitmap_request(kvm); | ||
233 | } | 260 | } |
234 | 261 | ||
235 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) | 262 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) |
@@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) | |||
345 | { | 372 | { |
346 | int result; | 373 | int result; |
347 | 374 | ||
375 | /* | ||
376 | * Note that irr_pending is just a hint. It will be always | ||
377 | * true with virtual interrupt delivery enabled. | ||
378 | */ | ||
348 | if (!apic->irr_pending) | 379 | if (!apic->irr_pending) |
349 | return -1; | 380 | return -1; |
350 | 381 | ||
@@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
461 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 492 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
462 | { | 493 | { |
463 | int result; | 494 | int result; |
495 | |||
496 | /* Note that isr_count is always 1 with vid enabled */ | ||
464 | if (!apic->isr_count) | 497 | if (!apic->isr_count) |
465 | return -1; | 498 | return -1; |
466 | if (likely(apic->highest_isr_cache != -1)) | 499 | if (likely(apic->highest_isr_cache != -1)) |
@@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
740 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 773 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
741 | } | 774 | } |
742 | 775 | ||
776 | static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) | ||
777 | { | ||
778 | if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | ||
779 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | ||
780 | int trigger_mode; | ||
781 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | ||
782 | trigger_mode = IOAPIC_LEVEL_TRIG; | ||
783 | else | ||
784 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
785 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | ||
786 | } | ||
787 | } | ||
788 | |||
743 | static int apic_set_eoi(struct kvm_lapic *apic) | 789 | static int apic_set_eoi(struct kvm_lapic *apic) |
744 | { | 790 | { |
745 | int vector = apic_find_highest_isr(apic); | 791 | int vector = apic_find_highest_isr(apic); |
@@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic) | |||
756 | apic_clear_isr(vector, apic); | 802 | apic_clear_isr(vector, apic); |
757 | apic_update_ppr(apic); | 803 | apic_update_ppr(apic); |
758 | 804 | ||
759 | if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | 805 | kvm_ioapic_send_eoi(apic, vector); |
760 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | ||
761 | int trigger_mode; | ||
762 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | ||
763 | trigger_mode = IOAPIC_LEVEL_TRIG; | ||
764 | else | ||
765 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
766 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | ||
767 | } | ||
768 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 806 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); |
769 | return vector; | 807 | return vector; |
770 | } | 808 | } |
771 | 809 | ||
810 | /* | ||
811 | * this interface assumes a trap-like exit, which has already finished | ||
812 | * desired side effect including vISR and vPPR update. | ||
813 | */ | ||
814 | void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) | ||
815 | { | ||
816 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
817 | |||
818 | trace_kvm_eoi(apic, vector); | ||
819 | |||
820 | kvm_ioapic_send_eoi(apic, vector); | ||
821 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | ||
822 | } | ||
823 | EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); | ||
824 | |||
772 | static void apic_send_ipi(struct kvm_lapic *apic) | 825 | static void apic_send_ipi(struct kvm_lapic *apic) |
773 | { | 826 | { |
774 | u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); | 827 | u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); |
@@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) | |||
1212 | } | 1265 | } |
1213 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); | 1266 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); |
1214 | 1267 | ||
1268 | /* emulate APIC access in a trap manner */ | ||
1269 | void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) | ||
1270 | { | ||
1271 | u32 val = 0; | ||
1272 | |||
1273 | /* hw has done the conditional check and inst decode */ | ||
1274 | offset &= 0xff0; | ||
1275 | |||
1276 | apic_reg_read(vcpu->arch.apic, offset, 4, &val); | ||
1277 | |||
1278 | /* TODO: optimize to just emulate side effect w/o one more write */ | ||
1279 | apic_reg_write(vcpu->arch.apic, offset, val); | ||
1280 | } | ||
1281 | EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); | ||
1282 | |||
1215 | void kvm_free_lapic(struct kvm_vcpu *vcpu) | 1283 | void kvm_free_lapic(struct kvm_vcpu *vcpu) |
1216 | { | 1284 | { |
1217 | struct kvm_lapic *apic = vcpu->arch.apic; | 1285 | struct kvm_lapic *apic = vcpu->arch.apic; |
@@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) | |||
1288 | 1356 | ||
1289 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | 1357 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) |
1290 | { | 1358 | { |
1359 | u64 old_value = vcpu->arch.apic_base; | ||
1291 | struct kvm_lapic *apic = vcpu->arch.apic; | 1360 | struct kvm_lapic *apic = vcpu->arch.apic; |
1292 | 1361 | ||
1293 | if (!apic) { | 1362 | if (!apic) { |
@@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
1309 | value &= ~MSR_IA32_APICBASE_BSP; | 1378 | value &= ~MSR_IA32_APICBASE_BSP; |
1310 | 1379 | ||
1311 | vcpu->arch.apic_base = value; | 1380 | vcpu->arch.apic_base = value; |
1312 | if (apic_x2apic_mode(apic)) { | 1381 | if ((old_value ^ value) & X2APIC_ENABLE) { |
1313 | u32 id = kvm_apic_id(apic); | 1382 | if (value & X2APIC_ENABLE) { |
1314 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); | 1383 | u32 id = kvm_apic_id(apic); |
1315 | kvm_apic_set_ldr(apic, ldr); | 1384 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); |
1385 | kvm_apic_set_ldr(apic, ldr); | ||
1386 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); | ||
1387 | } else | ||
1388 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); | ||
1316 | } | 1389 | } |
1390 | |||
1317 | apic->base_address = apic->vcpu->arch.apic_base & | 1391 | apic->base_address = apic->vcpu->arch.apic_base & |
1318 | MSR_IA32_APICBASE_BASE; | 1392 | MSR_IA32_APICBASE_BASE; |
1319 | 1393 | ||
@@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1359 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); | 1433 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); |
1360 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 1434 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
1361 | } | 1435 | } |
1362 | apic->irr_pending = false; | 1436 | apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); |
1363 | apic->isr_count = 0; | 1437 | apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm); |
1364 | apic->highest_isr_cache = -1; | 1438 | apic->highest_isr_cache = -1; |
1365 | update_divide_count(apic); | 1439 | update_divide_count(apic); |
1366 | atomic_set(&apic->lapic_timer.pending, 0); | 1440 | atomic_set(&apic->lapic_timer.pending, 0); |
@@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | |||
1575 | update_divide_count(apic); | 1649 | update_divide_count(apic); |
1576 | start_apic_timer(apic); | 1650 | start_apic_timer(apic); |
1577 | apic->irr_pending = true; | 1651 | apic->irr_pending = true; |
1578 | apic->isr_count = count_vectors(apic->regs + APIC_ISR); | 1652 | apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? |
1653 | 1 : count_vectors(apic->regs + APIC_ISR); | ||
1579 | apic->highest_isr_cache = -1; | 1654 | apic->highest_isr_cache = -1; |
1655 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); | ||
1580 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1656 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1581 | } | 1657 | } |
1582 | 1658 | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index e5ebf9f3571f..1676d34ddb4e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | |||
64 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); | 64 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); |
65 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); | 65 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); |
66 | 66 | ||
67 | void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); | ||
68 | void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector); | ||
69 | |||
67 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); | 70 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); |
68 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); | 71 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); |
69 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); | 72 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); |
@@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | |||
124 | return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); | 127 | return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); |
125 | } | 128 | } |
126 | 129 | ||
130 | static inline int apic_x2apic_mode(struct kvm_lapic *apic) | ||
131 | { | ||
132 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | ||
133 | } | ||
134 | |||
135 | static inline bool kvm_apic_vid_enabled(struct kvm *kvm) | ||
136 | { | ||
137 | return kvm_x86_ops->vm_has_apicv(kvm); | ||
138 | } | ||
139 | |||
140 | static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) | ||
141 | { | ||
142 | u16 cid; | ||
143 | ldr >>= 32 - map->ldr_bits; | ||
144 | cid = (ldr >> map->cid_shift) & map->cid_mask; | ||
145 | |||
146 | BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); | ||
147 | |||
148 | return cid; | ||
149 | } | ||
150 | |||
151 | static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | ||
152 | { | ||
153 | ldr >>= (32 - map->ldr_bits); | ||
154 | return ldr & map->lid_mask; | ||
155 | } | ||
156 | |||
157 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | ||
158 | struct kvm_lapic_irq *irq, | ||
159 | u64 *eoi_bitmap); | ||
160 | |||
127 | #endif | 161 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01d7c2ad05f5..4ed3edbe06bd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte) | |||
448 | 448 | ||
449 | static bool spte_is_locklessly_modifiable(u64 spte) | 449 | static bool spte_is_locklessly_modifiable(u64 spte) |
450 | { | 450 | { |
451 | return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); | 451 | return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) == |
452 | (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE); | ||
452 | } | 453 | } |
453 | 454 | ||
454 | static bool spte_has_volatile_bits(u64 spte) | 455 | static bool spte_has_volatile_bits(u64 spte) |
@@ -831,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
831 | if (host_level == PT_PAGE_TABLE_LEVEL) | 832 | if (host_level == PT_PAGE_TABLE_LEVEL) |
832 | return host_level; | 833 | return host_level; |
833 | 834 | ||
834 | max_level = kvm_x86_ops->get_lpage_level() < host_level ? | 835 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); |
835 | kvm_x86_ops->get_lpage_level() : host_level; | ||
836 | 836 | ||
837 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | 837 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) |
838 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) | 838 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) |
@@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) | |||
1142 | } | 1142 | } |
1143 | 1143 | ||
1144 | static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, | 1144 | static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, |
1145 | int level, bool pt_protect) | 1145 | bool pt_protect) |
1146 | { | 1146 | { |
1147 | u64 *sptep; | 1147 | u64 *sptep; |
1148 | struct rmap_iterator iter; | 1148 | struct rmap_iterator iter; |
@@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
1180 | while (mask) { | 1180 | while (mask) { |
1181 | rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), | 1181 | rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), |
1182 | PT_PAGE_TABLE_LEVEL, slot); | 1182 | PT_PAGE_TABLE_LEVEL, slot); |
1183 | __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); | 1183 | __rmap_write_protect(kvm, rmapp, false); |
1184 | 1184 | ||
1185 | /* clear the first set bit */ | 1185 | /* clear the first set bit */ |
1186 | mask &= mask - 1; | 1186 | mask &= mask - 1; |
@@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
1199 | for (i = PT_PAGE_TABLE_LEVEL; | 1199 | for (i = PT_PAGE_TABLE_LEVEL; |
1200 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1200 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
1201 | rmapp = __gfn_to_rmap(gfn, i, slot); | 1201 | rmapp = __gfn_to_rmap(gfn, i, slot); |
1202 | write_protected |= __rmap_write_protect(kvm, rmapp, i, true); | 1202 | write_protected |= __rmap_write_protect(kvm, rmapp, true); |
1203 | } | 1203 | } |
1204 | 1204 | ||
1205 | return write_protected; | 1205 | return write_protected; |
@@ -1460,28 +1460,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) | |||
1460 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); | 1460 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); |
1461 | } | 1461 | } |
1462 | 1462 | ||
1463 | /* | 1463 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) |
1464 | * Remove the sp from shadow page cache, after call it, | ||
1465 | * we can not find this sp from the cache, and the shadow | ||
1466 | * page table is still valid. | ||
1467 | * It should be under the protection of mmu lock. | ||
1468 | */ | ||
1469 | static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp) | ||
1470 | { | 1464 | { |
1471 | ASSERT(is_empty_shadow_page(sp->spt)); | 1465 | ASSERT(is_empty_shadow_page(sp->spt)); |
1472 | hlist_del(&sp->hash_link); | 1466 | hlist_del(&sp->hash_link); |
1473 | if (!sp->role.direct) | ||
1474 | free_page((unsigned long)sp->gfns); | ||
1475 | } | ||
1476 | |||
1477 | /* | ||
1478 | * Free the shadow page table and the sp, we can do it | ||
1479 | * out of the protection of mmu lock. | ||
1480 | */ | ||
1481 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) | ||
1482 | { | ||
1483 | list_del(&sp->link); | 1467 | list_del(&sp->link); |
1484 | free_page((unsigned long)sp->spt); | 1468 | free_page((unsigned long)sp->spt); |
1469 | if (!sp->role.direct) | ||
1470 | free_page((unsigned long)sp->gfns); | ||
1485 | kmem_cache_free(mmu_page_header_cache, sp); | 1471 | kmem_cache_free(mmu_page_header_cache, sp); |
1486 | } | 1472 | } |
1487 | 1473 | ||
@@ -1522,7 +1508,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
1522 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); | 1508 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
1523 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 1509 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
1524 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 1510 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
1525 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); | ||
1526 | sp->parent_ptes = 0; | 1511 | sp->parent_ptes = 0; |
1527 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1512 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1528 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | 1513 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); |
@@ -1973,9 +1958,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | |||
1973 | { | 1958 | { |
1974 | u64 spte; | 1959 | u64 spte; |
1975 | 1960 | ||
1976 | spte = __pa(sp->spt) | 1961 | spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | |
1977 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | 1962 | shadow_user_mask | shadow_x_mask | shadow_accessed_mask; |
1978 | | PT_WRITABLE_MASK | PT_USER_MASK; | 1963 | |
1979 | mmu_spte_set(sptep, spte); | 1964 | mmu_spte_set(sptep, spte); |
1980 | } | 1965 | } |
1981 | 1966 | ||
@@ -2126,7 +2111,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
2126 | do { | 2111 | do { |
2127 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | 2112 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); |
2128 | WARN_ON(!sp->role.invalid || sp->root_count); | 2113 | WARN_ON(!sp->role.invalid || sp->root_count); |
2129 | kvm_mmu_isolate_page(sp); | ||
2130 | kvm_mmu_free_page(sp); | 2114 | kvm_mmu_free_page(sp); |
2131 | } while (!list_empty(invalid_list)); | 2115 | } while (!list_empty(invalid_list)); |
2132 | } | 2116 | } |
@@ -2144,6 +2128,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | |||
2144 | * change the value | 2128 | * change the value |
2145 | */ | 2129 | */ |
2146 | 2130 | ||
2131 | spin_lock(&kvm->mmu_lock); | ||
2132 | |||
2147 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { | 2133 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { |
2148 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && | 2134 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && |
2149 | !list_empty(&kvm->arch.active_mmu_pages)) { | 2135 | !list_empty(&kvm->arch.active_mmu_pages)) { |
@@ -2158,6 +2144,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | |||
2158 | } | 2144 | } |
2159 | 2145 | ||
2160 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; | 2146 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; |
2147 | |||
2148 | spin_unlock(&kvm->mmu_lock); | ||
2161 | } | 2149 | } |
2162 | 2150 | ||
2163 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 2151 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
@@ -2183,14 +2171,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
2183 | } | 2171 | } |
2184 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); | 2172 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); |
2185 | 2173 | ||
2186 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | ||
2187 | { | ||
2188 | int slot = memslot_id(kvm, gfn); | ||
2189 | struct kvm_mmu_page *sp = page_header(__pa(pte)); | ||
2190 | |||
2191 | __set_bit(slot, sp->slot_bitmap); | ||
2192 | } | ||
2193 | |||
2194 | /* | 2174 | /* |
2195 | * The function is based on mtrr_type_lookup() in | 2175 | * The function is based on mtrr_type_lookup() in |
2196 | * arch/x86/kernel/cpu/mtrr/generic.c | 2176 | * arch/x86/kernel/cpu/mtrr/generic.c |
@@ -2332,9 +2312,8 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2332 | if (s->role.level != PT_PAGE_TABLE_LEVEL) | 2312 | if (s->role.level != PT_PAGE_TABLE_LEVEL) |
2333 | return 1; | 2313 | return 1; |
2334 | 2314 | ||
2335 | if (!need_unsync && !s->unsync) { | 2315 | if (!s->unsync) |
2336 | need_unsync = true; | 2316 | need_unsync = true; |
2337 | } | ||
2338 | } | 2317 | } |
2339 | if (need_unsync) | 2318 | if (need_unsync) |
2340 | kvm_unsync_pages(vcpu, gfn); | 2319 | kvm_unsync_pages(vcpu, gfn); |
@@ -2342,8 +2321,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2342 | } | 2321 | } |
2343 | 2322 | ||
2344 | static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 2323 | static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
2345 | unsigned pte_access, int user_fault, | 2324 | unsigned pte_access, int level, |
2346 | int write_fault, int level, | ||
2347 | gfn_t gfn, pfn_t pfn, bool speculative, | 2325 | gfn_t gfn, pfn_t pfn, bool speculative, |
2348 | bool can_unsync, bool host_writable) | 2326 | bool can_unsync, bool host_writable) |
2349 | { | 2327 | { |
@@ -2378,20 +2356,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2378 | 2356 | ||
2379 | spte |= (u64)pfn << PAGE_SHIFT; | 2357 | spte |= (u64)pfn << PAGE_SHIFT; |
2380 | 2358 | ||
2381 | if ((pte_access & ACC_WRITE_MASK) | 2359 | if (pte_access & ACC_WRITE_MASK) { |
2382 | || (!vcpu->arch.mmu.direct_map && write_fault | ||
2383 | && !is_write_protection(vcpu) && !user_fault)) { | ||
2384 | 2360 | ||
2385 | /* | 2361 | /* |
2386 | * There are two cases: | 2362 | * Other vcpu creates new sp in the window between |
2387 | * - the one is other vcpu creates new sp in the window | 2363 | * mapping_level() and acquiring mmu-lock. We can |
2388 | * between mapping_level() and acquiring mmu-lock. | 2364 | * allow guest to retry the access, the mapping can |
2389 | * - the another case is the new sp is created by itself | 2365 | * be fixed if guest refault. |
2390 | * (page-fault path) when guest uses the target gfn as | ||
2391 | * its page table. | ||
2392 | * Both of these cases can be fixed by allowing guest to | ||
2393 | * retry the access, it will refault, then we can establish | ||
2394 | * the mapping by using small page. | ||
2395 | */ | 2366 | */ |
2396 | if (level > PT_PAGE_TABLE_LEVEL && | 2367 | if (level > PT_PAGE_TABLE_LEVEL && |
2397 | has_wrprotected_page(vcpu->kvm, gfn, level)) | 2368 | has_wrprotected_page(vcpu->kvm, gfn, level)) |
@@ -2399,19 +2370,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2399 | 2370 | ||
2400 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; | 2371 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; |
2401 | 2372 | ||
2402 | if (!vcpu->arch.mmu.direct_map | ||
2403 | && !(pte_access & ACC_WRITE_MASK)) { | ||
2404 | spte &= ~PT_USER_MASK; | ||
2405 | /* | ||
2406 | * If we converted a user page to a kernel page, | ||
2407 | * so that the kernel can write to it when cr0.wp=0, | ||
2408 | * then we should prevent the kernel from executing it | ||
2409 | * if SMEP is enabled. | ||
2410 | */ | ||
2411 | if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
2412 | spte |= PT64_NX_MASK; | ||
2413 | } | ||
2414 | |||
2415 | /* | 2373 | /* |
2416 | * Optimization: for pte sync, if spte was writable the hash | 2374 | * Optimization: for pte sync, if spte was writable the hash |
2417 | * lookup is unnecessary (and expensive). Write protection | 2375 | * lookup is unnecessary (and expensive). Write protection |
@@ -2441,19 +2399,15 @@ done: | |||
2441 | } | 2399 | } |
2442 | 2400 | ||
2443 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 2401 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
2444 | unsigned pt_access, unsigned pte_access, | 2402 | unsigned pte_access, int write_fault, int *emulate, |
2445 | int user_fault, int write_fault, | 2403 | int level, gfn_t gfn, pfn_t pfn, bool speculative, |
2446 | int *emulate, int level, gfn_t gfn, | ||
2447 | pfn_t pfn, bool speculative, | ||
2448 | bool host_writable) | 2404 | bool host_writable) |
2449 | { | 2405 | { |
2450 | int was_rmapped = 0; | 2406 | int was_rmapped = 0; |
2451 | int rmap_count; | 2407 | int rmap_count; |
2452 | 2408 | ||
2453 | pgprintk("%s: spte %llx access %x write_fault %d" | 2409 | pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, |
2454 | " user_fault %d gfn %llx\n", | 2410 | *sptep, write_fault, gfn); |
2455 | __func__, *sptep, pt_access, | ||
2456 | write_fault, user_fault, gfn); | ||
2457 | 2411 | ||
2458 | if (is_rmap_spte(*sptep)) { | 2412 | if (is_rmap_spte(*sptep)) { |
2459 | /* | 2413 | /* |
@@ -2477,9 +2431,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2477 | was_rmapped = 1; | 2431 | was_rmapped = 1; |
2478 | } | 2432 | } |
2479 | 2433 | ||
2480 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 2434 | if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, |
2481 | level, gfn, pfn, speculative, true, | 2435 | true, host_writable)) { |
2482 | host_writable)) { | ||
2483 | if (write_fault) | 2436 | if (write_fault) |
2484 | *emulate = 1; | 2437 | *emulate = 1; |
2485 | kvm_mmu_flush_tlb(vcpu); | 2438 | kvm_mmu_flush_tlb(vcpu); |
@@ -2497,7 +2450,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2497 | ++vcpu->kvm->stat.lpages; | 2450 | ++vcpu->kvm->stat.lpages; |
2498 | 2451 | ||
2499 | if (is_shadow_present_pte(*sptep)) { | 2452 | if (is_shadow_present_pte(*sptep)) { |
2500 | page_header_update_slot(vcpu->kvm, sptep, gfn); | ||
2501 | if (!was_rmapped) { | 2453 | if (!was_rmapped) { |
2502 | rmap_count = rmap_add(vcpu, sptep, gfn); | 2454 | rmap_count = rmap_add(vcpu, sptep, gfn); |
2503 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 2455 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
@@ -2571,10 +2523,9 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | |||
2571 | return -1; | 2523 | return -1; |
2572 | 2524 | ||
2573 | for (i = 0; i < ret; i++, gfn++, start++) | 2525 | for (i = 0; i < ret; i++, gfn++, start++) |
2574 | mmu_set_spte(vcpu, start, ACC_ALL, | 2526 | mmu_set_spte(vcpu, start, access, 0, NULL, |
2575 | access, 0, 0, NULL, | 2527 | sp->role.level, gfn, page_to_pfn(pages[i]), |
2576 | sp->role.level, gfn, | 2528 | true, true); |
2577 | page_to_pfn(pages[i]), true, true); | ||
2578 | 2529 | ||
2579 | return 0; | 2530 | return 0; |
2580 | } | 2531 | } |
@@ -2633,11 +2584,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2633 | 2584 | ||
2634 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2585 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
2635 | if (iterator.level == level) { | 2586 | if (iterator.level == level) { |
2636 | unsigned pte_access = ACC_ALL; | 2587 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, |
2637 | 2588 | write, &emulate, level, gfn, pfn, | |
2638 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, | 2589 | prefault, map_writable); |
2639 | 0, write, &emulate, | ||
2640 | level, gfn, pfn, prefault, map_writable); | ||
2641 | direct_pte_prefetch(vcpu, iterator.sptep); | 2590 | direct_pte_prefetch(vcpu, iterator.sptep); |
2642 | ++vcpu->stat.pf_fixed; | 2591 | ++vcpu->stat.pf_fixed; |
2643 | break; | 2592 | break; |
@@ -2652,11 +2601,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2652 | iterator.level - 1, | 2601 | iterator.level - 1, |
2653 | 1, ACC_ALL, iterator.sptep); | 2602 | 1, ACC_ALL, iterator.sptep); |
2654 | 2603 | ||
2655 | mmu_spte_set(iterator.sptep, | 2604 | link_shadow_page(iterator.sptep, sp); |
2656 | __pa(sp->spt) | ||
2657 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | ||
2658 | | shadow_user_mask | shadow_x_mask | ||
2659 | | shadow_accessed_mask); | ||
2660 | } | 2605 | } |
2661 | } | 2606 | } |
2662 | return emulate; | 2607 | return emulate; |
@@ -3719,6 +3664,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
3719 | else | 3664 | else |
3720 | r = paging32_init_context(vcpu, context); | 3665 | r = paging32_init_context(vcpu, context); |
3721 | 3666 | ||
3667 | vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); | ||
3722 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); | 3668 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
3723 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | 3669 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); |
3724 | vcpu->arch.mmu.base_role.smep_andnot_wp | 3670 | vcpu->arch.mmu.base_role.smep_andnot_wp |
@@ -3885,7 +3831,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, | |||
3885 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | 3831 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ |
3886 | *gpa &= ~(gpa_t)7; | 3832 | *gpa &= ~(gpa_t)7; |
3887 | *bytes = 8; | 3833 | *bytes = 8; |
3888 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); | 3834 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8); |
3889 | if (r) | 3835 | if (r) |
3890 | gentry = 0; | 3836 | gentry = 0; |
3891 | new = (const u8 *)&gentry; | 3837 | new = (const u8 *)&gentry; |
@@ -4039,7 +3985,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4039 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3985 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
4040 | & mask.word) && rmap_can_add(vcpu)) | 3986 | & mask.word) && rmap_can_add(vcpu)) |
4041 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3987 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
4042 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3988 | if (need_remote_flush(entry, *spte)) |
4043 | remote_flush = true; | 3989 | remote_flush = true; |
4044 | ++spte; | 3990 | ++spte; |
4045 | } | 3991 | } |
@@ -4198,26 +4144,36 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) | |||
4198 | 4144 | ||
4199 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | 4145 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) |
4200 | { | 4146 | { |
4201 | struct kvm_mmu_page *sp; | 4147 | struct kvm_memory_slot *memslot; |
4202 | bool flush = false; | 4148 | gfn_t last_gfn; |
4149 | int i; | ||
4203 | 4150 | ||
4204 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 4151 | memslot = id_to_memslot(kvm->memslots, slot); |
4205 | int i; | 4152 | last_gfn = memslot->base_gfn + memslot->npages - 1; |
4206 | u64 *pt; | ||
4207 | 4153 | ||
4208 | if (!test_bit(slot, sp->slot_bitmap)) | 4154 | spin_lock(&kvm->mmu_lock); |
4209 | continue; | ||
4210 | 4155 | ||
4211 | pt = sp->spt; | 4156 | for (i = PT_PAGE_TABLE_LEVEL; |
4212 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 4157 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
4213 | if (!is_shadow_present_pte(pt[i]) || | 4158 | unsigned long *rmapp; |
4214 | !is_last_spte(pt[i], sp->role.level)) | 4159 | unsigned long last_index, index; |
4215 | continue; | ||
4216 | 4160 | ||
4217 | spte_write_protect(kvm, &pt[i], &flush, false); | 4161 | rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; |
4162 | last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); | ||
4163 | |||
4164 | for (index = 0; index <= last_index; ++index, ++rmapp) { | ||
4165 | if (*rmapp) | ||
4166 | __rmap_write_protect(kvm, rmapp, false); | ||
4167 | |||
4168 | if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { | ||
4169 | kvm_flush_remote_tlbs(kvm); | ||
4170 | cond_resched_lock(&kvm->mmu_lock); | ||
4171 | } | ||
4218 | } | 4172 | } |
4219 | } | 4173 | } |
4174 | |||
4220 | kvm_flush_remote_tlbs(kvm); | 4175 | kvm_flush_remote_tlbs(kvm); |
4176 | spin_unlock(&kvm->mmu_lock); | ||
4221 | } | 4177 | } |
4222 | 4178 | ||
4223 | void kvm_mmu_zap_all(struct kvm *kvm) | 4179 | void kvm_mmu_zap_all(struct kvm *kvm) |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index cd6e98333ba3..b8f6172f4174 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, | |||
195 | TP_ARGS(sp) | 195 | TP_ARGS(sp) |
196 | ); | 196 | ); |
197 | 197 | ||
198 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages, | ||
199 | TP_PROTO(struct kvm_mmu_page *sp), | ||
200 | |||
201 | TP_ARGS(sp) | ||
202 | ); | ||
203 | |||
204 | TRACE_EVENT( | 198 | TRACE_EVENT( |
205 | mark_mmio_spte, | 199 | mark_mmio_spte, |
206 | TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), | 200 | TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 891eb6d93b8b..105dd5bd550e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
151 | pt_element_t pte; | 151 | pt_element_t pte; |
152 | pt_element_t __user *uninitialized_var(ptep_user); | 152 | pt_element_t __user *uninitialized_var(ptep_user); |
153 | gfn_t table_gfn; | 153 | gfn_t table_gfn; |
154 | unsigned index, pt_access, pte_access, accessed_dirty, shift; | 154 | unsigned index, pt_access, pte_access, accessed_dirty; |
155 | gpa_t pte_gpa; | 155 | gpa_t pte_gpa; |
156 | int offset; | 156 | int offset; |
157 | const int write_fault = access & PFERR_WRITE_MASK; | 157 | const int write_fault = access & PFERR_WRITE_MASK; |
@@ -249,16 +249,12 @@ retry_walk: | |||
249 | 249 | ||
250 | if (!write_fault) | 250 | if (!write_fault) |
251 | protect_clean_gpte(&pte_access, pte); | 251 | protect_clean_gpte(&pte_access, pte); |
252 | 252 | else | |
253 | /* | 253 | /* |
254 | * On a write fault, fold the dirty bit into accessed_dirty by shifting it one | 254 | * On a write fault, fold the dirty bit into accessed_dirty by |
255 | * place right. | 255 | * shifting it one place right. |
256 | * | 256 | */ |
257 | * On a read fault, do nothing. | 257 | accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT); |
258 | */ | ||
259 | shift = write_fault >> ilog2(PFERR_WRITE_MASK); | ||
260 | shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; | ||
261 | accessed_dirty &= pte >> shift; | ||
262 | 258 | ||
263 | if (unlikely(!accessed_dirty)) { | 259 | if (unlikely(!accessed_dirty)) { |
264 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); | 260 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); |
@@ -330,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
330 | * we call mmu_set_spte() with host_writable = true because | 326 | * we call mmu_set_spte() with host_writable = true because |
331 | * pte_prefetch_gfn_to_pfn always gets a writable pfn. | 327 | * pte_prefetch_gfn_to_pfn always gets a writable pfn. |
332 | */ | 328 | */ |
333 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 329 | mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, |
334 | NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); | 330 | gfn, pfn, true, true); |
335 | 331 | ||
336 | return true; | 332 | return true; |
337 | } | 333 | } |
@@ -405,7 +401,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
405 | */ | 401 | */ |
406 | static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 402 | static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
407 | struct guest_walker *gw, | 403 | struct guest_walker *gw, |
408 | int user_fault, int write_fault, int hlevel, | 404 | int write_fault, int hlevel, |
409 | pfn_t pfn, bool map_writable, bool prefault) | 405 | pfn_t pfn, bool map_writable, bool prefault) |
410 | { | 406 | { |
411 | struct kvm_mmu_page *sp = NULL; | 407 | struct kvm_mmu_page *sp = NULL; |
@@ -413,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
413 | unsigned direct_access, access = gw->pt_access; | 409 | unsigned direct_access, access = gw->pt_access; |
414 | int top_level, emulate = 0; | 410 | int top_level, emulate = 0; |
415 | 411 | ||
416 | if (!is_present_gpte(gw->ptes[gw->level - 1])) | ||
417 | return 0; | ||
418 | |||
419 | direct_access = gw->pte_access; | 412 | direct_access = gw->pte_access; |
420 | 413 | ||
421 | top_level = vcpu->arch.mmu.root_level; | 414 | top_level = vcpu->arch.mmu.root_level; |
@@ -477,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
477 | } | 470 | } |
478 | 471 | ||
479 | clear_sp_write_flooding_count(it.sptep); | 472 | clear_sp_write_flooding_count(it.sptep); |
480 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, | 473 | mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, |
481 | user_fault, write_fault, &emulate, it.level, | 474 | it.level, gw->gfn, pfn, prefault, map_writable); |
482 | gw->gfn, pfn, prefault, map_writable); | ||
483 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); | 475 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); |
484 | 476 | ||
485 | return emulate; | 477 | return emulate; |
@@ -491,6 +483,46 @@ out_gpte_changed: | |||
491 | return 0; | 483 | return 0; |
492 | } | 484 | } |
493 | 485 | ||
486 | /* | ||
487 | * To see whether the mapped gfn can write its page table in the current | ||
488 | * mapping. | ||
489 | * | ||
490 | * It is the helper function of FNAME(page_fault). When guest uses large page | ||
491 | * size to map the writable gfn which is used as current page table, we should | ||
492 | * force kvm to use small page size to map it because new shadow page will be | ||
493 | * created when kvm establishes shadow page table, which stops kvm using large | ||
494 | * page size. Doing it early avoids unnecessary #PF and emulation. | ||
495 | * | ||
496 | * @write_fault_to_shadow_pgtable will return true if the fault gfn is | ||
497 | * currently used as its page table. | ||
498 | * | ||
499 | * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok | ||
500 | * since the PDPT is always shadowed, that means, we can not use large page | ||
501 | * size to map the gfn which is used as PDPT. | ||
502 | */ | ||
503 | static bool | ||
504 | FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, | ||
505 | struct guest_walker *walker, int user_fault, | ||
506 | bool *write_fault_to_shadow_pgtable) | ||
507 | { | ||
508 | int level; | ||
509 | gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); | ||
510 | bool self_changed = false; | ||
511 | |||
512 | if (!(walker->pte_access & ACC_WRITE_MASK || | ||
513 | (!is_write_protection(vcpu) && !user_fault))) | ||
514 | return false; | ||
515 | |||
516 | for (level = walker->level; level <= walker->max_level; level++) { | ||
517 | gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1]; | ||
518 | |||
519 | self_changed |= !(gfn & mask); | ||
520 | *write_fault_to_shadow_pgtable |= !gfn; | ||
521 | } | ||
522 | |||
523 | return self_changed; | ||
524 | } | ||
525 | |||
494 | /* | 526 | /* |
495 | * Page fault handler. There are several causes for a page fault: | 527 | * Page fault handler. There are several causes for a page fault: |
496 | * - there is no shadow pte for the guest pte | 528 | * - there is no shadow pte for the guest pte |
@@ -516,7 +548,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
516 | int level = PT_PAGE_TABLE_LEVEL; | 548 | int level = PT_PAGE_TABLE_LEVEL; |
517 | int force_pt_level; | 549 | int force_pt_level; |
518 | unsigned long mmu_seq; | 550 | unsigned long mmu_seq; |
519 | bool map_writable; | 551 | bool map_writable, is_self_change_mapping; |
520 | 552 | ||
521 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 553 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
522 | 554 | ||
@@ -544,8 +576,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
544 | return 0; | 576 | return 0; |
545 | } | 577 | } |
546 | 578 | ||
579 | vcpu->arch.write_fault_to_shadow_pgtable = false; | ||
580 | |||
581 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, | ||
582 | &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); | ||
583 | |||
547 | if (walker.level >= PT_DIRECTORY_LEVEL) | 584 | if (walker.level >= PT_DIRECTORY_LEVEL) |
548 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); | 585 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) |
586 | || is_self_change_mapping; | ||
549 | else | 587 | else |
550 | force_pt_level = 1; | 588 | force_pt_level = 1; |
551 | if (!force_pt_level) { | 589 | if (!force_pt_level) { |
@@ -564,6 +602,26 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
564 | walker.gfn, pfn, walker.pte_access, &r)) | 602 | walker.gfn, pfn, walker.pte_access, &r)) |
565 | return r; | 603 | return r; |
566 | 604 | ||
605 | /* | ||
606 | * Do not change pte_access if the pfn is a mmio page, otherwise | ||
607 | * we will cache the incorrect access into mmio spte. | ||
608 | */ | ||
609 | if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) && | ||
610 | !is_write_protection(vcpu) && !user_fault && | ||
611 | !is_noslot_pfn(pfn)) { | ||
612 | walker.pte_access |= ACC_WRITE_MASK; | ||
613 | walker.pte_access &= ~ACC_USER_MASK; | ||
614 | |||
615 | /* | ||
616 | * If we converted a user page to a kernel page, | ||
617 | * so that the kernel can write to it when cr0.wp=0, | ||
618 | * then we should prevent the kernel from executing it | ||
619 | * if SMEP is enabled. | ||
620 | */ | ||
621 | if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
622 | walker.pte_access &= ~ACC_EXEC_MASK; | ||
623 | } | ||
624 | |||
567 | spin_lock(&vcpu->kvm->mmu_lock); | 625 | spin_lock(&vcpu->kvm->mmu_lock); |
568 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 626 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
569 | goto out_unlock; | 627 | goto out_unlock; |
@@ -572,7 +630,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
572 | kvm_mmu_free_some_pages(vcpu); | 630 | kvm_mmu_free_some_pages(vcpu); |
573 | if (!force_pt_level) | 631 | if (!force_pt_level) |
574 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 632 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
575 | r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 633 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
576 | level, pfn, map_writable, prefault); | 634 | level, pfn, map_writable, prefault); |
577 | ++vcpu->stat.pf_fixed; | 635 | ++vcpu->stat.pf_fixed; |
578 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); | 636 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); |
@@ -747,7 +805,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
747 | 805 | ||
748 | host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; | 806 | host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; |
749 | 807 | ||
750 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 808 | set_spte(vcpu, &sp->spt[i], pte_access, |
751 | PT_PAGE_TABLE_LEVEL, gfn, | 809 | PT_PAGE_TABLE_LEVEL, gfn, |
752 | spte_to_pfn(sp->spt[i]), true, false, | 810 | spte_to_pfn(sp->spt[i]), true, false, |
753 | host_writable); | 811 | host_writable); |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d29d3cd1c156..e1b1ce21bc00 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
3571 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); | 3571 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
3572 | } | 3572 | } |
3573 | 3573 | ||
3574 | static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | ||
3575 | { | ||
3576 | return; | ||
3577 | } | ||
3578 | |||
3579 | static int svm_vm_has_apicv(struct kvm *kvm) | ||
3580 | { | ||
3581 | return 0; | ||
3582 | } | ||
3583 | |||
3584 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | ||
3585 | { | ||
3586 | return; | ||
3587 | } | ||
3588 | |||
3589 | static void svm_hwapic_isr_update(struct kvm *kvm, int isr) | ||
3590 | { | ||
3591 | return; | ||
3592 | } | ||
3593 | |||
3574 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 3594 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
3575 | { | 3595 | { |
3576 | struct vcpu_svm *svm = to_svm(vcpu); | 3596 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4290 | .enable_nmi_window = enable_nmi_window, | 4310 | .enable_nmi_window = enable_nmi_window, |
4291 | .enable_irq_window = enable_irq_window, | 4311 | .enable_irq_window = enable_irq_window, |
4292 | .update_cr8_intercept = update_cr8_intercept, | 4312 | .update_cr8_intercept = update_cr8_intercept, |
4313 | .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, | ||
4314 | .vm_has_apicv = svm_vm_has_apicv, | ||
4315 | .load_eoi_exitmap = svm_load_eoi_exitmap, | ||
4316 | .hwapic_isr_update = svm_hwapic_isr_update, | ||
4293 | 4317 | ||
4294 | .set_tss_addr = svm_set_tss_addr, | 4318 | .set_tss_addr = svm_set_tss_addr, |
4295 | .get_tdp_level = get_npt_level, | 4319 | .get_tdp_level = get_npt_level, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9120ae1901e4..6667042714cc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -84,6 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
84 | static bool __read_mostly fasteoi = 1; | 84 | static bool __read_mostly fasteoi = 1; |
85 | module_param(fasteoi, bool, S_IRUGO); | 85 | module_param(fasteoi, bool, S_IRUGO); |
86 | 86 | ||
87 | static bool __read_mostly enable_apicv_reg_vid; | ||
88 | |||
87 | /* | 89 | /* |
88 | * If nested=1, nested virtualization is supported, i.e., guests may use | 90 | * If nested=1, nested virtualization is supported, i.e., guests may use |
89 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 91 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
@@ -92,12 +94,8 @@ module_param(fasteoi, bool, S_IRUGO); | |||
92 | static bool __read_mostly nested = 0; | 94 | static bool __read_mostly nested = 0; |
93 | module_param(nested, bool, S_IRUGO); | 95 | module_param(nested, bool, S_IRUGO); |
94 | 96 | ||
95 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 97 | #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) |
96 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | 98 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) |
97 | #define KVM_GUEST_CR0_MASK \ | ||
98 | (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
99 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ | ||
100 | (X86_CR0_WP | X86_CR0_NE) | ||
101 | #define KVM_VM_CR0_ALWAYS_ON \ | 99 | #define KVM_VM_CR0_ALWAYS_ON \ |
102 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | 100 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) |
103 | #define KVM_CR4_GUEST_OWNED_BITS \ | 101 | #define KVM_CR4_GUEST_OWNED_BITS \ |
@@ -624,6 +622,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
624 | struct kvm_segment *var, int seg); | 622 | struct kvm_segment *var, int seg); |
625 | static void vmx_get_segment(struct kvm_vcpu *vcpu, | 623 | static void vmx_get_segment(struct kvm_vcpu *vcpu, |
626 | struct kvm_segment *var, int seg); | 624 | struct kvm_segment *var, int seg); |
625 | static bool guest_state_valid(struct kvm_vcpu *vcpu); | ||
626 | static u32 vmx_segment_access_rights(struct kvm_segment *var); | ||
627 | 627 | ||
628 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 628 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
629 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 629 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -638,6 +638,8 @@ static unsigned long *vmx_io_bitmap_a; | |||
638 | static unsigned long *vmx_io_bitmap_b; | 638 | static unsigned long *vmx_io_bitmap_b; |
639 | static unsigned long *vmx_msr_bitmap_legacy; | 639 | static unsigned long *vmx_msr_bitmap_legacy; |
640 | static unsigned long *vmx_msr_bitmap_longmode; | 640 | static unsigned long *vmx_msr_bitmap_longmode; |
641 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; | ||
642 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; | ||
641 | 643 | ||
642 | static bool cpu_has_load_ia32_efer; | 644 | static bool cpu_has_load_ia32_efer; |
643 | static bool cpu_has_load_perf_global_ctrl; | 645 | static bool cpu_has_load_perf_global_ctrl; |
@@ -762,6 +764,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) | |||
762 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 764 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
763 | } | 765 | } |
764 | 766 | ||
767 | static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) | ||
768 | { | ||
769 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
770 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
771 | } | ||
772 | |||
773 | static inline bool cpu_has_vmx_apic_register_virt(void) | ||
774 | { | ||
775 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
776 | SECONDARY_EXEC_APIC_REGISTER_VIRT; | ||
777 | } | ||
778 | |||
779 | static inline bool cpu_has_vmx_virtual_intr_delivery(void) | ||
780 | { | ||
781 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
782 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | ||
783 | } | ||
784 | |||
765 | static inline bool cpu_has_vmx_flexpriority(void) | 785 | static inline bool cpu_has_vmx_flexpriority(void) |
766 | { | 786 | { |
767 | return cpu_has_vmx_tpr_shadow() && | 787 | return cpu_has_vmx_tpr_shadow() && |
@@ -1694,7 +1714,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | |||
1694 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 1714 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
1695 | { | 1715 | { |
1696 | __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); | 1716 | __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); |
1697 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
1698 | to_vmx(vcpu)->rflags = rflags; | 1717 | to_vmx(vcpu)->rflags = rflags; |
1699 | if (to_vmx(vcpu)->rmode.vm86_active) { | 1718 | if (to_vmx(vcpu)->rmode.vm86_active) { |
1700 | to_vmx(vcpu)->rmode.save_rflags = rflags; | 1719 | to_vmx(vcpu)->rmode.save_rflags = rflags; |
@@ -1820,6 +1839,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
1820 | vmx->guest_msrs[from] = tmp; | 1839 | vmx->guest_msrs[from] = tmp; |
1821 | } | 1840 | } |
1822 | 1841 | ||
1842 | static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) | ||
1843 | { | ||
1844 | unsigned long *msr_bitmap; | ||
1845 | |||
1846 | if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) { | ||
1847 | if (is_long_mode(vcpu)) | ||
1848 | msr_bitmap = vmx_msr_bitmap_longmode_x2apic; | ||
1849 | else | ||
1850 | msr_bitmap = vmx_msr_bitmap_legacy_x2apic; | ||
1851 | } else { | ||
1852 | if (is_long_mode(vcpu)) | ||
1853 | msr_bitmap = vmx_msr_bitmap_longmode; | ||
1854 | else | ||
1855 | msr_bitmap = vmx_msr_bitmap_legacy; | ||
1856 | } | ||
1857 | |||
1858 | vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
1859 | } | ||
1860 | |||
1823 | /* | 1861 | /* |
1824 | * Set up the vmcs to automatically save and restore system | 1862 | * Set up the vmcs to automatically save and restore system |
1825 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy | 1863 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy |
@@ -1828,7 +1866,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
1828 | static void setup_msrs(struct vcpu_vmx *vmx) | 1866 | static void setup_msrs(struct vcpu_vmx *vmx) |
1829 | { | 1867 | { |
1830 | int save_nmsrs, index; | 1868 | int save_nmsrs, index; |
1831 | unsigned long *msr_bitmap; | ||
1832 | 1869 | ||
1833 | save_nmsrs = 0; | 1870 | save_nmsrs = 0; |
1834 | #ifdef CONFIG_X86_64 | 1871 | #ifdef CONFIG_X86_64 |
@@ -1860,14 +1897,8 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
1860 | 1897 | ||
1861 | vmx->save_nmsrs = save_nmsrs; | 1898 | vmx->save_nmsrs = save_nmsrs; |
1862 | 1899 | ||
1863 | if (cpu_has_vmx_msr_bitmap()) { | 1900 | if (cpu_has_vmx_msr_bitmap()) |
1864 | if (is_long_mode(&vmx->vcpu)) | 1901 | vmx_set_msr_bitmap(&vmx->vcpu); |
1865 | msr_bitmap = vmx_msr_bitmap_longmode; | ||
1866 | else | ||
1867 | msr_bitmap = vmx_msr_bitmap_legacy; | ||
1868 | |||
1869 | vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
1870 | } | ||
1871 | } | 1902 | } |
1872 | 1903 | ||
1873 | /* | 1904 | /* |
@@ -2533,13 +2564,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2533 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { | 2564 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { |
2534 | min2 = 0; | 2565 | min2 = 0; |
2535 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2566 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2567 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
2536 | SECONDARY_EXEC_WBINVD_EXITING | | 2568 | SECONDARY_EXEC_WBINVD_EXITING | |
2537 | SECONDARY_EXEC_ENABLE_VPID | | 2569 | SECONDARY_EXEC_ENABLE_VPID | |
2538 | SECONDARY_EXEC_ENABLE_EPT | | 2570 | SECONDARY_EXEC_ENABLE_EPT | |
2539 | SECONDARY_EXEC_UNRESTRICTED_GUEST | | 2571 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
2540 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | | 2572 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | |
2541 | SECONDARY_EXEC_RDTSCP | | 2573 | SECONDARY_EXEC_RDTSCP | |
2542 | SECONDARY_EXEC_ENABLE_INVPCID; | 2574 | SECONDARY_EXEC_ENABLE_INVPCID | |
2575 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
2576 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | ||
2543 | if (adjust_vmx_controls(min2, opt2, | 2577 | if (adjust_vmx_controls(min2, opt2, |
2544 | MSR_IA32_VMX_PROCBASED_CTLS2, | 2578 | MSR_IA32_VMX_PROCBASED_CTLS2, |
2545 | &_cpu_based_2nd_exec_control) < 0) | 2579 | &_cpu_based_2nd_exec_control) < 0) |
@@ -2550,6 +2584,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2550 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | 2584 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) |
2551 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; | 2585 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; |
2552 | #endif | 2586 | #endif |
2587 | |||
2588 | if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) | ||
2589 | _cpu_based_2nd_exec_control &= ~( | ||
2590 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
2591 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
2592 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
2593 | |||
2553 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { | 2594 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { |
2554 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT | 2595 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT |
2555 | enabled */ | 2596 | enabled */ |
@@ -2747,6 +2788,15 @@ static __init int hardware_setup(void) | |||
2747 | if (!cpu_has_vmx_ple()) | 2788 | if (!cpu_has_vmx_ple()) |
2748 | ple_gap = 0; | 2789 | ple_gap = 0; |
2749 | 2790 | ||
2791 | if (!cpu_has_vmx_apic_register_virt() || | ||
2792 | !cpu_has_vmx_virtual_intr_delivery()) | ||
2793 | enable_apicv_reg_vid = 0; | ||
2794 | |||
2795 | if (enable_apicv_reg_vid) | ||
2796 | kvm_x86_ops->update_cr8_intercept = NULL; | ||
2797 | else | ||
2798 | kvm_x86_ops->hwapic_irr_update = NULL; | ||
2799 | |||
2750 | if (nested) | 2800 | if (nested) |
2751 | nested_vmx_setup_ctls_msrs(); | 2801 | nested_vmx_setup_ctls_msrs(); |
2752 | 2802 | ||
@@ -2758,18 +2808,28 @@ static __exit void hardware_unsetup(void) | |||
2758 | free_kvm_area(); | 2808 | free_kvm_area(); |
2759 | } | 2809 | } |
2760 | 2810 | ||
2761 | static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) | 2811 | static bool emulation_required(struct kvm_vcpu *vcpu) |
2762 | { | 2812 | { |
2763 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2813 | return emulate_invalid_guest_state && !guest_state_valid(vcpu); |
2764 | struct kvm_segment tmp = *save; | 2814 | } |
2765 | 2815 | ||
2766 | if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { | 2816 | static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, |
2767 | tmp.base = vmcs_readl(sf->base); | 2817 | struct kvm_segment *save) |
2768 | tmp.selector = vmcs_read16(sf->selector); | 2818 | { |
2769 | tmp.dpl = tmp.selector & SELECTOR_RPL_MASK; | 2819 | if (!emulate_invalid_guest_state) { |
2770 | tmp.s = 1; | 2820 | /* |
2821 | * CS and SS RPL should be equal during guest entry according | ||
2822 | * to VMX spec, but in reality it is not always so. Since vcpu | ||
2823 | * is in the middle of the transition from real mode to | ||
2824 | * protected mode it is safe to assume that RPL 0 is a good | ||
2825 | * default value. | ||
2826 | */ | ||
2827 | if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) | ||
2828 | save->selector &= ~SELECTOR_RPL_MASK; | ||
2829 | save->dpl = save->selector & SELECTOR_RPL_MASK; | ||
2830 | save->s = 1; | ||
2771 | } | 2831 | } |
2772 | vmx_set_segment(vcpu, &tmp, seg); | 2832 | vmx_set_segment(vcpu, save, seg); |
2773 | } | 2833 | } |
2774 | 2834 | ||
2775 | static void enter_pmode(struct kvm_vcpu *vcpu) | 2835 | static void enter_pmode(struct kvm_vcpu *vcpu) |
@@ -2777,7 +2837,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
2777 | unsigned long flags; | 2837 | unsigned long flags; |
2778 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2838 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2779 | 2839 | ||
2780 | vmx->emulation_required = 1; | 2840 | /* |
2841 | * Update real mode segment cache. It may be not up-to-date if segment | ||
2842 | * register was written while vcpu was in a guest mode. | ||
2843 | */ | ||
2844 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); | ||
2845 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); | ||
2846 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); | ||
2847 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); | ||
2848 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); | ||
2849 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); | ||
2850 | |||
2781 | vmx->rmode.vm86_active = 0; | 2851 | vmx->rmode.vm86_active = 0; |
2782 | 2852 | ||
2783 | vmx_segment_cache_clear(vmx); | 2853 | vmx_segment_cache_clear(vmx); |
@@ -2794,22 +2864,16 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
2794 | 2864 | ||
2795 | update_exception_bitmap(vcpu); | 2865 | update_exception_bitmap(vcpu); |
2796 | 2866 | ||
2797 | if (emulate_invalid_guest_state) | 2867 | fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); |
2798 | return; | 2868 | fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); |
2799 | 2869 | fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); | |
2800 | fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); | 2870 | fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); |
2801 | fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); | 2871 | fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); |
2802 | fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); | 2872 | fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); |
2803 | fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); | ||
2804 | |||
2805 | vmx_segment_cache_clear(vmx); | ||
2806 | 2873 | ||
2807 | vmcs_write16(GUEST_SS_SELECTOR, 0); | 2874 | /* CPL is always 0 when CPU enters protected mode */ |
2808 | vmcs_write32(GUEST_SS_AR_BYTES, 0x93); | 2875 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); |
2809 | 2876 | vmx->cpl = 0; | |
2810 | vmcs_write16(GUEST_CS_SELECTOR, | ||
2811 | vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK); | ||
2812 | vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); | ||
2813 | } | 2877 | } |
2814 | 2878 | ||
2815 | static gva_t rmode_tss_base(struct kvm *kvm) | 2879 | static gva_t rmode_tss_base(struct kvm *kvm) |
@@ -2831,36 +2895,51 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
2831 | static void fix_rmode_seg(int seg, struct kvm_segment *save) | 2895 | static void fix_rmode_seg(int seg, struct kvm_segment *save) |
2832 | { | 2896 | { |
2833 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2897 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2834 | 2898 | struct kvm_segment var = *save; | |
2835 | vmcs_write16(sf->selector, save->base >> 4); | 2899 | |
2836 | vmcs_write32(sf->base, save->base & 0xffff0); | 2900 | var.dpl = 0x3; |
2837 | vmcs_write32(sf->limit, 0xffff); | 2901 | if (seg == VCPU_SREG_CS) |
2838 | vmcs_write32(sf->ar_bytes, 0xf3); | 2902 | var.type = 0x3; |
2839 | if (save->base & 0xf) | 2903 | |
2840 | printk_once(KERN_WARNING "kvm: segment base is not paragraph" | 2904 | if (!emulate_invalid_guest_state) { |
2841 | " aligned when entering protected mode (seg=%d)", | 2905 | var.selector = var.base >> 4; |
2842 | seg); | 2906 | var.base = var.base & 0xffff0; |
2907 | var.limit = 0xffff; | ||
2908 | var.g = 0; | ||
2909 | var.db = 0; | ||
2910 | var.present = 1; | ||
2911 | var.s = 1; | ||
2912 | var.l = 0; | ||
2913 | var.unusable = 0; | ||
2914 | var.type = 0x3; | ||
2915 | var.avl = 0; | ||
2916 | if (save->base & 0xf) | ||
2917 | printk_once(KERN_WARNING "kvm: segment base is not " | ||
2918 | "paragraph aligned when entering " | ||
2919 | "protected mode (seg=%d)", seg); | ||
2920 | } | ||
2921 | |||
2922 | vmcs_write16(sf->selector, var.selector); | ||
2923 | vmcs_write32(sf->base, var.base); | ||
2924 | vmcs_write32(sf->limit, var.limit); | ||
2925 | vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); | ||
2843 | } | 2926 | } |
2844 | 2927 | ||
2845 | static void enter_rmode(struct kvm_vcpu *vcpu) | 2928 | static void enter_rmode(struct kvm_vcpu *vcpu) |
2846 | { | 2929 | { |
2847 | unsigned long flags; | 2930 | unsigned long flags; |
2848 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2931 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2849 | struct kvm_segment var; | ||
2850 | |||
2851 | if (enable_unrestricted_guest) | ||
2852 | return; | ||
2853 | 2932 | ||
2854 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); | 2933 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); |
2855 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); | 2934 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); |
2856 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); | 2935 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); |
2857 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); | 2936 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); |
2858 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); | 2937 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); |
2938 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); | ||
2939 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); | ||
2859 | 2940 | ||
2860 | vmx->emulation_required = 1; | ||
2861 | vmx->rmode.vm86_active = 1; | 2941 | vmx->rmode.vm86_active = 1; |
2862 | 2942 | ||
2863 | |||
2864 | /* | 2943 | /* |
2865 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | 2944 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering |
2866 | * vcpu. Call it here with phys address pointing 16M below 4G. | 2945 | * vcpu. Call it here with phys address pointing 16M below 4G. |
@@ -2888,28 +2967,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2888 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); | 2967 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); |
2889 | update_exception_bitmap(vcpu); | 2968 | update_exception_bitmap(vcpu); |
2890 | 2969 | ||
2891 | if (emulate_invalid_guest_state) | 2970 | fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); |
2892 | goto continue_rmode; | 2971 | fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); |
2893 | 2972 | fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); | |
2894 | vmx_get_segment(vcpu, &var, VCPU_SREG_SS); | 2973 | fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); |
2895 | vmx_set_segment(vcpu, &var, VCPU_SREG_SS); | 2974 | fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); |
2896 | 2975 | fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); | |
2897 | vmx_get_segment(vcpu, &var, VCPU_SREG_CS); | ||
2898 | vmx_set_segment(vcpu, &var, VCPU_SREG_CS); | ||
2899 | |||
2900 | vmx_get_segment(vcpu, &var, VCPU_SREG_ES); | ||
2901 | vmx_set_segment(vcpu, &var, VCPU_SREG_ES); | ||
2902 | |||
2903 | vmx_get_segment(vcpu, &var, VCPU_SREG_DS); | ||
2904 | vmx_set_segment(vcpu, &var, VCPU_SREG_DS); | ||
2905 | 2976 | ||
2906 | vmx_get_segment(vcpu, &var, VCPU_SREG_GS); | ||
2907 | vmx_set_segment(vcpu, &var, VCPU_SREG_GS); | ||
2908 | |||
2909 | vmx_get_segment(vcpu, &var, VCPU_SREG_FS); | ||
2910 | vmx_set_segment(vcpu, &var, VCPU_SREG_FS); | ||
2911 | |||
2912 | continue_rmode: | ||
2913 | kvm_mmu_reset_context(vcpu); | 2977 | kvm_mmu_reset_context(vcpu); |
2914 | } | 2978 | } |
2915 | 2979 | ||
@@ -3068,17 +3132,18 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
3068 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3132 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3069 | unsigned long hw_cr0; | 3133 | unsigned long hw_cr0; |
3070 | 3134 | ||
3135 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK); | ||
3071 | if (enable_unrestricted_guest) | 3136 | if (enable_unrestricted_guest) |
3072 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) | 3137 | hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; |
3073 | | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; | 3138 | else { |
3074 | else | 3139 | hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; |
3075 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; | ||
3076 | 3140 | ||
3077 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) | 3141 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) |
3078 | enter_pmode(vcpu); | 3142 | enter_pmode(vcpu); |
3079 | 3143 | ||
3080 | if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) | 3144 | if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) |
3081 | enter_rmode(vcpu); | 3145 | enter_rmode(vcpu); |
3146 | } | ||
3082 | 3147 | ||
3083 | #ifdef CONFIG_X86_64 | 3148 | #ifdef CONFIG_X86_64 |
3084 | if (vcpu->arch.efer & EFER_LME) { | 3149 | if (vcpu->arch.efer & EFER_LME) { |
@@ -3098,7 +3163,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
3098 | vmcs_writel(CR0_READ_SHADOW, cr0); | 3163 | vmcs_writel(CR0_READ_SHADOW, cr0); |
3099 | vmcs_writel(GUEST_CR0, hw_cr0); | 3164 | vmcs_writel(GUEST_CR0, hw_cr0); |
3100 | vcpu->arch.cr0 = cr0; | 3165 | vcpu->arch.cr0 = cr0; |
3101 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | 3166 | |
3167 | /* depends on vcpu->arch.cr0 to be set to a new value */ | ||
3168 | vmx->emulation_required = emulation_required(vcpu); | ||
3102 | } | 3169 | } |
3103 | 3170 | ||
3104 | static u64 construct_eptp(unsigned long root_hpa) | 3171 | static u64 construct_eptp(unsigned long root_hpa) |
@@ -3155,6 +3222,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
3155 | if (!is_paging(vcpu)) { | 3222 | if (!is_paging(vcpu)) { |
3156 | hw_cr4 &= ~X86_CR4_PAE; | 3223 | hw_cr4 &= ~X86_CR4_PAE; |
3157 | hw_cr4 |= X86_CR4_PSE; | 3224 | hw_cr4 |= X86_CR4_PSE; |
3225 | /* | ||
3226 | * SMEP is disabled if CPU is in non-paging mode in | ||
3227 | * hardware. However KVM always uses paging mode to | ||
3228 | * emulate guest non-paging mode with TDP. | ||
3229 | * To emulate this behavior, SMEP needs to be manually | ||
3230 | * disabled when guest switches to non-paging mode. | ||
3231 | */ | ||
3232 | hw_cr4 &= ~X86_CR4_SMEP; | ||
3158 | } else if (!(cr4 & X86_CR4_PAE)) { | 3233 | } else if (!(cr4 & X86_CR4_PAE)) { |
3159 | hw_cr4 &= ~X86_CR4_PAE; | 3234 | hw_cr4 &= ~X86_CR4_PAE; |
3160 | } | 3235 | } |
@@ -3171,10 +3246,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
3171 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3246 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3172 | u32 ar; | 3247 | u32 ar; |
3173 | 3248 | ||
3174 | if (vmx->rmode.vm86_active | 3249 | if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { |
3175 | && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES | ||
3176 | || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS | ||
3177 | || seg == VCPU_SREG_GS)) { | ||
3178 | *var = vmx->rmode.segs[seg]; | 3250 | *var = vmx->rmode.segs[seg]; |
3179 | if (seg == VCPU_SREG_TR | 3251 | if (seg == VCPU_SREG_TR |
3180 | || var->selector == vmx_read_guest_seg_selector(vmx, seg)) | 3252 | || var->selector == vmx_read_guest_seg_selector(vmx, seg)) |
@@ -3187,8 +3259,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
3187 | var->limit = vmx_read_guest_seg_limit(vmx, seg); | 3259 | var->limit = vmx_read_guest_seg_limit(vmx, seg); |
3188 | var->selector = vmx_read_guest_seg_selector(vmx, seg); | 3260 | var->selector = vmx_read_guest_seg_selector(vmx, seg); |
3189 | ar = vmx_read_guest_seg_ar(vmx, seg); | 3261 | ar = vmx_read_guest_seg_ar(vmx, seg); |
3190 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) | ||
3191 | ar = 0; | ||
3192 | var->type = ar & 15; | 3262 | var->type = ar & 15; |
3193 | var->s = (ar >> 4) & 1; | 3263 | var->s = (ar >> 4) & 1; |
3194 | var->dpl = (ar >> 5) & 3; | 3264 | var->dpl = (ar >> 5) & 3; |
@@ -3211,8 +3281,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | |||
3211 | return vmx_read_guest_seg_base(to_vmx(vcpu), seg); | 3281 | return vmx_read_guest_seg_base(to_vmx(vcpu), seg); |
3212 | } | 3282 | } |
3213 | 3283 | ||
3214 | static int __vmx_get_cpl(struct kvm_vcpu *vcpu) | 3284 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
3215 | { | 3285 | { |
3286 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3287 | |||
3216 | if (!is_protmode(vcpu)) | 3288 | if (!is_protmode(vcpu)) |
3217 | return 0; | 3289 | return 0; |
3218 | 3290 | ||
@@ -3220,24 +3292,9 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu) | |||
3220 | && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ | 3292 | && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ |
3221 | return 3; | 3293 | return 3; |
3222 | 3294 | ||
3223 | return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; | ||
3224 | } | ||
3225 | |||
3226 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | ||
3227 | { | ||
3228 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3229 | |||
3230 | /* | ||
3231 | * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations | ||
3232 | * fail; use the cache instead. | ||
3233 | */ | ||
3234 | if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) { | ||
3235 | return vmx->cpl; | ||
3236 | } | ||
3237 | |||
3238 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { | 3295 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { |
3239 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | 3296 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); |
3240 | vmx->cpl = __vmx_get_cpl(vcpu); | 3297 | vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3; |
3241 | } | 3298 | } |
3242 | 3299 | ||
3243 | return vmx->cpl; | 3300 | return vmx->cpl; |
@@ -3269,28 +3326,23 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
3269 | { | 3326 | { |
3270 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3327 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3271 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 3328 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
3272 | u32 ar; | ||
3273 | 3329 | ||
3274 | vmx_segment_cache_clear(vmx); | 3330 | vmx_segment_cache_clear(vmx); |
3331 | if (seg == VCPU_SREG_CS) | ||
3332 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
3275 | 3333 | ||
3276 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { | 3334 | if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { |
3277 | vmcs_write16(sf->selector, var->selector); | 3335 | vmx->rmode.segs[seg] = *var; |
3278 | vmx->rmode.segs[VCPU_SREG_TR] = *var; | 3336 | if (seg == VCPU_SREG_TR) |
3279 | return; | 3337 | vmcs_write16(sf->selector, var->selector); |
3338 | else if (var->s) | ||
3339 | fix_rmode_seg(seg, &vmx->rmode.segs[seg]); | ||
3340 | goto out; | ||
3280 | } | 3341 | } |
3342 | |||
3281 | vmcs_writel(sf->base, var->base); | 3343 | vmcs_writel(sf->base, var->base); |
3282 | vmcs_write32(sf->limit, var->limit); | 3344 | vmcs_write32(sf->limit, var->limit); |
3283 | vmcs_write16(sf->selector, var->selector); | 3345 | vmcs_write16(sf->selector, var->selector); |
3284 | if (vmx->rmode.vm86_active && var->s) { | ||
3285 | vmx->rmode.segs[seg] = *var; | ||
3286 | /* | ||
3287 | * Hack real-mode segments into vm86 compatibility. | ||
3288 | */ | ||
3289 | if (var->base == 0xffff0000 && var->selector == 0xf000) | ||
3290 | vmcs_writel(sf->base, 0xf0000); | ||
3291 | ar = 0xf3; | ||
3292 | } else | ||
3293 | ar = vmx_segment_access_rights(var); | ||
3294 | 3346 | ||
3295 | /* | 3347 | /* |
3296 | * Fix the "Accessed" bit in AR field of segment registers for older | 3348 | * Fix the "Accessed" bit in AR field of segment registers for older |
@@ -3304,42 +3356,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
3304 | * kvm hack. | 3356 | * kvm hack. |
3305 | */ | 3357 | */ |
3306 | if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) | 3358 | if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) |
3307 | ar |= 0x1; /* Accessed */ | 3359 | var->type |= 0x1; /* Accessed */ |
3308 | 3360 | ||
3309 | vmcs_write32(sf->ar_bytes, ar); | 3361 | vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); |
3310 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | ||
3311 | 3362 | ||
3312 | /* | 3363 | out: |
3313 | * Fix segments for real mode guest in hosts that don't have | 3364 | vmx->emulation_required |= emulation_required(vcpu); |
3314 | * "unrestricted_mode" or it was disabled. | ||
3315 | * This is done to allow migration of the guests from hosts with | ||
3316 | * unrestricted guest like Westmere to older host that don't have | ||
3317 | * unrestricted guest like Nehelem. | ||
3318 | */ | ||
3319 | if (vmx->rmode.vm86_active) { | ||
3320 | switch (seg) { | ||
3321 | case VCPU_SREG_CS: | ||
3322 | vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); | ||
3323 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | ||
3324 | if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) | ||
3325 | vmcs_writel(GUEST_CS_BASE, 0xf0000); | ||
3326 | vmcs_write16(GUEST_CS_SELECTOR, | ||
3327 | vmcs_readl(GUEST_CS_BASE) >> 4); | ||
3328 | break; | ||
3329 | case VCPU_SREG_ES: | ||
3330 | case VCPU_SREG_DS: | ||
3331 | case VCPU_SREG_GS: | ||
3332 | case VCPU_SREG_FS: | ||
3333 | fix_rmode_seg(seg, &vmx->rmode.segs[seg]); | ||
3334 | break; | ||
3335 | case VCPU_SREG_SS: | ||
3336 | vmcs_write16(GUEST_SS_SELECTOR, | ||
3337 | vmcs_readl(GUEST_SS_BASE) >> 4); | ||
3338 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); | ||
3339 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); | ||
3340 | break; | ||
3341 | } | ||
3342 | } | ||
3343 | } | 3365 | } |
3344 | 3366 | ||
3345 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 3367 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
@@ -3380,13 +3402,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | |||
3380 | u32 ar; | 3402 | u32 ar; |
3381 | 3403 | ||
3382 | vmx_get_segment(vcpu, &var, seg); | 3404 | vmx_get_segment(vcpu, &var, seg); |
3405 | var.dpl = 0x3; | ||
3406 | if (seg == VCPU_SREG_CS) | ||
3407 | var.type = 0x3; | ||
3383 | ar = vmx_segment_access_rights(&var); | 3408 | ar = vmx_segment_access_rights(&var); |
3384 | 3409 | ||
3385 | if (var.base != (var.selector << 4)) | 3410 | if (var.base != (var.selector << 4)) |
3386 | return false; | 3411 | return false; |
3387 | if (var.limit < 0xffff) | 3412 | if (var.limit != 0xffff) |
3388 | return false; | 3413 | return false; |
3389 | if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) | 3414 | if (ar != 0xf3) |
3390 | return false; | 3415 | return false; |
3391 | 3416 | ||
3392 | return true; | 3417 | return true; |
@@ -3521,6 +3546,9 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) | |||
3521 | */ | 3546 | */ |
3522 | static bool guest_state_valid(struct kvm_vcpu *vcpu) | 3547 | static bool guest_state_valid(struct kvm_vcpu *vcpu) |
3523 | { | 3548 | { |
3549 | if (enable_unrestricted_guest) | ||
3550 | return true; | ||
3551 | |||
3524 | /* real mode guest state checks */ | 3552 | /* real mode guest state checks */ |
3525 | if (!is_protmode(vcpu)) { | 3553 | if (!is_protmode(vcpu)) { |
3526 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 3554 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
@@ -3644,12 +3672,9 @@ static void seg_setup(int seg) | |||
3644 | vmcs_write16(sf->selector, 0); | 3672 | vmcs_write16(sf->selector, 0); |
3645 | vmcs_writel(sf->base, 0); | 3673 | vmcs_writel(sf->base, 0); |
3646 | vmcs_write32(sf->limit, 0xffff); | 3674 | vmcs_write32(sf->limit, 0xffff); |
3647 | if (enable_unrestricted_guest) { | 3675 | ar = 0x93; |
3648 | ar = 0x93; | 3676 | if (seg == VCPU_SREG_CS) |
3649 | if (seg == VCPU_SREG_CS) | 3677 | ar |= 0x08; /* code segment */ |
3650 | ar |= 0x08; /* code segment */ | ||
3651 | } else | ||
3652 | ar = 0xf3; | ||
3653 | 3678 | ||
3654 | vmcs_write32(sf->ar_bytes, ar); | 3679 | vmcs_write32(sf->ar_bytes, ar); |
3655 | } | 3680 | } |
@@ -3667,7 +3692,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
3667 | kvm_userspace_mem.flags = 0; | 3692 | kvm_userspace_mem.flags = 0; |
3668 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; | 3693 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; |
3669 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3694 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
3670 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); | 3695 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); |
3671 | if (r) | 3696 | if (r) |
3672 | goto out; | 3697 | goto out; |
3673 | 3698 | ||
@@ -3697,7 +3722,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
3697 | kvm_userspace_mem.guest_phys_addr = | 3722 | kvm_userspace_mem.guest_phys_addr = |
3698 | kvm->arch.ept_identity_map_addr; | 3723 | kvm->arch.ept_identity_map_addr; |
3699 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3724 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
3700 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); | 3725 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); |
3701 | if (r) | 3726 | if (r) |
3702 | goto out; | 3727 | goto out; |
3703 | 3728 | ||
@@ -3739,7 +3764,10 @@ static void free_vpid(struct vcpu_vmx *vmx) | |||
3739 | spin_unlock(&vmx_vpid_lock); | 3764 | spin_unlock(&vmx_vpid_lock); |
3740 | } | 3765 | } |
3741 | 3766 | ||
3742 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 3767 | #define MSR_TYPE_R 1 |
3768 | #define MSR_TYPE_W 2 | ||
3769 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3770 | u32 msr, int type) | ||
3743 | { | 3771 | { |
3744 | int f = sizeof(unsigned long); | 3772 | int f = sizeof(unsigned long); |
3745 | 3773 | ||
@@ -3752,20 +3780,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | |||
3752 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | 3780 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. |
3753 | */ | 3781 | */ |
3754 | if (msr <= 0x1fff) { | 3782 | if (msr <= 0x1fff) { |
3755 | __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ | 3783 | if (type & MSR_TYPE_R) |
3756 | __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ | 3784 | /* read-low */ |
3785 | __clear_bit(msr, msr_bitmap + 0x000 / f); | ||
3786 | |||
3787 | if (type & MSR_TYPE_W) | ||
3788 | /* write-low */ | ||
3789 | __clear_bit(msr, msr_bitmap + 0x800 / f); | ||
3790 | |||
3757 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | 3791 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
3758 | msr &= 0x1fff; | 3792 | msr &= 0x1fff; |
3759 | __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ | 3793 | if (type & MSR_TYPE_R) |
3760 | __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ | 3794 | /* read-high */ |
3795 | __clear_bit(msr, msr_bitmap + 0x400 / f); | ||
3796 | |||
3797 | if (type & MSR_TYPE_W) | ||
3798 | /* write-high */ | ||
3799 | __clear_bit(msr, msr_bitmap + 0xc00 / f); | ||
3800 | |||
3801 | } | ||
3802 | } | ||
3803 | |||
3804 | static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3805 | u32 msr, int type) | ||
3806 | { | ||
3807 | int f = sizeof(unsigned long); | ||
3808 | |||
3809 | if (!cpu_has_vmx_msr_bitmap()) | ||
3810 | return; | ||
3811 | |||
3812 | /* | ||
3813 | * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals | ||
3814 | * have the write-low and read-high bitmap offsets the wrong way round. | ||
3815 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | ||
3816 | */ | ||
3817 | if (msr <= 0x1fff) { | ||
3818 | if (type & MSR_TYPE_R) | ||
3819 | /* read-low */ | ||
3820 | __set_bit(msr, msr_bitmap + 0x000 / f); | ||
3821 | |||
3822 | if (type & MSR_TYPE_W) | ||
3823 | /* write-low */ | ||
3824 | __set_bit(msr, msr_bitmap + 0x800 / f); | ||
3825 | |||
3826 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
3827 | msr &= 0x1fff; | ||
3828 | if (type & MSR_TYPE_R) | ||
3829 | /* read-high */ | ||
3830 | __set_bit(msr, msr_bitmap + 0x400 / f); | ||
3831 | |||
3832 | if (type & MSR_TYPE_W) | ||
3833 | /* write-high */ | ||
3834 | __set_bit(msr, msr_bitmap + 0xc00 / f); | ||
3835 | |||
3761 | } | 3836 | } |
3762 | } | 3837 | } |
3763 | 3838 | ||
3764 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | 3839 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) |
3765 | { | 3840 | { |
3766 | if (!longmode_only) | 3841 | if (!longmode_only) |
3767 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); | 3842 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, |
3768 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); | 3843 | msr, MSR_TYPE_R | MSR_TYPE_W); |
3844 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, | ||
3845 | msr, MSR_TYPE_R | MSR_TYPE_W); | ||
3846 | } | ||
3847 | |||
3848 | static void vmx_enable_intercept_msr_read_x2apic(u32 msr) | ||
3849 | { | ||
3850 | __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3851 | msr, MSR_TYPE_R); | ||
3852 | __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3853 | msr, MSR_TYPE_R); | ||
3854 | } | ||
3855 | |||
3856 | static void vmx_disable_intercept_msr_read_x2apic(u32 msr) | ||
3857 | { | ||
3858 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3859 | msr, MSR_TYPE_R); | ||
3860 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3861 | msr, MSR_TYPE_R); | ||
3862 | } | ||
3863 | |||
3864 | static void vmx_disable_intercept_msr_write_x2apic(u32 msr) | ||
3865 | { | ||
3866 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3867 | msr, MSR_TYPE_W); | ||
3868 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3869 | msr, MSR_TYPE_W); | ||
3769 | } | 3870 | } |
3770 | 3871 | ||
3771 | /* | 3872 | /* |
@@ -3844,6 +3945,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
3844 | return exec_control; | 3945 | return exec_control; |
3845 | } | 3946 | } |
3846 | 3947 | ||
3948 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
3949 | { | ||
3950 | return enable_apicv_reg_vid && irqchip_in_kernel(kvm); | ||
3951 | } | ||
3952 | |||
3847 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 3953 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) |
3848 | { | 3954 | { |
3849 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 3955 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
@@ -3861,6 +3967,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
3861 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 3967 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
3862 | if (!ple_gap) | 3968 | if (!ple_gap) |
3863 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 3969 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
3970 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | ||
3971 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
3972 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
3973 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
3864 | return exec_control; | 3974 | return exec_control; |
3865 | } | 3975 | } |
3866 | 3976 | ||
@@ -3905,6 +4015,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
3905 | vmx_secondary_exec_control(vmx)); | 4015 | vmx_secondary_exec_control(vmx)); |
3906 | } | 4016 | } |
3907 | 4017 | ||
4018 | if (enable_apicv_reg_vid) { | ||
4019 | vmcs_write64(EOI_EXIT_BITMAP0, 0); | ||
4020 | vmcs_write64(EOI_EXIT_BITMAP1, 0); | ||
4021 | vmcs_write64(EOI_EXIT_BITMAP2, 0); | ||
4022 | vmcs_write64(EOI_EXIT_BITMAP3, 0); | ||
4023 | |||
4024 | vmcs_write16(GUEST_INTR_STATUS, 0); | ||
4025 | } | ||
4026 | |||
3908 | if (ple_gap) { | 4027 | if (ple_gap) { |
3909 | vmcs_write32(PLE_GAP, ple_gap); | 4028 | vmcs_write32(PLE_GAP, ple_gap); |
3910 | vmcs_write32(PLE_WINDOW, ple_window); | 4029 | vmcs_write32(PLE_WINDOW, ple_window); |
@@ -3990,14 +4109,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
3990 | vmx_segment_cache_clear(vmx); | 4109 | vmx_segment_cache_clear(vmx); |
3991 | 4110 | ||
3992 | seg_setup(VCPU_SREG_CS); | 4111 | seg_setup(VCPU_SREG_CS); |
3993 | /* | 4112 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) |
3994 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode | ||
3995 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. | ||
3996 | */ | ||
3997 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) { | ||
3998 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); | 4113 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); |
3999 | vmcs_writel(GUEST_CS_BASE, 0x000f0000); | 4114 | else { |
4000 | } else { | ||
4001 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); | 4115 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); |
4002 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); | 4116 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); |
4003 | } | 4117 | } |
@@ -4073,9 +4187,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4073 | 4187 | ||
4074 | ret = 0; | 4188 | ret = 0; |
4075 | 4189 | ||
4076 | /* HACK: Don't enable emulation on guest boot/reset */ | ||
4077 | vmx->emulation_required = 0; | ||
4078 | |||
4079 | return ret; | 4190 | return ret; |
4080 | } | 4191 | } |
4081 | 4192 | ||
@@ -4251,7 +4362,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
4251 | .flags = 0, | 4362 | .flags = 0, |
4252 | }; | 4363 | }; |
4253 | 4364 | ||
4254 | ret = kvm_set_memory_region(kvm, &tss_mem, 0); | 4365 | ret = kvm_set_memory_region(kvm, &tss_mem, false); |
4255 | if (ret) | 4366 | if (ret) |
4256 | return ret; | 4367 | return ret; |
4257 | kvm->arch.tss_addr = addr; | 4368 | kvm->arch.tss_addr = addr; |
@@ -4261,28 +4372,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
4261 | return 0; | 4372 | return 0; |
4262 | } | 4373 | } |
4263 | 4374 | ||
4264 | static int handle_rmode_exception(struct kvm_vcpu *vcpu, | 4375 | static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) |
4265 | int vec, u32 err_code) | ||
4266 | { | 4376 | { |
4267 | /* | ||
4268 | * Instruction with address size override prefix opcode 0x67 | ||
4269 | * Cause the #SS fault with 0 error code in VM86 mode. | ||
4270 | */ | ||
4271 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | ||
4272 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) | ||
4273 | return 1; | ||
4274 | /* | ||
4275 | * Forward all other exceptions that are valid in real mode. | ||
4276 | * FIXME: Breaks guest debugging in real mode, needs to be fixed with | ||
4277 | * the required debugging infrastructure rework. | ||
4278 | */ | ||
4279 | switch (vec) { | 4377 | switch (vec) { |
4280 | case DB_VECTOR: | ||
4281 | if (vcpu->guest_debug & | ||
4282 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | ||
4283 | return 0; | ||
4284 | kvm_queue_exception(vcpu, vec); | ||
4285 | return 1; | ||
4286 | case BP_VECTOR: | 4378 | case BP_VECTOR: |
4287 | /* | 4379 | /* |
4288 | * Update instruction length as we may reinject the exception | 4380 | * Update instruction length as we may reinject the exception |
@@ -4291,7 +4383,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
4291 | to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = | 4383 | to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = |
4292 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 4384 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
4293 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 4385 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
4294 | return 0; | 4386 | return false; |
4387 | /* fall through */ | ||
4388 | case DB_VECTOR: | ||
4389 | if (vcpu->guest_debug & | ||
4390 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | ||
4391 | return false; | ||
4295 | /* fall through */ | 4392 | /* fall through */ |
4296 | case DE_VECTOR: | 4393 | case DE_VECTOR: |
4297 | case OF_VECTOR: | 4394 | case OF_VECTOR: |
@@ -4301,10 +4398,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
4301 | case SS_VECTOR: | 4398 | case SS_VECTOR: |
4302 | case GP_VECTOR: | 4399 | case GP_VECTOR: |
4303 | case MF_VECTOR: | 4400 | case MF_VECTOR: |
4304 | kvm_queue_exception(vcpu, vec); | 4401 | return true; |
4305 | return 1; | 4402 | break; |
4306 | } | 4403 | } |
4307 | return 0; | 4404 | return false; |
4405 | } | ||
4406 | |||
4407 | static int handle_rmode_exception(struct kvm_vcpu *vcpu, | ||
4408 | int vec, u32 err_code) | ||
4409 | { | ||
4410 | /* | ||
4411 | * Instruction with address size override prefix opcode 0x67 | ||
4412 | * Cause the #SS fault with 0 error code in VM86 mode. | ||
4413 | */ | ||
4414 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { | ||
4415 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { | ||
4416 | if (vcpu->arch.halt_request) { | ||
4417 | vcpu->arch.halt_request = 0; | ||
4418 | return kvm_emulate_halt(vcpu); | ||
4419 | } | ||
4420 | return 1; | ||
4421 | } | ||
4422 | return 0; | ||
4423 | } | ||
4424 | |||
4425 | /* | ||
4426 | * Forward all other exceptions that are valid in real mode. | ||
4427 | * FIXME: Breaks guest debugging in real mode, needs to be fixed with | ||
4428 | * the required debugging infrastructure rework. | ||
4429 | */ | ||
4430 | kvm_queue_exception(vcpu, vec); | ||
4431 | return 1; | ||
4308 | } | 4432 | } |
4309 | 4433 | ||
4310 | /* | 4434 | /* |
@@ -4392,17 +4516,11 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
4392 | return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); | 4516 | return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); |
4393 | } | 4517 | } |
4394 | 4518 | ||
4395 | if (vmx->rmode.vm86_active && | ||
4396 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, | ||
4397 | error_code)) { | ||
4398 | if (vcpu->arch.halt_request) { | ||
4399 | vcpu->arch.halt_request = 0; | ||
4400 | return kvm_emulate_halt(vcpu); | ||
4401 | } | ||
4402 | return 1; | ||
4403 | } | ||
4404 | |||
4405 | ex_no = intr_info & INTR_INFO_VECTOR_MASK; | 4519 | ex_no = intr_info & INTR_INFO_VECTOR_MASK; |
4520 | |||
4521 | if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) | ||
4522 | return handle_rmode_exception(vcpu, ex_no, error_code); | ||
4523 | |||
4406 | switch (ex_no) { | 4524 | switch (ex_no) { |
4407 | case DB_VECTOR: | 4525 | case DB_VECTOR: |
4408 | dr6 = vmcs_readl(EXIT_QUALIFICATION); | 4526 | dr6 = vmcs_readl(EXIT_QUALIFICATION); |
@@ -4820,6 +4938,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) | |||
4820 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | 4938 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
4821 | } | 4939 | } |
4822 | 4940 | ||
4941 | static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) | ||
4942 | { | ||
4943 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
4944 | int vector = exit_qualification & 0xff; | ||
4945 | |||
4946 | /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ | ||
4947 | kvm_apic_set_eoi_accelerated(vcpu, vector); | ||
4948 | return 1; | ||
4949 | } | ||
4950 | |||
4951 | static int handle_apic_write(struct kvm_vcpu *vcpu) | ||
4952 | { | ||
4953 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
4954 | u32 offset = exit_qualification & 0xfff; | ||
4955 | |||
4956 | /* APIC-write VM exit is trap-like and thus no need to adjust IP */ | ||
4957 | kvm_apic_write_nodecode(vcpu, offset); | ||
4958 | return 1; | ||
4959 | } | ||
4960 | |||
4823 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 4961 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
4824 | { | 4962 | { |
4825 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4963 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -5065,7 +5203,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5065 | schedule(); | 5203 | schedule(); |
5066 | } | 5204 | } |
5067 | 5205 | ||
5068 | vmx->emulation_required = !guest_state_valid(vcpu); | 5206 | vmx->emulation_required = emulation_required(vcpu); |
5069 | out: | 5207 | out: |
5070 | return ret; | 5208 | return ret; |
5071 | } | 5209 | } |
@@ -5754,6 +5892,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
5754 | [EXIT_REASON_VMON] = handle_vmon, | 5892 | [EXIT_REASON_VMON] = handle_vmon, |
5755 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 5893 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
5756 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 5894 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
5895 | [EXIT_REASON_APIC_WRITE] = handle_apic_write, | ||
5896 | [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, | ||
5757 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 5897 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
5758 | [EXIT_REASON_XSETBV] = handle_xsetbv, | 5898 | [EXIT_REASON_XSETBV] = handle_xsetbv, |
5759 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 5899 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
@@ -5780,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | |||
5780 | u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; | 5920 | u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; |
5781 | gpa_t bitmap; | 5921 | gpa_t bitmap; |
5782 | 5922 | ||
5783 | if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS)) | 5923 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) |
5784 | return 1; | 5924 | return 1; |
5785 | 5925 | ||
5786 | /* | 5926 | /* |
@@ -6008,7 +6148,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
6008 | u32 vectoring_info = vmx->idt_vectoring_info; | 6148 | u32 vectoring_info = vmx->idt_vectoring_info; |
6009 | 6149 | ||
6010 | /* If guest state is invalid, start emulating */ | 6150 | /* If guest state is invalid, start emulating */ |
6011 | if (vmx->emulation_required && emulate_invalid_guest_state) | 6151 | if (vmx->emulation_required) |
6012 | return handle_invalid_guest_state(vcpu); | 6152 | return handle_invalid_guest_state(vcpu); |
6013 | 6153 | ||
6014 | /* | 6154 | /* |
@@ -6103,6 +6243,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
6103 | vmcs_write32(TPR_THRESHOLD, irr); | 6243 | vmcs_write32(TPR_THRESHOLD, irr); |
6104 | } | 6244 | } |
6105 | 6245 | ||
6246 | static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | ||
6247 | { | ||
6248 | u32 sec_exec_control; | ||
6249 | |||
6250 | /* | ||
6251 | * There is no point in enabling virtualized x2apic mode without | ||
6252 | * enabling apicv | ||
6253 | */ | ||
6254 | if (!cpu_has_vmx_virtualize_x2apic_mode() || | ||
6255 | !vmx_vm_has_apicv(vcpu->kvm)) | ||
6256 | return; | ||
6257 | |||
6258 | if (!vm_need_tpr_shadow(vcpu->kvm)) | ||
6259 | return; | ||
6260 | |||
6261 | sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
6262 | |||
6263 | if (set) { | ||
6264 | sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
6265 | sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
6266 | } else { | ||
6267 | sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
6268 | sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
6269 | } | ||
6270 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); | ||
6271 | |||
6272 | vmx_set_msr_bitmap(vcpu); | ||
6273 | } | ||
6274 | |||
6275 | static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) | ||
6276 | { | ||
6277 | u16 status; | ||
6278 | u8 old; | ||
6279 | |||
6280 | if (!vmx_vm_has_apicv(kvm)) | ||
6281 | return; | ||
6282 | |||
6283 | if (isr == -1) | ||
6284 | isr = 0; | ||
6285 | |||
6286 | status = vmcs_read16(GUEST_INTR_STATUS); | ||
6287 | old = status >> 8; | ||
6288 | if (isr != old) { | ||
6289 | status &= 0xff; | ||
6290 | status |= isr << 8; | ||
6291 | vmcs_write16(GUEST_INTR_STATUS, status); | ||
6292 | } | ||
6293 | } | ||
6294 | |||
6295 | static void vmx_set_rvi(int vector) | ||
6296 | { | ||
6297 | u16 status; | ||
6298 | u8 old; | ||
6299 | |||
6300 | status = vmcs_read16(GUEST_INTR_STATUS); | ||
6301 | old = (u8)status & 0xff; | ||
6302 | if ((u8)vector != old) { | ||
6303 | status &= ~0xff; | ||
6304 | status |= (u8)vector; | ||
6305 | vmcs_write16(GUEST_INTR_STATUS, status); | ||
6306 | } | ||
6307 | } | ||
6308 | |||
6309 | static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | ||
6310 | { | ||
6311 | if (max_irr == -1) | ||
6312 | return; | ||
6313 | |||
6314 | vmx_set_rvi(max_irr); | ||
6315 | } | ||
6316 | |||
6317 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | ||
6318 | { | ||
6319 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); | ||
6320 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); | ||
6321 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); | ||
6322 | vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); | ||
6323 | } | ||
6324 | |||
6106 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | 6325 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) |
6107 | { | 6326 | { |
6108 | u32 exit_intr_info; | 6327 | u32 exit_intr_info; |
@@ -6291,7 +6510,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6291 | 6510 | ||
6292 | /* Don't enter VMX if guest state is invalid, let the exit handler | 6511 | /* Don't enter VMX if guest state is invalid, let the exit handler |
6293 | start emulation until we arrive back to a valid state */ | 6512 | start emulation until we arrive back to a valid state */ |
6294 | if (vmx->emulation_required && emulate_invalid_guest_state) | 6513 | if (vmx->emulation_required) |
6295 | return; | 6514 | return; |
6296 | 6515 | ||
6297 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6516 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
@@ -7366,6 +7585,11 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7366 | .enable_nmi_window = enable_nmi_window, | 7585 | .enable_nmi_window = enable_nmi_window, |
7367 | .enable_irq_window = enable_irq_window, | 7586 | .enable_irq_window = enable_irq_window, |
7368 | .update_cr8_intercept = update_cr8_intercept, | 7587 | .update_cr8_intercept = update_cr8_intercept, |
7588 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, | ||
7589 | .vm_has_apicv = vmx_vm_has_apicv, | ||
7590 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | ||
7591 | .hwapic_irr_update = vmx_hwapic_irr_update, | ||
7592 | .hwapic_isr_update = vmx_hwapic_isr_update, | ||
7369 | 7593 | ||
7370 | .set_tss_addr = vmx_set_tss_addr, | 7594 | .set_tss_addr = vmx_set_tss_addr, |
7371 | .get_tdp_level = get_ept_level, | 7595 | .get_tdp_level = get_ept_level, |
@@ -7398,7 +7622,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7398 | 7622 | ||
7399 | static int __init vmx_init(void) | 7623 | static int __init vmx_init(void) |
7400 | { | 7624 | { |
7401 | int r, i; | 7625 | int r, i, msr; |
7402 | 7626 | ||
7403 | rdmsrl_safe(MSR_EFER, &host_efer); | 7627 | rdmsrl_safe(MSR_EFER, &host_efer); |
7404 | 7628 | ||
@@ -7419,11 +7643,19 @@ static int __init vmx_init(void) | |||
7419 | if (!vmx_msr_bitmap_legacy) | 7643 | if (!vmx_msr_bitmap_legacy) |
7420 | goto out1; | 7644 | goto out1; |
7421 | 7645 | ||
7646 | vmx_msr_bitmap_legacy_x2apic = | ||
7647 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
7648 | if (!vmx_msr_bitmap_legacy_x2apic) | ||
7649 | goto out2; | ||
7422 | 7650 | ||
7423 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); | 7651 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); |
7424 | if (!vmx_msr_bitmap_longmode) | 7652 | if (!vmx_msr_bitmap_longmode) |
7425 | goto out2; | 7653 | goto out3; |
7426 | 7654 | ||
7655 | vmx_msr_bitmap_longmode_x2apic = | ||
7656 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
7657 | if (!vmx_msr_bitmap_longmode_x2apic) | ||
7658 | goto out4; | ||
7427 | 7659 | ||
7428 | /* | 7660 | /* |
7429 | * Allow direct access to the PC debug port (it is often used for I/O | 7661 | * Allow direct access to the PC debug port (it is often used for I/O |
@@ -7455,6 +7687,28 @@ static int __init vmx_init(void) | |||
7455 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); | 7687 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
7456 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); | 7688 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
7457 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | 7689 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
7690 | memcpy(vmx_msr_bitmap_legacy_x2apic, | ||
7691 | vmx_msr_bitmap_legacy, PAGE_SIZE); | ||
7692 | memcpy(vmx_msr_bitmap_longmode_x2apic, | ||
7693 | vmx_msr_bitmap_longmode, PAGE_SIZE); | ||
7694 | |||
7695 | if (enable_apicv_reg_vid) { | ||
7696 | for (msr = 0x800; msr <= 0x8ff; msr++) | ||
7697 | vmx_disable_intercept_msr_read_x2apic(msr); | ||
7698 | |||
7699 | /* According to the SDM, in x2apic mode the whole ID register is | ||
7700 | * used, but KVM only uses the highest eight bits, so reads of it | ||
7701 | * need to be intercepted */ | ||
7702 | vmx_enable_intercept_msr_read_x2apic(0x802); | ||
7703 | /* TMCCT */ | ||
7704 | vmx_enable_intercept_msr_read_x2apic(0x839); | ||
7705 | /* TPR */ | ||
7706 | vmx_disable_intercept_msr_write_x2apic(0x808); | ||
7707 | /* EOI */ | ||
7708 | vmx_disable_intercept_msr_write_x2apic(0x80b); | ||
7709 | /* SELF-IPI */ | ||
7710 | vmx_disable_intercept_msr_write_x2apic(0x83f); | ||
7711 | } | ||
7458 | 7712 | ||
7459 | if (enable_ept) { | 7713 | if (enable_ept) { |
7460 | kvm_mmu_set_mask_ptes(0ull, | 7714 | kvm_mmu_set_mask_ptes(0ull, |
@@ -7468,8 +7722,10 @@ static int __init vmx_init(void) | |||
7468 | 7722 | ||
7469 | return 0; | 7723 | return 0; |
7470 | 7724 | ||
7471 | out3: | 7725 | out4: |
7472 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 7726 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
7727 | out3: | ||
7728 | free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); | ||
7473 | out2: | 7729 | out2: |
7474 | free_page((unsigned long)vmx_msr_bitmap_legacy); | 7730 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
7475 | out1: | 7731 | out1: |
@@ -7481,6 +7737,8 @@ out: | |||
7481 | 7737 | ||
7482 | static void __exit vmx_exit(void) | 7738 | static void __exit vmx_exit(void) |
7483 | { | 7739 | { |
7740 | free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); | ||
7741 | free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | ||
7484 | free_page((unsigned long)vmx_msr_bitmap_legacy); | 7742 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
7485 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 7743 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
7486 | free_page((unsigned long)vmx_io_bitmap_b); | 7744 | free_page((unsigned long)vmx_io_bitmap_b); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 37040079cd6b..f71500af1f81 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
872 | 872 | ||
873 | kvm_x86_ops->set_efer(vcpu, efer); | 873 | kvm_x86_ops->set_efer(vcpu, efer); |
874 | 874 | ||
875 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | ||
876 | |||
877 | /* Update reserved bits */ | 875 | /* Update reserved bits */ |
878 | if ((efer ^ old_efer) & EFER_NX) | 876 | if ((efer ^ old_efer) & EFER_NX) |
879 | kvm_mmu_reset_context(vcpu); | 877 | kvm_mmu_reset_context(vcpu); |
@@ -2522,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2522 | r = KVM_MAX_VCPUS; | 2520 | r = KVM_MAX_VCPUS; |
2523 | break; | 2521 | break; |
2524 | case KVM_CAP_NR_MEMSLOTS: | 2522 | case KVM_CAP_NR_MEMSLOTS: |
2525 | r = KVM_MEMORY_SLOTS; | 2523 | r = KVM_USER_MEM_SLOTS; |
2526 | break; | 2524 | break; |
2527 | case KVM_CAP_PV_MMU: /* obsolete */ | 2525 | case KVM_CAP_PV_MMU: /* obsolete */ |
2528 | r = 0; | 2526 | r = 0; |
@@ -3274,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
3274 | return -EINVAL; | 3272 | return -EINVAL; |
3275 | 3273 | ||
3276 | mutex_lock(&kvm->slots_lock); | 3274 | mutex_lock(&kvm->slots_lock); |
3277 | spin_lock(&kvm->mmu_lock); | ||
3278 | 3275 | ||
3279 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 3276 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
3280 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 3277 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
3281 | 3278 | ||
3282 | spin_unlock(&kvm->mmu_lock); | ||
3283 | mutex_unlock(&kvm->slots_lock); | 3279 | mutex_unlock(&kvm->slots_lock); |
3284 | return 0; | 3280 | return 0; |
3285 | } | 3281 | } |
@@ -3439,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | |||
3439 | mutex_lock(&kvm->slots_lock); | 3435 | mutex_lock(&kvm->slots_lock); |
3440 | 3436 | ||
3441 | r = -EINVAL; | 3437 | r = -EINVAL; |
3442 | if (log->slot >= KVM_MEMORY_SLOTS) | 3438 | if (log->slot >= KVM_USER_MEM_SLOTS) |
3443 | goto out; | 3439 | goto out; |
3444 | 3440 | ||
3445 | memslot = id_to_memslot(kvm->memslots, log->slot); | 3441 | memslot = id_to_memslot(kvm->memslots, log->slot); |
@@ -4495,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, | |||
4495 | kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); | 4491 | kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); |
4496 | *selector = var.selector; | 4492 | *selector = var.selector; |
4497 | 4493 | ||
4498 | if (var.unusable) | 4494 | if (var.unusable) { |
4495 | memset(desc, 0, sizeof(*desc)); | ||
4499 | return false; | 4496 | return false; |
4497 | } | ||
4500 | 4498 | ||
4501 | if (var.g) | 4499 | if (var.g) |
4502 | var.limit >>= 12; | 4500 | var.limit >>= 12; |
@@ -4757,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) | |||
4757 | return r; | 4755 | return r; |
4758 | } | 4756 | } |
4759 | 4757 | ||
4760 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | 4758 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, |
4759 | bool write_fault_to_shadow_pgtable) | ||
4761 | { | 4760 | { |
4762 | gpa_t gpa; | 4761 | gpa_t gpa = cr2; |
4763 | pfn_t pfn; | 4762 | pfn_t pfn; |
4764 | 4763 | ||
4765 | if (tdp_enabled) | 4764 | if (!vcpu->arch.mmu.direct_map) { |
4766 | return false; | 4765 | /* |
4767 | 4766 | * Write permission should be allowed since only | |
4768 | /* | 4767 | * write access needs to be emulated. |
4769 | * if emulation was due to access to shadowed page table | 4768 | */ |
4770 | * and it failed try to unshadow page and re-enter the | 4769 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); |
4771 | * guest to let CPU execute the instruction. | ||
4772 | */ | ||
4773 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) | ||
4774 | return true; | ||
4775 | |||
4776 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); | ||
4777 | 4770 | ||
4778 | if (gpa == UNMAPPED_GVA) | 4771 | /* |
4779 | return true; /* let cpu generate fault */ | 4772 | * If the mapping is invalid in guest, let cpu retry |
4773 | * it to generate fault. | ||
4774 | */ | ||
4775 | if (gpa == UNMAPPED_GVA) | ||
4776 | return true; | ||
4777 | } | ||
4780 | 4778 | ||
4781 | /* | 4779 | /* |
4782 | * Do not retry the unhandleable instruction if it faults on the | 4780 | * Do not retry the unhandleable instruction if it faults on the |
@@ -4785,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
4785 | * instruction -> ... | 4783 | * instruction -> ... |
4786 | */ | 4784 | */ |
4787 | pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); | 4785 | pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); |
4788 | if (!is_error_noslot_pfn(pfn)) { | 4786 | |
4789 | kvm_release_pfn_clean(pfn); | 4787 | /* |
4788 | * If the instruction failed on the error pfn, it can not be fixed, | ||
4789 | * report the error to userspace. | ||
4790 | */ | ||
4791 | if (is_error_noslot_pfn(pfn)) | ||
4792 | return false; | ||
4793 | |||
4794 | kvm_release_pfn_clean(pfn); | ||
4795 | |||
4796 | /* The instructions are well-emulated on direct mmu. */ | ||
4797 | if (vcpu->arch.mmu.direct_map) { | ||
4798 | unsigned int indirect_shadow_pages; | ||
4799 | |||
4800 | spin_lock(&vcpu->kvm->mmu_lock); | ||
4801 | indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; | ||
4802 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
4803 | |||
4804 | if (indirect_shadow_pages) | ||
4805 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); | ||
4806 | |||
4790 | return true; | 4807 | return true; |
4791 | } | 4808 | } |
4792 | 4809 | ||
4793 | return false; | 4810 | /* |
4811 | * if emulation was due to access to shadowed page table | ||
4812 | * and it failed try to unshadow page and re-enter the | ||
4813 | * guest to let CPU execute the instruction. | ||
4814 | */ | ||
4815 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); | ||
4816 | |||
4817 | /* | ||
4818 | * If the access faults on its page table, it can not | ||
4819 | * be fixed by unprotecting shadow page and it should | ||
4820 | * be reported to userspace. | ||
4821 | */ | ||
4822 | return !write_fault_to_shadow_pgtable; | ||
4794 | } | 4823 | } |
4795 | 4824 | ||
4796 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | 4825 | static bool retry_instruction(struct x86_emulate_ctxt *ctxt, |
@@ -4832,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | |||
4832 | if (!vcpu->arch.mmu.direct_map) | 4861 | if (!vcpu->arch.mmu.direct_map) |
4833 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); | 4862 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); |
4834 | 4863 | ||
4835 | kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 4864 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); |
4836 | 4865 | ||
4837 | return true; | 4866 | return true; |
4838 | } | 4867 | } |
@@ -4849,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4849 | int r; | 4878 | int r; |
4850 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4879 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4851 | bool writeback = true; | 4880 | bool writeback = true; |
4881 | bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; | ||
4852 | 4882 | ||
4883 | /* | ||
4884 | * Clear write_fault_to_shadow_pgtable here to ensure it is | ||
4885 | * never reused. | ||
4886 | */ | ||
4887 | vcpu->arch.write_fault_to_shadow_pgtable = false; | ||
4853 | kvm_clear_exception_queue(vcpu); | 4888 | kvm_clear_exception_queue(vcpu); |
4854 | 4889 | ||
4855 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4890 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
@@ -4868,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4868 | if (r != EMULATION_OK) { | 4903 | if (r != EMULATION_OK) { |
4869 | if (emulation_type & EMULTYPE_TRAP_UD) | 4904 | if (emulation_type & EMULTYPE_TRAP_UD) |
4870 | return EMULATE_FAIL; | 4905 | return EMULATE_FAIL; |
4871 | if (reexecute_instruction(vcpu, cr2)) | 4906 | if (reexecute_instruction(vcpu, cr2, |
4907 | write_fault_to_spt)) | ||
4872 | return EMULATE_DONE; | 4908 | return EMULATE_DONE; |
4873 | if (emulation_type & EMULTYPE_SKIP) | 4909 | if (emulation_type & EMULTYPE_SKIP) |
4874 | return EMULATE_FAIL; | 4910 | return EMULATE_FAIL; |
@@ -4898,7 +4934,7 @@ restart: | |||
4898 | return EMULATE_DONE; | 4934 | return EMULATE_DONE; |
4899 | 4935 | ||
4900 | if (r == EMULATION_FAILED) { | 4936 | if (r == EMULATION_FAILED) { |
4901 | if (reexecute_instruction(vcpu, cr2)) | 4937 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) |
4902 | return EMULATE_DONE; | 4938 | return EMULATE_DONE; |
4903 | 4939 | ||
4904 | return handle_emulation_failure(vcpu); | 4940 | return handle_emulation_failure(vcpu); |
@@ -5541,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
5541 | vcpu->arch.nmi_injected = true; | 5577 | vcpu->arch.nmi_injected = true; |
5542 | kvm_x86_ops->set_nmi(vcpu); | 5578 | kvm_x86_ops->set_nmi(vcpu); |
5543 | } | 5579 | } |
5544 | } else if (kvm_cpu_has_interrupt(vcpu)) { | 5580 | } else if (kvm_cpu_has_injectable_intr(vcpu)) { |
5545 | if (kvm_x86_ops->interrupt_allowed(vcpu)) { | 5581 | if (kvm_x86_ops->interrupt_allowed(vcpu)) { |
5546 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), | 5582 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), |
5547 | false); | 5583 | false); |
@@ -5609,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
5609 | #endif | 5645 | #endif |
5610 | } | 5646 | } |
5611 | 5647 | ||
5648 | static void update_eoi_exitmap(struct kvm_vcpu *vcpu) | ||
5649 | { | ||
5650 | u64 eoi_exit_bitmap[4]; | ||
5651 | |||
5652 | memset(eoi_exit_bitmap, 0, 32); | ||
5653 | |||
5654 | kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); | ||
5655 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | ||
5656 | } | ||
5657 | |||
5612 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5658 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5613 | { | 5659 | { |
5614 | int r; | 5660 | int r; |
@@ -5662,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5662 | kvm_handle_pmu_event(vcpu); | 5708 | kvm_handle_pmu_event(vcpu); |
5663 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 5709 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
5664 | kvm_deliver_pmi(vcpu); | 5710 | kvm_deliver_pmi(vcpu); |
5711 | if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) | ||
5712 | update_eoi_exitmap(vcpu); | ||
5665 | } | 5713 | } |
5666 | 5714 | ||
5667 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5715 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
@@ -5670,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5670 | /* enable NMI/IRQ window open exits if needed */ | 5718 | /* enable NMI/IRQ window open exits if needed */ |
5671 | if (vcpu->arch.nmi_pending) | 5719 | if (vcpu->arch.nmi_pending) |
5672 | kvm_x86_ops->enable_nmi_window(vcpu); | 5720 | kvm_x86_ops->enable_nmi_window(vcpu); |
5673 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | 5721 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
5674 | kvm_x86_ops->enable_irq_window(vcpu); | 5722 | kvm_x86_ops->enable_irq_window(vcpu); |
5675 | 5723 | ||
5676 | if (kvm_lapic_enabled(vcpu)) { | 5724 | if (kvm_lapic_enabled(vcpu)) { |
5725 | /* | ||
5726 | * Update architecture specific hints for APIC | ||
5727 | * virtual interrupt delivery. | ||
5728 | */ | ||
5729 | if (kvm_x86_ops->hwapic_irr_update) | ||
5730 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
5731 | kvm_lapic_find_highest_irr(vcpu)); | ||
5677 | update_cr8_intercept(vcpu); | 5732 | update_cr8_intercept(vcpu); |
5678 | kvm_lapic_sync_to_vapic(vcpu); | 5733 | kvm_lapic_sync_to_vapic(vcpu); |
5679 | } | 5734 | } |
@@ -6853,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
6853 | struct kvm_memory_slot *memslot, | 6908 | struct kvm_memory_slot *memslot, |
6854 | struct kvm_memory_slot old, | 6909 | struct kvm_memory_slot old, |
6855 | struct kvm_userspace_memory_region *mem, | 6910 | struct kvm_userspace_memory_region *mem, |
6856 | int user_alloc) | 6911 | bool user_alloc) |
6857 | { | 6912 | { |
6858 | int npages = memslot->npages; | 6913 | int npages = memslot->npages; |
6859 | int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; | ||
6860 | 6914 | ||
6861 | /* Prevent internal slot pages from being moved by fork()/COW. */ | 6915 | /* |
6862 | if (memslot->id >= KVM_MEMORY_SLOTS) | 6916 | * Only private memory slots need to be mapped here since |
6863 | map_flags = MAP_SHARED | MAP_ANONYMOUS; | 6917 | * KVM_SET_MEMORY_REGION ioctl is no longer supported. |
6864 | |||
6865 | /*To keep backward compatibility with older userspace, | ||
6866 | *x86 needs to handle !user_alloc case. | ||
6867 | */ | 6918 | */ |
6868 | if (!user_alloc) { | 6919 | if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { |
6869 | if (npages && !old.npages) { | 6920 | unsigned long userspace_addr; |
6870 | unsigned long userspace_addr; | ||
6871 | 6921 | ||
6872 | userspace_addr = vm_mmap(NULL, 0, | 6922 | /* |
6873 | npages * PAGE_SIZE, | 6923 | * MAP_SHARED to prevent internal slot pages from being moved |
6874 | PROT_READ | PROT_WRITE, | 6924 | * by fork()/COW. |
6875 | map_flags, | 6925 | */ |
6876 | 0); | 6926 | userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, |
6927 | PROT_READ | PROT_WRITE, | ||
6928 | MAP_SHARED | MAP_ANONYMOUS, 0); | ||
6877 | 6929 | ||
6878 | if (IS_ERR((void *)userspace_addr)) | 6930 | if (IS_ERR((void *)userspace_addr)) |
6879 | return PTR_ERR((void *)userspace_addr); | 6931 | return PTR_ERR((void *)userspace_addr); |
6880 | 6932 | ||
6881 | memslot->userspace_addr = userspace_addr; | 6933 | memslot->userspace_addr = userspace_addr; |
6882 | } | ||
6883 | } | 6934 | } |
6884 | 6935 | ||
6885 | |||
6886 | return 0; | 6936 | return 0; |
6887 | } | 6937 | } |
6888 | 6938 | ||
6889 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 6939 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
6890 | struct kvm_userspace_memory_region *mem, | 6940 | struct kvm_userspace_memory_region *mem, |
6891 | struct kvm_memory_slot old, | 6941 | struct kvm_memory_slot old, |
6892 | int user_alloc) | 6942 | bool user_alloc) |
6893 | { | 6943 | { |
6894 | 6944 | ||
6895 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; | 6945 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; |
6896 | 6946 | ||
6897 | if (!user_alloc && !old.user_alloc && old.npages && !npages) { | 6947 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { |
6898 | int ret; | 6948 | int ret; |
6899 | 6949 | ||
6900 | ret = vm_munmap(old.userspace_addr, | 6950 | ret = vm_munmap(old.userspace_addr, |
@@ -6908,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6908 | if (!kvm->arch.n_requested_mmu_pages) | 6958 | if (!kvm->arch.n_requested_mmu_pages) |
6909 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 6959 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); |
6910 | 6960 | ||
6911 | spin_lock(&kvm->mmu_lock); | ||
6912 | if (nr_mmu_pages) | 6961 | if (nr_mmu_pages) |
6913 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 6962 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
6914 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 6963 | /* |
6915 | spin_unlock(&kvm->mmu_lock); | 6964 | * Write protect all pages for dirty logging. |
6965 | * Existing largepage mappings are destroyed here and new ones will | ||
6966 | * not be created until the end of the logging. | ||
6967 | */ | ||
6968 | if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | ||
6969 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | ||
6916 | /* | 6970 | /* |
6917 | * If memory slot is created, or moved, we need to clear all | 6971 | * If memory slot is created, or moved, we need to clear all |
6918 | * mmio sptes. | 6972 | * mmio sptes. |