author     Linus Torvalds <torvalds@linux-foundation.org>  2010-05-21 20:16:21 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2010-05-21 20:16:21 -0400
commit     98edb6ca4174f17a64890a02f44c211c8b44fb3c (patch)
tree       033bc5f7da410046d28dd1cefcd2d63cda33d25b /arch/x86
parent     a8251096b427283c47e7d8f9568be6b388dd68ec (diff)
parent     8fbf065d625617bbbf6b72d5f78f84ad13c8b547 (diff)
Merge branch 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (269 commits)
KVM: x86: Add missing locking to arch specific vcpu ioctls
KVM: PPC: Add missing vcpu_load()/vcpu_put() in vcpu ioctls
KVM: MMU: Segregate shadow pages with different cr0.wp
KVM: x86: Check LMA bit before set_efer
KVM: Don't allow lmsw to clear cr0.pe
KVM: Add cpuid.txt file
KVM: x86: Tell the guest we'll warn it about tsc stability
x86, paravirt: don't compute pvclock adjustments if we trust the tsc
x86: KVM guest: Try using new kvm clock msrs
KVM: x86: export paravirtual cpuid flags in KVM_GET_SUPPORTED_CPUID
KVM: x86: add new KVMCLOCK cpuid feature
KVM: x86: change msr numbers for kvmclock
x86, paravirt: Add a global synchronization point for pvclock
x86, paravirt: Enable pvclock flags in vcpu_time_info structure
KVM: x86: Inject #GP with the right rip on efer writes
KVM: SVM: Don't allow nested guest to VMMCALL into host
KVM: x86: Fix exception reinjection forced to true
KVM: Fix wallclock version writing race
KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots
KVM: VMX: enable VMXON check with SMX enabled (Intel TXT)
...
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/kvm.h         |   17
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h |   46
-rw-r--r--  arch/x86/include/asm/kvm_host.h    |   80
-rw-r--r--  arch/x86/include/asm/kvm_para.h    |   13
-rw-r--r--  arch/x86/include/asm/msr-index.h   |    5
-rw-r--r--  arch/x86/include/asm/pvclock-abi.h |    4
-rw-r--r--  arch/x86/include/asm/pvclock.h     |    1
-rw-r--r--  arch/x86/include/asm/svm.h         |    9
-rw-r--r--  arch/x86/include/asm/vmx.h         |   12
-rw-r--r--  arch/x86/kernel/kvmclock.c         |   56
-rw-r--r--  arch/x86/kernel/pvclock.c          |   37
-rw-r--r--  arch/x86/kernel/tboot.c            |    1
-rw-r--r--  arch/x86/kvm/emulate.c             | 1247
-rw-r--r--  arch/x86/kvm/i8259.c               |   53
-rw-r--r--  arch/x86/kvm/irq.h                 |    1
-rw-r--r--  arch/x86/kvm/kvm_timer.h           |    4
-rw-r--r--  arch/x86/kvm/mmu.c                 |  225
-rw-r--r--  arch/x86/kvm/mmutrace.h            |   84
-rw-r--r--  arch/x86/kvm/paging_tmpl.h         |   46
-rw-r--r--  arch/x86/kvm/svm.c                 |  944
-rw-r--r--  arch/x86/kvm/timer.c               |    3
-rw-r--r--  arch/x86/kvm/trace.h               |  165
-rw-r--r--  arch/x86/kvm/vmx.c                 |  378
-rw-r--r--  arch/x86/kvm/x86.c                 | 1599
-rw-r--r--  arch/x86/kvm/x86.h                 |    7
25 files changed, 2987 insertions, 2050 deletions
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index f46b79f6c16c..ff90055c7f0b 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -21,6 +21,7 @@
 #define __KVM_HAVE_PIT_STATE2
 #define __KVM_HAVE_XEN_HVM
 #define __KVM_HAVE_VCPU_EVENTS
+#define __KVM_HAVE_DEBUGREGS
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -257,6 +258,11 @@ struct kvm_reinject_control {
 /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */
 #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
+#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
+
+/* Interrupt shadow states */
+#define KVM_X86_SHADOW_INT_MOV_SS 0x01
+#define KVM_X86_SHADOW_INT_STI 0x02
 
 /* for KVM_GET/SET_VCPU_EVENTS */
 struct kvm_vcpu_events {
@@ -271,7 +277,7 @@ struct kvm_vcpu_events {
 		__u8 injected;
 		__u8 nr;
 		__u8 soft;
-		__u8 pad;
+		__u8 shadow;
 	} interrupt;
 	struct {
 		__u8 injected;
@@ -284,4 +290,13 @@ struct kvm_vcpu_events {
 	__u32 reserved[10];
 };
 
+/* for KVM_GET/SET_DEBUGREGS */
+struct kvm_debugregs {
+	__u64 db[4];
+	__u64 dr6;
+	__u64 dr7;
+	__u64 flags;
+	__u64 reserved[9];
+};
+
 #endif /* _ASM_X86_KVM_H */
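For illustration only (not part of the commit): the new kvm_debugregs layout above is exchanged with user space through the KVM_GET_DEBUGREGS/KVM_SET_DEBUGREGS vcpu ioctls. A hypothetical user-space sketch, assuming a vcpu file descriptor already obtained via KVM_CREATE_VCPU and a kernel exposing __KVM_HAVE_DEBUGREGS; dump_debugregs() is an illustrative name, not a kernel or libkvm interface.

```c
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

/* Read the guest debug registers, print them, and write the same state back. */
static int dump_debugregs(int vcpu_fd)
{
	struct kvm_debugregs dbg;

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0) {
		perror("KVM_GET_DEBUGREGS");
		return -1;
	}

	for (int i = 0; i < 4; i++)
		printf("db%d = %#llx\n", i, (unsigned long long)dbg.db[i]);
	printf("dr6 = %#llx, dr7 = %#llx\n",
	       (unsigned long long)dbg.dr6, (unsigned long long)dbg.dr7);

	/* Round-trip the state unchanged through the set ioctl. */
	return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
}
```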
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7a6f54fa13ba..0b2729bf2070 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -11,6 +11,8 @@
 #ifndef _ASM_X86_KVM_X86_EMULATE_H
 #define _ASM_X86_KVM_X86_EMULATE_H
 
+#include <asm/desc_defs.h>
+
 struct x86_emulate_ctxt;
 
 /*
@@ -63,6 +65,15 @@ struct x86_emulate_ops {
 			unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
 
 	/*
+	 * write_std: Write bytes of standard (non-emulated/special) memory.
+	 *            Used for descriptor writing.
+	 *  @addr:  [IN ] Linear address to which to write.
+	 *  @val:   [OUT] Value write to memory, zero-extended to 'u_long'.
+	 *  @bytes: [IN ] Number of bytes to write to memory.
+	 */
+	int (*write_std)(unsigned long addr, void *val,
+			 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
+	/*
 	 * fetch: Read bytes of standard (non-emulated/special) memory.
 	 *        Used for instruction fetch.
 	 *  @addr:  [IN ] Linear address from which to read.
@@ -109,6 +120,23 @@ struct x86_emulate_ops {
 				unsigned int bytes,
 				struct kvm_vcpu *vcpu);
 
+	int (*pio_in_emulated)(int size, unsigned short port, void *val,
+			       unsigned int count, struct kvm_vcpu *vcpu);
+
+	int (*pio_out_emulated)(int size, unsigned short port, const void *val,
+				unsigned int count, struct kvm_vcpu *vcpu);
+
+	bool (*get_cached_descriptor)(struct desc_struct *desc,
+				      int seg, struct kvm_vcpu *vcpu);
+	void (*set_cached_descriptor)(struct desc_struct *desc,
+				      int seg, struct kvm_vcpu *vcpu);
+	u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu);
+	void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
+	void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
+	ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu);
+	void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);
+	int (*cpl)(struct kvm_vcpu *vcpu);
+	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 };
 
 /* Type, address-of, and value of an instruction's operand. */
@@ -124,6 +152,12 @@ struct fetch_cache {
 	unsigned long end;
 };
 
+struct read_cache {
+	u8 data[1024];
+	unsigned long pos;
+	unsigned long end;
+};
+
 struct decode_cache {
 	u8 twobyte;
 	u8 b;
@@ -139,7 +173,7 @@ struct decode_cache {
 	u8 seg_override;
 	unsigned int d;
 	unsigned long regs[NR_VCPU_REGS];
-	unsigned long eip, eip_orig;
+	unsigned long eip;
 	/* modrm */
 	u8 modrm;
 	u8 modrm_mod;
@@ -151,16 +185,15 @@ struct decode_cache {
 	void *modrm_ptr;
 	unsigned long modrm_val;
 	struct fetch_cache fetch;
+	struct read_cache io_read;
 };
 
-#define X86_SHADOW_INT_MOV_SS 1
-#define X86_SHADOW_INT_STI 2
-
 struct x86_emulate_ctxt {
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;
 
 	unsigned long eflags;
+	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
 	u32 cs_base;
@@ -168,6 +201,7 @@ struct x86_emulate_ctxt {
 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
 
+	bool restart; /* restart string instruction after writeback */
 	/* decode cache */
 	struct decode_cache decode;
 };
@@ -194,5 +228,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
 		    struct x86_emulate_ops *ops);
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
 		     struct x86_emulate_ops *ops);
+int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
+			 struct x86_emulate_ops *ops,
+			 u16 tss_selector, int reason,
+			 bool has_error_code, u32 error_code);
 
 #endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 06d9e79ca37d..76f5483cffec 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -171,15 +171,15 @@ struct kvm_pte_chain {
 union kvm_mmu_page_role {
 	unsigned word;
 	struct {
-		unsigned glevels:4;
 		unsigned level:4;
+		unsigned cr4_pae:1;
 		unsigned quadrant:2;
 		unsigned pad_for_nice_hex_output:6;
 		unsigned direct:1;
 		unsigned access:3;
 		unsigned invalid:1;
-		unsigned cr4_pge:1;
 		unsigned nxe:1;
+		unsigned cr0_wp:1;
 	};
 };
 
@@ -187,8 +187,6 @@ struct kvm_mmu_page {
 	struct list_head link;
 	struct hlist_node hash_link;
 
-	struct list_head oos_link;
-
 	/*
 	 * The following two entries are used to key the shadow page in the
 	 * hash table.
@@ -204,9 +202,9 @@
 	 * in this shadow page.
 	 */
 	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
-	int multimapped;	/* More than one parent_pte? */
-	int root_count;		/* Currently serving as active root */
+	bool multimapped;	/* More than one parent_pte? */
 	bool unsync;
+	int root_count;		/* Currently serving as active root */
 	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;	/* !multimapped */
@@ -224,14 +222,9 @@ struct kvm_pv_mmu_op_buffer {
 
 struct kvm_pio_request {
 	unsigned long count;
-	int cur_count;
-	gva_t guest_gva;
 	int in;
 	int port;
 	int size;
-	int string;
-	int down;
-	int rep;
 };
 
 /*
@@ -320,6 +313,7 @@ struct kvm_vcpu_arch {
 	struct kvm_queued_exception {
 		bool pending;
 		bool has_error_code;
+		bool reinject;
 		u8 nr;
 		u32 error_code;
 	} exception;
@@ -362,8 +356,8 @@ struct kvm_vcpu_arch {
 	u64 *mce_banks;
 
 	/* used for guest single stepping over the given code position */
-	u16 singlestep_cs;
 	unsigned long singlestep_rip;
+
 	/* fields used by HYPER-V emulation */
 	u64 hv_vapic;
 };
@@ -389,6 +383,7 @@ struct kvm_arch {
 	unsigned int n_free_mmu_pages;
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_alloc_mmu_pages;
+	atomic_t invlpg_counter;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
	 * Hash table of struct kvm_mmu_page.
@@ -461,11 +456,6 @@ struct kvm_vcpu_stat {
 	u32 nmi_injections;
 };
 
-struct descriptor_table {
-	u16 limit;
-	unsigned long base;
-} __attribute__((packed));
-
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);	/* __init */
 	int (*disabled_by_bios)(void);		/* __init */
@@ -503,12 +493,11 @@ struct kvm_x86_ops {
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
 	void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
 	void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
-	void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
-	void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
-	void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
-	void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
-	int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest);
-	int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value);
+	void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+	void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+	void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
@@ -527,7 +516,8 @@ struct kvm_x86_ops {
 	void (*set_irq)(struct kvm_vcpu *vcpu);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
-				bool has_error_code, u32 error_code);
+				bool has_error_code, u32 error_code,
+				bool reinject);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
 	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
@@ -541,6 +531,8 @@ struct kvm_x86_ops {
 	int (*get_lpage_level)(void);
 	bool (*rdtscp_supported)(void);
 
+	void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
+
 	const struct trace_print_flags *exit_reasons_str;
 };
 
@@ -587,23 +579,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
-void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
-		   unsigned long *rflags);
 
-unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr);
-void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
-		     unsigned long *rflags);
 void kvm_enable_efer_bits(u64);
 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 
 struct x86_emulate_ctxt;
 
-int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in,
-		    int size, unsigned port);
-int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
-			   int size, unsigned long count, int down,
-			   gva_t address, int rep, unsigned port);
+int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
@@ -616,12 +599,15 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
 
-int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
+int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
+		    bool has_error_code, u32 error_code);
 
 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
+int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
@@ -634,6 +620,8 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
+void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
@@ -649,8 +637,6 @@ int emulator_write_emulated(unsigned long addr,
 			    unsigned int bytes,
 			    struct kvm_vcpu *vcpu);
 
-unsigned long segment_base(u16 selector);
-
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes,
@@ -675,7 +661,6 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
 
-int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int complete_pio(struct kvm_vcpu *vcpu);
 bool kvm_check_iopl(struct kvm_vcpu *vcpu);
 
@@ -724,23 +709,6 @@ static inline void kvm_load_ldt(u16 sel)
 	asm("lldt %0" : : "rm"(sel));
 }
 
-static inline void kvm_get_idt(struct descriptor_table *table)
-{
-	asm("sidt %0" : "=m"(*table));
-}
-
-static inline void kvm_get_gdt(struct descriptor_table *table)
-{
-	asm("sgdt %0" : "=m"(*table));
-}
-
-static inline unsigned long kvm_read_tr_base(void)
-{
-	u16 tr;
-	asm("str %0" : "=g"(tr));
-	return segment_base(tr);
-}
-
 #ifdef CONFIG_X86_64
 static inline unsigned long read_msr(unsigned long msr)
 {
@@ -826,4 +794,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_define_shared_msr(unsigned index, u32 msr);
 void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
+bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index ffae1420e7d7..05eba5e9a8e8 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -16,10 +16,23 @@
 #define KVM_FEATURE_CLOCKSOURCE		0
 #define KVM_FEATURE_NOP_IO_DELAY	1
 #define KVM_FEATURE_MMU_OP		2
+/* This indicates that the new set of kvmclock msrs
+ * are available. The use of 0x11 and 0x12 is deprecated
+ */
+#define KVM_FEATURE_CLOCKSOURCE2	3
+
+/* The last 8 bits are used to indicate how to interpret the flags field
+ * in pvclock structure. If no bits are set, all flags are ignored.
+ */
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT	24
 
 #define MSR_KVM_WALL_CLOCK  0x11
 #define MSR_KVM_SYSTEM_TIME 0x12
 
+/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
+#define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
+#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
+
 #define KVM_MAX_MMU_OP_BATCH           32
 
 /* Operations for KVM_HC_MMU_OP */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index bc473acfa7f9..f9324851eba0 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -202,8 +202,9 @@
 #define MSR_IA32_EBL_CR_POWERON		0x0000002a
 #define MSR_IA32_FEATURE_CONTROL        0x0000003a
 
-#define FEATURE_CONTROL_LOCKED		(1<<0)
-#define FEATURE_CONTROL_VMXON_ENABLED	(1<<2)
+#define FEATURE_CONTROL_LOCKED				(1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX	(1<<2)
 
 #define MSR_IA32_APICBASE		0x0000001b
 #define MSR_IA32_APICBASE_BSP		(1<<8)
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 6d93508f2626..35f2d1948ada 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -29,7 +29,8 @@ struct pvclock_vcpu_time_info {
 	u64   system_time;
 	u32   tsc_to_system_mul;
 	s8    tsc_shift;
-	u8    pad[3];
+	u8    flags;
+	u8    pad[2];
 } __attribute__((__packed__)); /* 32 bytes */
 
 struct pvclock_wall_clock {
@@ -38,5 +39,6 @@ struct pvclock_wall_clock {
 	u32   nsec;
 } __attribute__((__packed__));
 
+#define PVCLOCK_TSC_STABLE_BIT	(1 << 0)
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 53235fd5f8ce..cd02f324aa6b 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -6,6 +6,7 @@
 
 /* some helper functions for xen and kvm pv clock sources */
 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+void pvclock_set_flags(u8 flags);
 unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
 void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
 			    struct pvclock_vcpu_time_info *vcpu,
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 38638cd2fa4c..0e831059ac5a 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -81,7 +81,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	u32 event_inj_err;
 	u64 nested_cr3;
 	u64 lbr_ctl;
-	u8 reserved_5[832];
+	u64 reserved_5;
+	u64 next_rip;
+	u8 reserved_6[816];
 };
 
 
@@ -115,6 +117,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
 #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
 
+#define SVM_VM_CR_VALID_MASK	0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL
+
 struct __attribute__ ((__packed__)) vmcb_seg {
 	u16 selector;
 	u16 attrib;
@@ -238,6 +244,7 @@ struct __attribute__ ((__packed__)) vmcb {
 
 #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
 #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
 
 #define SVM_EXIT_READ_CR0	0x000
 #define SVM_EXIT_READ_CR3	0x003
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index fb9a080740ec..9e6779f7cf2d 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,8 @@
  *
  */
 
+#include <linux/types.h>
+
 /*
  * Definitions of Primary Processor-Based VM-Execution Controls.
  */
@@ -120,6 +122,8 @@ enum vmcs_field {
 	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
 	GUEST_IA32_PAT			= 0x00002804,
 	GUEST_IA32_PAT_HIGH		= 0x00002805,
+	GUEST_IA32_EFER			= 0x00002806,
+	GUEST_IA32_EFER_HIGH		= 0x00002807,
 	GUEST_PDPTR0                    = 0x0000280a,
 	GUEST_PDPTR0_HIGH               = 0x0000280b,
 	GUEST_PDPTR1                    = 0x0000280c,
@@ -130,6 +134,8 @@ enum vmcs_field {
 	GUEST_PDPTR3_HIGH               = 0x00002811,
 	HOST_IA32_PAT			= 0x00002c00,
 	HOST_IA32_PAT_HIGH		= 0x00002c01,
+	HOST_IA32_EFER			= 0x00002c02,
+	HOST_IA32_EFER_HIGH		= 0x00002c03,
 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
 	EXCEPTION_BITMAP                = 0x00004004,
@@ -394,6 +400,10 @@ enum vmcs_field {
 #define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
 #define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
 
-
+struct vmx_msr_entry {
+	u32 index;
+	u32 reserved;
+	u64 value;
+} __aligned(16);
 
 #endif
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index feaeb0d3aa4f..eb9b76c716c2 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,6 +29,8 @@
 #define KVM_SCALE 22
 
 static int kvmclock = 1;
+static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
+static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
 
 static int parse_no_kvmclock(char *arg)
 {
@@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void)
 
 	low = (int)__pa_symbol(&wall_clock);
 	high = ((u64)__pa_symbol(&wall_clock) >> 32);
-	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
+
+	native_write_msr(msr_kvm_wall_clock, low, high);
 
 	vcpu_time = &get_cpu_var(hv_clock);
 	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
@@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt)
 	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
 	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
 	       cpu, high, low, txt);
-	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+
+	return native_write_msr_safe(msr_kvm_system_time, low, high);
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 #ifdef CONFIG_KEXEC
 static void kvm_crash_shutdown(struct pt_regs *regs)
 {
-	native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+	native_write_msr(msr_kvm_system_time, 0, 0);
 	native_machine_crash_shutdown(regs);
 }
 #endif
 
 static void kvm_shutdown(void)
 {
-	native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+	native_write_msr(msr_kvm_system_time, 0, 0);
 	native_machine_shutdown();
 }
 
@@ -181,27 +185,37 @@ void __init kvmclock_init(void)
 	if (!kvm_para_available())
 		return;
 
-	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
-		if (kvm_register_clock("boot clock"))
-			return;
-		pv_time_ops.sched_clock = kvm_clock_read;
-		x86_platform.calibrate_tsc = kvm_get_tsc_khz;
-		x86_platform.get_wallclock = kvm_get_wallclock;
-		x86_platform.set_wallclock = kvm_set_wallclock;
+	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
+		msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
+		msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
+	} else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)))
+		return;
+
+	printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
+		msr_kvm_system_time, msr_kvm_wall_clock);
+
+	if (kvm_register_clock("boot clock"))
+		return;
+	pv_time_ops.sched_clock = kvm_clock_read;
+	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
+	x86_platform.get_wallclock = kvm_get_wallclock;
+	x86_platform.set_wallclock = kvm_set_wallclock;
 #ifdef CONFIG_X86_LOCAL_APIC
-		x86_cpuinit.setup_percpu_clockev =
-			kvm_setup_secondary_clock;
+	x86_cpuinit.setup_percpu_clockev =
+		kvm_setup_secondary_clock;
 #endif
 #ifdef CONFIG_SMP
-		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 #endif
-		machine_ops.shutdown = kvm_shutdown;
+	machine_ops.shutdown = kvm_shutdown;
 #ifdef CONFIG_KEXEC
-		machine_ops.crash_shutdown = kvm_crash_shutdown;
+	machine_ops.crash_shutdown = kvm_crash_shutdown;
 #endif
-		kvm_get_preset_lpj();
-		clocksource_register(&kvm_clock);
-		pv_info.paravirt_enabled = 1;
-		pv_info.name = "KVM";
-	}
+	kvm_get_preset_lpj();
+	clocksource_register(&kvm_clock);
+	pv_info.paravirt_enabled = 1;
+	pv_info.name = "KVM";
+
+	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
+		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
 }
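For illustration only (not part of the commit): the probe order in the new kvmclock_init() above -- prefer the KVM_FEATURE_CLOCKSOURCE2 MSR pair, fall back to the legacy 0x11/0x12 pair -- can be sketched from user space. A hypothetical sketch, assuming it runs inside a KVM guest so that CPUID leaf 0x40000001 returns the paravirt feature bits in EAX; on bare metal or other hypervisors the result is not meaningful. main() and the printf are illustrative only.

```c
#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

#define KVM_CPUID_FEATURES       0x40000001
#define KVM_FEATURE_CLOCKSOURCE  0
#define KVM_FEATURE_CLOCKSOURCE2 3

#define MSR_KVM_WALL_CLOCK       0x11
#define MSR_KVM_SYSTEM_TIME      0x12
#define MSR_KVM_WALL_CLOCK_NEW   0x4b564d00
#define MSR_KVM_SYSTEM_TIME_NEW  0x4b564d01

int main(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint32_t msr_system_time, msr_wall_clock;

	/* KVM paravirt feature leaf: feature bits come back in EAX. */
	__cpuid(KVM_CPUID_FEATURES, eax, ebx, ecx, edx);

	if (eax & (1u << KVM_FEATURE_CLOCKSOURCE2)) {
		/* New-style kvmclock MSRs in the 0x4b564d00 range. */
		msr_system_time = MSR_KVM_SYSTEM_TIME_NEW;
		msr_wall_clock  = MSR_KVM_WALL_CLOCK_NEW;
	} else if (eax & (1u << KVM_FEATURE_CLOCKSOURCE)) {
		/* Deprecated legacy MSR numbers. */
		msr_system_time = MSR_KVM_SYSTEM_TIME;
		msr_wall_clock  = MSR_KVM_WALL_CLOCK;
	} else {
		puts("kvmclock not advertised");
		return 1;
	}

	printf("kvm-clock: would use msrs %#x and %#x\n",
	       msr_system_time, msr_wall_clock);
	return 0;
}
```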
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 03801f2f761f..239427ca02af 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -31,8 +31,16 @@ struct pvclock_shadow_time {
 	u32 tsc_to_nsec_mul;
 	int tsc_shift;
 	u32 version;
+	u8 flags;
 };
 
+static u8 valid_flags __read_mostly = 0;
+
+void pvclock_set_flags(u8 flags)
+{
+	valid_flags = flags;
+}
+
 /*
 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
@@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
 		dst->system_timestamp = src->system_time;
 		dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
 		dst->tsc_shift = src->tsc_shift;
+		dst->flags = src->flags;
 		rmb();		/* test version after fetching data */
 	} while ((src->version & 1) || (dst->version != src->version));
 
@@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
 	return pv_tsc_khz;
 }
 
+static atomic64_t last_value = ATOMIC64_INIT(0);
+
 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 {
 	struct pvclock_shadow_time shadow;
 	unsigned version;
 	cycle_t ret, offset;
+	u64 last;
 
 	do {
 		version = pvclock_get_time_values(&shadow, src);
@@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 		barrier();
 	} while (version != src->version);
 
+	if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
+		(shadow.flags & PVCLOCK_TSC_STABLE_BIT))
+		return ret;
+
+	/*
+	 * Assumption here is that last_value, a global accumulator, always goes
+	 * forward. If we are less than that, we should not be much smaller.
+	 * We assume there is an error marging we're inside, and then the correction
+	 * does not sacrifice accuracy.
+	 *
+	 * For reads: global may have changed between test and return,
+	 * but this means someone else updated poked the clock at a later time.
+	 * We just need to make sure we are not seeing a backwards event.
+	 *
+	 * For updates: last_value = ret is not enough, since two vcpus could be
+	 * updating at the same time, and one of them could be slightly behind,
+	 * making the assumption that last_value always go forward fail to hold.
+	 */
+	last = atomic64_read(&last_value);
+	do {
+		if (ret < last)
+			return last;
+		last = atomic64_cmpxchg(&last_value, last, ret);
+	} while (unlikely(last != ret));
+
 	return ret;
 }
 
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index cc2c60474fd0..c2f1b26141e2 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -46,6 +46,7 @@
 
 /* Global pointer to shared data; NULL means no measured launch. */
 struct tboot *tboot __read_mostly;
+EXPORT_SYMBOL(tboot);
 
 /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
 #define AP_WAIT_TIMEOUT		1
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4dade6ac0827..5ac0bb465ed6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -33,6 +33,7 @@
 #include <asm/kvm_emulate.h>
 
 #include "x86.h"
+#include "tss.h"
 
 /*
 * Opcode effective-address decode tables.
@@ -50,6 +51,8 @@
 #define DstReg      (2<<1)	/* Register operand. */
 #define DstMem      (3<<1)	/* Memory operand. */
 #define DstAcc      (4<<1)	/* Destination Accumulator */
+#define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
+#define DstMem64    (6<<1)	/* 64bit memory operand */
 #define DstMask     (7<<1)
 /* Source operand type. */
 #define SrcNone     (0<<4)	/* No source operand. */
@@ -63,6 +66,7 @@
 #define SrcOne      (7<<4)	/* Implied '1' */
 #define SrcImmUByte (8<<4)	/* 8-bit unsigned immediate operand. */
 #define SrcImmU     (9<<4)	/* Immediate operand, unsigned */
+#define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
 #define SrcMask     (0xf<<4)
 /* Generic ModRM decode. */
 #define ModRM       (1<<8)
@@ -85,6 +89,9 @@
 #define Src2ImmByte (2<<29)
 #define Src2One     (3<<29)
 #define Src2Imm16   (4<<29)
+#define Src2Mem16   (5<<29) /* Used for Ep encoding. First argument has to be
+			       in memory and second argument is located
+			       immediately after the first one in memory. */
 #define Src2Mask    (7<<29)
 
 enum {
@@ -147,8 +154,8 @@ static u32 opcode_table[256] = {
 	0, 0, 0, 0,
 	/* 0x68 - 0x6F */
 	SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
-	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
-	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
+	DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */
+	SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */
 	/* 0x70 - 0x77 */
 	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
 	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
@@ -173,12 +180,12 @@ static u32 opcode_table[256] = {
 	/* 0xA0 - 0xA7 */
 	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
 	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
-	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
-	ByteOp | ImplicitOps | String, ImplicitOps | String,
+	ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
+	ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
 	/* 0xA8 - 0xAF */
-	0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
-	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
-	ByteOp | ImplicitOps | String, ImplicitOps | String,
+	0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
+	ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
+	ByteOp | DstDI | String, DstDI | String,
 	/* 0xB0 - 0xB7 */
 	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
 	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
@@ -204,13 +211,13 @@ static u32 opcode_table[256] = {
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xE0 - 0xE7 */
 	0, 0, 0, 0,
-	ByteOp | SrcImmUByte, SrcImmUByte,
-	ByteOp | SrcImmUByte, SrcImmUByte,
+	ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
+	ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
 	/* 0xE8 - 0xEF */
 	SrcImm | Stack, SrcImm | ImplicitOps,
 	SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
-	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
-	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
+	SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
+	SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
 	/* 0xF0 - 0xF7 */
 	0, 0, 0, 0,
 	ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
@@ -343,7 +350,8 @@ static u32 group_table[] = {
 	[Group5*8] =
 	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
 	SrcMem | ModRM | Stack, 0,
-	SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
+	SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
+	SrcMem | ModRM | Stack, 0,
 	[Group7*8] =
 	0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
 	SrcNone | ModRM | DstMem | Mov, 0,
@@ -353,14 +361,14 @@ static u32 group_table[] = {
 	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
 	DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
 	[Group9*8] =
-	0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
+	0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0,
 };
 
 static u32 group2_table[] = {
 	[Group7*8] =
-	SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM,
+	SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
 	SrcNone | ModRM | DstMem | Mov, 0,
-	SrcMem16 | ModRM | Mov, 0,
+	SrcMem16 | ModRM | Mov | Priv, 0,
 	[Group9*8] =
 	0, 0, 0, 0, 0, 0, 0, 0,
 };
@@ -562,7 +570,7 @@ static u32 group2_table[] = {
 #define insn_fetch(_type, _size, _eip) \
 ({	unsigned long _x; \
 	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \
-	if (rc != 0) \
+	if (rc != X86EMUL_CONTINUE) \
 		goto done; \
 	(_eip) += (_size); \
 	(_type)_x; \
@@ -638,40 +646,40 @@ static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
 
 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
 			      struct x86_emulate_ops *ops,
-			      unsigned long linear, u8 *dest)
+			      unsigned long eip, u8 *dest)
 {
 	struct fetch_cache *fc = &ctxt->decode.fetch;
 	int rc;
-	int size;
+	int size, cur_size;
 
-	if (linear < fc->start || linear >= fc->end) {
-		size = min(15UL, PAGE_SIZE - offset_in_page(linear));
-		rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
-		if (rc)
+	if (eip == fc->end) {
+		cur_size = fc->end - fc->start;
+		size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
+		rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
+				size, ctxt->vcpu, NULL);
+		if (rc != X86EMUL_CONTINUE)
 			return rc;
-		fc->start = linear;
-		fc->end = linear + size;
+		fc->end += size;
 	}
-	*dest = fc->data[linear - fc->start];
-	return 0;
+	*dest = fc->data[eip - fc->start];
+	return X86EMUL_CONTINUE;
 }
 
 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 unsigned long eip, void *dest, unsigned size)
 {
-	int rc = 0;
+	int rc;
 
 	/* x86 instructions are limited to 15 bytes. */
-	if (eip + size - ctxt->decode.eip_orig > 15)
+	if (eip + size - ctxt->eip > 15)
 		return X86EMUL_UNHANDLEABLE;
-	eip += ctxt->cs_base;
 	while (size--) {
 		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
-		if (rc)
+		if (rc != X86EMUL_CONTINUE)
 			return rc;
 	}
-	return 0;
+	return X86EMUL_CONTINUE;
 }
 
 /*
@@ -702,7 +710,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
 	*address = 0;
 	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
			   ctxt->vcpu, NULL);
-	if (rc)
+	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
			   ctxt->vcpu, NULL);
@@ -782,7 +790,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	struct decode_cache *c = &ctxt->decode;
 	u8 sib;
 	int index_reg = 0, base_reg = 0, scale;
-	int rc = 0;
+	int rc = X86EMUL_CONTINUE;
 
 	if (c->rex_prefix) {
 		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
@@ -895,7 +903,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops)
 {
 	struct decode_cache *c = &ctxt->decode;
-	int rc = 0;
+	int rc = X86EMUL_CONTINUE;
 
 	switch (c->ad_bytes) {
 	case 2:
@@ -916,14 +924,18 @@ int
 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
 	struct decode_cache *c = &ctxt->decode;
-	int rc = 0;
+	int rc = X86EMUL_CONTINUE;
 	int mode = ctxt->mode;
 	int def_op_bytes, def_ad_bytes, group;
 
-	/* Shadow copy of register state. Committed on successful emulation. */
 
+	/* we cannot decode insn before we complete previous rep insn */
+	WARN_ON(ctxt->restart);
+
+	/* Shadow copy of register state. Committed on successful emulation. */
 	memset(c, 0, sizeof(struct decode_cache));
-	c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
+	c->eip = ctxt->eip;
+	c->fetch.start = c->fetch.end = c->eip;
 	ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
 	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
 
@@ -1015,11 +1027,6 @@ done_prefixes:
 		}
 	}
 
-	if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
-		kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
-		return -1;
-	}
-
 	if (c->d & Group) {
 		group = c->d & GroupMask;
 		c->modrm = insn_fetch(u8, 1, c->eip);
@@ -1046,7 +1053,7 @@ done_prefixes:
 		rc = decode_modrm(ctxt, ops);
 	else if (c->d & MemAbs)
 		rc = decode_abs(ctxt, ops);
-	if (rc)
+	if (rc != X86EMUL_CONTINUE)
 		goto done;
 
 	if (!c->has_seg_override)
@@ -1057,6 +1064,10 @@ done_prefixes:
 
 	if (c->ad_bytes != 8)
 		c->modrm_ea = (u32)c->modrm_ea;
+
+	if (c->rip_relative)
+		c->modrm_ea += c->eip;
+
 	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
@@ -1091,6 +1102,8 @@ done_prefixes:
			break;
		}
		c->src.type = OP_MEM;
+		c->src.ptr = (unsigned long *)c->modrm_ea;
+		c->src.val = 0;
		break;
	case SrcImm:
	case SrcImmU:
@@ -1139,6 +1152,14 @@ done_prefixes:
		c->src.bytes = 1;
		c->src.val = 1;
		break;
+	case SrcSI:
+		c->src.type = OP_MEM;
+		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		c->src.ptr = (unsigned long *)
+			register_address(c, seg_override_base(ctxt, c),
+					 c->regs[VCPU_REGS_RSI]);
+		c->src.val = 0;
+		break;
	}
 
	/*
@@ -1168,6 +1189,12 @@ done_prefixes:
		c->src2.bytes = 1;
		c->src2.val = 1;
		break;
+	case Src2Mem16:
+		c->src2.type = OP_MEM;
+		c->src2.bytes = 2;
1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
1196 | c->src2.val = 0; | ||
1197 | break; | ||
1171 | } | 1198 | } |
1172 | 1199 | ||
1173 | /* Decode and fetch the destination operand: register or memory. */ | 1200 | /* Decode and fetch the destination operand: register or memory. */ |
@@ -1180,6 +1207,7 @@ done_prefixes: | |||
1180 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 1207 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); |
1181 | break; | 1208 | break; |
1182 | case DstMem: | 1209 | case DstMem: |
1210 | case DstMem64: | ||
1183 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1211 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
1184 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1212 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1185 | c->dst.type = OP_REG; | 1213 | c->dst.type = OP_REG; |
@@ -1188,12 +1216,24 @@ done_prefixes: | |||
1188 | break; | 1216 | break; |
1189 | } | 1217 | } |
1190 | c->dst.type = OP_MEM; | 1218 | c->dst.type = OP_MEM; |
1219 | c->dst.ptr = (unsigned long *)c->modrm_ea; | ||
1220 | if ((c->d & DstMask) == DstMem64) | ||
1221 | c->dst.bytes = 8; | ||
1222 | else | ||
1223 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1224 | c->dst.val = 0; | ||
1225 | if (c->d & BitOp) { | ||
1226 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | ||
1227 | |||
1228 | c->dst.ptr = (void *)c->dst.ptr + | ||
1229 | (c->src.val & mask) / 8; | ||
1230 | } | ||
1191 | break; | 1231 | break; |
1192 | case DstAcc: | 1232 | case DstAcc: |
1193 | c->dst.type = OP_REG; | 1233 | c->dst.type = OP_REG; |
1194 | c->dst.bytes = c->op_bytes; | 1234 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1195 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | 1235 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; |
1196 | switch (c->op_bytes) { | 1236 | switch (c->dst.bytes) { |
1197 | case 1: | 1237 | case 1: |
1198 | c->dst.val = *(u8 *)c->dst.ptr; | 1238 | c->dst.val = *(u8 *)c->dst.ptr; |
1199 | break; | 1239 | break; |
@@ -1203,18 +1243,248 @@ done_prefixes: | |||
1203 | case 4: | 1243 | case 4: |
1204 | c->dst.val = *(u32 *)c->dst.ptr; | 1244 | c->dst.val = *(u32 *)c->dst.ptr; |
1205 | break; | 1245 | break; |
1246 | case 8: | ||
1247 | c->dst.val = *(u64 *)c->dst.ptr; | ||
1248 | break; | ||
1206 | } | 1249 | } |
1207 | c->dst.orig_val = c->dst.val; | 1250 | c->dst.orig_val = c->dst.val; |
1208 | break; | 1251 | break; |
1252 | case DstDI: | ||
1253 | c->dst.type = OP_MEM; | ||
1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1255 | c->dst.ptr = (unsigned long *) | ||
1256 | register_address(c, es_base(ctxt), | ||
1257 | c->regs[VCPU_REGS_RDI]); | ||
1258 | c->dst.val = 0; | ||
1259 | break; | ||
1209 | } | 1260 | } |
1210 | 1261 | ||
1211 | if (c->rip_relative) | ||
1212 | c->modrm_ea += c->eip; | ||
1213 | |||
1214 | done: | 1262 | done: |
1215 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
1216 | } | 1264 | } |
1217 | 1265 | ||
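Editorial note: the decode changes above now fold a bit-test's bit index into the memory destination at decode time (the BitOp branch under DstMem). The arithmetic is simply splitting the bit index into a byte displacement plus a bit position within the operand; a small self-contained sketch (not kernel code, and ignoring the signed-displacement case the emulator also handles):

    #include <stdio.h>

    int main(void)
    {
            unsigned long bit_index = 77;   /* e.g. "bt $77, mem" with 4-byte operands */
            unsigned int  op_bytes  = 4;

            unsigned long mask = ~(unsigned long)(op_bytes * 8 - 1);
            long byte_off = (long)(bit_index & mask) / 8;    /* 64 / 8 = 8 bytes  */
            unsigned int bit = bit_index & (op_bytes * 8 - 1);  /* 77 mod 32 = 13 */

            printf("byte offset %+ld, bit %u\n", byte_off, bit);
            return 0;
    }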
1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | ||
1267 | struct x86_emulate_ops *ops, | ||
1268 | unsigned int size, unsigned short port, | ||
1269 | void *dest) | ||
1270 | { | ||
1271 | struct read_cache *rc = &ctxt->decode.io_read; | ||
1272 | |||
1273 | if (rc->pos == rc->end) { /* refill pio read ahead */ | ||
1274 | struct decode_cache *c = &ctxt->decode; | ||
1275 | unsigned int in_page, n; | ||
1276 | unsigned int count = c->rep_prefix ? | ||
1277 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; | ||
1278 | in_page = (ctxt->eflags & EFLG_DF) ? | ||
1279 | offset_in_page(c->regs[VCPU_REGS_RDI]) : | ||
1280 | PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); | ||
1281 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, | ||
1282 | count); | ||
1283 | if (n == 0) | ||
1284 | n = 1; | ||
1285 | rc->pos = rc->end = 0; | ||
1286 | if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) | ||
1287 | return 0; | ||
1288 | rc->end = n * size; | ||
1289 | } | ||
1290 | |||
1291 | memcpy(dest, rc->data + rc->pos, size); | ||
1292 | rc->pos += size; | ||
1293 | return 1; | ||
1294 | } | ||
1295 | |||
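Editorial note: pio_in_emulated() above batches a rep-prefixed IN by reading ahead into rc->data, sized so one userspace exit can satisfy many iterations. The sizing rule (stay within the destination page, the internal buffer and the remaining count, but always make progress) can be sketched as follows — a simplified stand-alone helper, not the emulator's API:

    static unsigned int batch_elements(unsigned long dst, int df,
                                       unsigned int size, unsigned int count,
                                       unsigned int buf_size)
    {
            unsigned long off = dst & 0xfff;                 /* offset within a 4K page */
            unsigned int in_page = df ? off : 0x1000 - off;  /* bytes until page edge   */
            unsigned int n = in_page / size;

            if (n > buf_size / size)
                    n = buf_size / size;
            if (n > count)
                    n = count;
            return n ? n : 1;        /* never stall: do at least one element */
    }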
1296 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
1297 | { | ||
1298 | u32 limit = get_desc_limit(desc); | ||
1299 | |||
1300 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
1301 | } | ||
1302 | |||
1303 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | ||
1304 | struct x86_emulate_ops *ops, | ||
1305 | u16 selector, struct desc_ptr *dt) | ||
1306 | { | ||
1307 | if (selector & 1 << 2) { | ||
1308 | struct desc_struct desc; | ||
1309 | memset (dt, 0, sizeof *dt); | ||
1310 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | ||
1311 | return; | ||
1312 | |||
1313 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | ||
1314 | dt->address = get_desc_base(&desc); | ||
1315 | } else | ||
1316 | ops->get_gdt(dt, ctxt->vcpu); | ||
1317 | } | ||
1318 | |||
1319 | /* allowed just for 8-byte segment descriptors */ ||

1320 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1321 | struct x86_emulate_ops *ops, | ||
1322 | u16 selector, struct desc_struct *desc) | ||
1323 | { | ||
1324 | struct desc_ptr dt; | ||
1325 | u16 index = selector >> 3; | ||
1326 | int ret; | ||
1327 | u32 err; | ||
1328 | ulong addr; | ||
1329 | |||
1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
1331 | |||
1332 | if (dt.size < index * 8 + 7) { | ||
1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
1334 | return X86EMUL_PROPAGATE_FAULT; | ||
1335 | } | ||
1336 | addr = dt.address + index * 8; | ||
1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
1340 | |||
1341 | return ret; | ||
1342 | } | ||
1343 | |||
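Editorial note: read_segment_descriptor() above performs the classic descriptor-table walk: bit 2 of the selector (TI) picks LDT vs GDT, bits 15:3 index 8-byte entries, and an index past the table limit raises #GP with the selector as error code. A compact sketch with the structures reduced to a base/limit pair (illustration only):

    struct table { unsigned long base; unsigned int limit; };

    static int descriptor_addr(const struct table *gdt, const struct table *ldt,
                               unsigned short selector, unsigned long *addr)
    {
            const struct table *dt = (selector & (1 << 2)) ? ldt : gdt;
            unsigned int index = selector >> 3;

            if (dt->limit < index * 8 + 7)
                    return -1;               /* caller injects #GP(selector & ~3) */
            *addr = dt->base + index * 8;
            return 0;
    }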
1344 | /* allowed just for 8-byte segment descriptors */ ||
1345 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1346 | struct x86_emulate_ops *ops, | ||
1347 | u16 selector, struct desc_struct *desc) | ||
1348 | { | ||
1349 | struct desc_ptr dt; | ||
1350 | u16 index = selector >> 3; | ||
1351 | u32 err; | ||
1352 | ulong addr; | ||
1353 | int ret; | ||
1354 | |||
1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
1356 | |||
1357 | if (dt.size < index * 8 + 7) { | ||
1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
1359 | return X86EMUL_PROPAGATE_FAULT; | ||
1360 | } | ||
1361 | |||
1362 | addr = dt.address + index * 8; | ||
1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
1366 | |||
1367 | return ret; | ||
1368 | } | ||
1369 | |||
1370 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1371 | struct x86_emulate_ops *ops, | ||
1372 | u16 selector, int seg) | ||
1373 | { | ||
1374 | struct desc_struct seg_desc; | ||
1375 | u8 dpl, rpl, cpl; | ||
1376 | unsigned err_vec = GP_VECTOR; | ||
1377 | u32 err_code = 0; | ||
1378 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
1379 | int ret; | ||
1380 | |||
1381 | memset(&seg_desc, 0, sizeof seg_desc); | ||
1382 | |||
1383 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | ||
1384 | || ctxt->mode == X86EMUL_MODE_REAL) { | ||
1385 | /* set real mode segment descriptor */ | ||
1386 | set_desc_base(&seg_desc, selector << 4); | ||
1387 | set_desc_limit(&seg_desc, 0xffff); | ||
1388 | seg_desc.type = 3; | ||
1389 | seg_desc.p = 1; | ||
1390 | seg_desc.s = 1; | ||
1391 | goto load; | ||
1392 | } | ||
1393 | |||
1394 | /* NULL selector is not valid for TR, CS and SS */ | ||
1395 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | ||
1396 | && null_selector) | ||
1397 | goto exception; | ||
1398 | |||
1399 | /* TR should be in GDT only */ | ||
1400 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
1401 | goto exception; | ||
1402 | |||
1403 | if (null_selector) /* for NULL selector skip all following checks */ | ||
1404 | goto load; | ||
1405 | |||
1406 | ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
1407 | if (ret != X86EMUL_CONTINUE) | ||
1408 | return ret; | ||
1409 | |||
1410 | err_code = selector & 0xfffc; | ||
1411 | err_vec = GP_VECTOR; | ||
1412 | |||
1413 | /* can't load system descriptor into segment selector */ ||
1414 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | ||
1415 | goto exception; | ||
1416 | |||
1417 | if (!seg_desc.p) { | ||
1418 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
1419 | goto exception; | ||
1420 | } | ||
1421 | |||
1422 | rpl = selector & 3; | ||
1423 | dpl = seg_desc.dpl; | ||
1424 | cpl = ops->cpl(ctxt->vcpu); | ||
1425 | |||
1426 | switch (seg) { | ||
1427 | case VCPU_SREG_SS: | ||
1428 | /* | ||
1429 | * segment is not a writable data segment, or the segment ||
1430 | * selector's RPL != CPL, or the descriptor's DPL != CPL ||
1431 | */ | ||
1432 | if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) | ||
1433 | goto exception; | ||
1434 | break; | ||
1435 | case VCPU_SREG_CS: | ||
1436 | if (!(seg_desc.type & 8)) | ||
1437 | goto exception; | ||
1438 | |||
1439 | if (seg_desc.type & 4) { | ||
1440 | /* conforming */ | ||
1441 | if (dpl > cpl) | ||
1442 | goto exception; | ||
1443 | } else { | ||
1444 | /* nonconforming */ | ||
1445 | if (rpl > cpl || dpl != cpl) | ||
1446 | goto exception; | ||
1447 | } | ||
1448 | /* CS(RPL) <- CPL */ | ||
1449 | selector = (selector & 0xfffc) | cpl; | ||
1450 | break; | ||
1451 | case VCPU_SREG_TR: | ||
1452 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) | ||
1453 | goto exception; | ||
1454 | break; | ||
1455 | case VCPU_SREG_LDTR: | ||
1456 | if (seg_desc.s || seg_desc.type != 2) | ||
1457 | goto exception; | ||
1458 | break; | ||
1459 | default: /* DS, ES, FS, or GS */ | ||
1460 | /* | ||
1461 | * segment is not a data or readable code segment or | ||
1462 | * ((segment is a data or nonconforming code segment) | ||
1463 | * and (both RPL and CPL > DPL)) | ||
1464 | */ | ||
1465 | if ((seg_desc.type & 0xa) == 0x8 || | ||
1466 | (((seg_desc.type & 0xc) != 0xc) && | ||
1467 | (rpl > dpl && cpl > dpl))) | ||
1468 | goto exception; | ||
1469 | break; | ||
1470 | } | ||
1471 | |||
1472 | if (seg_desc.s) { | ||
1473 | /* mark segment as accessed */ | ||
1474 | seg_desc.type |= 1; | ||
1475 | ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
1476 | if (ret != X86EMUL_CONTINUE) | ||
1477 | return ret; | ||
1478 | } | ||
1479 | load: | ||
1480 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | ||
1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | ||
1482 | return X86EMUL_CONTINUE; | ||
1483 | exception: | ||
1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | ||
1485 | return X86EMUL_PROPAGATE_FAULT; | ||
1486 | } | ||
1487 | |||
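Editorial note: the SS arm of load_segment_descriptor() above demands a present, writable data segment whose DPL and the selector's RPL both match the current CPL, otherwise the load faults. A minimal sketch with the descriptor fields flattened into plain parameters (not the emulator's structures):

    static int ss_load_ok(unsigned int type, int present,
                          unsigned int rpl, unsigned int dpl, unsigned int cpl)
    {
            if (!present)
                    return 0;                /* would raise #SS(selector)          */
            if ((type & 0xa) != 0x2)         /* must be data (bit 3 clear) and     */
                    return 0;                /* writable (bit 1 set), else #GP     */
            return rpl == cpl && dpl == cpl; /* RPL and DPL must both equal CPL    */
    }

The CS arm differs only in accepting a conforming code segment with DPL <= CPL, or a nonconforming one with RPL <= CPL and DPL == CPL, as the switch above spells out.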
1218 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) |
1219 | { | 1489 | { |
1220 | struct decode_cache *c = &ctxt->decode; | 1490 | struct decode_cache *c = &ctxt->decode; |
@@ -1251,7 +1521,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1251 | int rc; | 1521 | int rc; |
1252 | unsigned long val, change_mask; | 1522 | unsigned long val, change_mask; |
1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1523 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | 1524 | int cpl = ops->cpl(ctxt->vcpu); |
1255 | 1525 | ||
1256 | rc = emulate_pop(ctxt, ops, &val, len); | 1526 | rc = emulate_pop(ctxt, ops, &val, len); |
1257 | if (rc != X86EMUL_CONTINUE) | 1527 | if (rc != X86EMUL_CONTINUE) |
@@ -1306,10 +1576,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1306 | int rc; | 1576 | int rc; |
1307 | 1577 | ||
1308 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | 1578 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); |
1309 | if (rc != 0) | 1579 | if (rc != X86EMUL_CONTINUE) |
1310 | return rc; | 1580 | return rc; |
1311 | 1581 | ||
1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); | 1582 | rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); |
1313 | return rc; | 1583 | return rc; |
1314 | } | 1584 | } |
1315 | 1585 | ||
@@ -1332,7 +1602,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1332 | struct x86_emulate_ops *ops) | 1602 | struct x86_emulate_ops *ops) |
1333 | { | 1603 | { |
1334 | struct decode_cache *c = &ctxt->decode; | 1604 | struct decode_cache *c = &ctxt->decode; |
1335 | int rc = 0; | 1605 | int rc = X86EMUL_CONTINUE; |
1336 | int reg = VCPU_REGS_RDI; | 1606 | int reg = VCPU_REGS_RDI; |
1337 | 1607 | ||
1338 | while (reg >= VCPU_REGS_RAX) { | 1608 | while (reg >= VCPU_REGS_RAX) { |
@@ -1343,7 +1613,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1343 | } | 1613 | } |
1344 | 1614 | ||
1345 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | 1615 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); |
1346 | if (rc != 0) | 1616 | if (rc != X86EMUL_CONTINUE) |
1347 | break; | 1617 | break; |
1348 | --reg; | 1618 | --reg; |
1349 | } | 1619 | } |
@@ -1354,12 +1624,8 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | |||
1354 | struct x86_emulate_ops *ops) | 1624 | struct x86_emulate_ops *ops) |
1355 | { | 1625 | { |
1356 | struct decode_cache *c = &ctxt->decode; | 1626 | struct decode_cache *c = &ctxt->decode; |
1357 | int rc; | ||
1358 | 1627 | ||
1359 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); | 1628 | return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); |
1360 | if (rc != 0) | ||
1361 | return rc; | ||
1362 | return 0; | ||
1363 | } | 1629 | } |
1364 | 1630 | ||
1365 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | 1631 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) |
@@ -1395,7 +1661,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1395 | struct x86_emulate_ops *ops) | 1661 | struct x86_emulate_ops *ops) |
1396 | { | 1662 | { |
1397 | struct decode_cache *c = &ctxt->decode; | 1663 | struct decode_cache *c = &ctxt->decode; |
1398 | int rc = 0; | ||
1399 | 1664 | ||
1400 | switch (c->modrm_reg) { | 1665 | switch (c->modrm_reg) { |
1401 | case 0 ... 1: /* test */ | 1666 | case 0 ... 1: /* test */ |
@@ -1408,11 +1673,9 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1408 | emulate_1op("neg", c->dst, ctxt->eflags); | 1673 | emulate_1op("neg", c->dst, ctxt->eflags); |
1409 | break; | 1674 | break; |
1410 | default: | 1675 | default: |
1411 | DPRINTF("Cannot emulate %02x\n", c->b); | 1676 | return 0; |
1412 | rc = X86EMUL_UNHANDLEABLE; | ||
1413 | break; | ||
1414 | } | 1677 | } |
1415 | return rc; | 1678 | return 1; |
1416 | } | 1679 | } |
1417 | 1680 | ||
1418 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | 1681 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, |
@@ -1442,20 +1705,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1442 | emulate_push(ctxt); | 1705 | emulate_push(ctxt); |
1443 | break; | 1706 | break; |
1444 | } | 1707 | } |
1445 | return 0; | 1708 | return X86EMUL_CONTINUE; |
1446 | } | 1709 | } |
1447 | 1710 | ||
1448 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | 1711 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, |
1449 | struct x86_emulate_ops *ops, | 1712 | struct x86_emulate_ops *ops) |
1450 | unsigned long memop) | ||
1451 | { | 1713 | { |
1452 | struct decode_cache *c = &ctxt->decode; | 1714 | struct decode_cache *c = &ctxt->decode; |
1453 | u64 old, new; | 1715 | u64 old = c->dst.orig_val; |
1454 | int rc; | ||
1455 | |||
1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | ||
1457 | if (rc != X86EMUL_CONTINUE) | ||
1458 | return rc; | ||
1459 | 1716 | ||
1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1717 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
1461 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { | 1718 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { |
@@ -1463,17 +1720,13 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
1463 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); | 1720 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); |
1464 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 1721 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); |
1465 | ctxt->eflags &= ~EFLG_ZF; | 1722 | ctxt->eflags &= ~EFLG_ZF; |
1466 | |||
1467 | } else { | 1723 | } else { |
1468 | new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | | 1724 | c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | |
1469 | (u32) c->regs[VCPU_REGS_RBX]; | 1725 | (u32) c->regs[VCPU_REGS_RBX]; |
1470 | 1726 | ||
1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | ||
1472 | if (rc != X86EMUL_CONTINUE) | ||
1473 | return rc; | ||
1474 | ctxt->eflags |= EFLG_ZF; | 1727 | ctxt->eflags |= EFLG_ZF; |
1475 | } | 1728 | } |
1476 | return 0; | 1729 | return X86EMUL_CONTINUE; |
1477 | } | 1730 | } |
1478 | 1731 | ||
1479 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | 1732 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, |
@@ -1484,14 +1737,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1484 | unsigned long cs; | 1737 | unsigned long cs; |
1485 | 1738 | ||
1486 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); | 1739 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); |
1487 | if (rc) | 1740 | if (rc != X86EMUL_CONTINUE) |
1488 | return rc; | 1741 | return rc; |
1489 | if (c->op_bytes == 4) | 1742 | if (c->op_bytes == 4) |
1490 | c->eip = (u32)c->eip; | 1743 | c->eip = (u32)c->eip; |
1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1744 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1492 | if (rc) | 1745 | if (rc != X86EMUL_CONTINUE) |
1493 | return rc; | 1746 | return rc; |
1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); | 1747 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); |
1495 | return rc; | 1748 | return rc; |
1496 | } | 1749 | } |
1497 | 1750 | ||
@@ -1544,7 +1797,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1544 | default: | 1797 | default: |
1545 | break; | 1798 | break; |
1546 | } | 1799 | } |
1547 | return 0; | 1800 | return X86EMUL_CONTINUE; |
1548 | } | 1801 | } |
1549 | 1802 | ||
1550 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | 1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) |
@@ -1598,8 +1851,11 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1598 | u64 msr_data; | 1851 | u64 msr_data; |
1599 | 1852 | ||
1600 | /* syscall is not available in real mode */ | 1853 | /* syscall is not available in real mode */ |
1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) | 1854 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1602 | return X86EMUL_UNHANDLEABLE; | 1855 | ctxt->mode == X86EMUL_MODE_VM86) { |
1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
1857 | return X86EMUL_PROPAGATE_FAULT; | ||
1858 | } | ||
1603 | 1859 | ||
1604 | setup_syscalls_segments(ctxt, &cs, &ss); | 1860 | setup_syscalls_segments(ctxt, &cs, &ss); |
1605 | 1861 | ||
@@ -1649,14 +1905,16 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1649 | /* inject #GP if in real mode */ | 1905 | /* inject #GP if in real mode */ |
1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1651 | kvm_inject_gp(ctxt->vcpu, 0); | 1907 | kvm_inject_gp(ctxt->vcpu, 0); |
1652 | return X86EMUL_UNHANDLEABLE; | 1908 | return X86EMUL_PROPAGATE_FAULT; |
1653 | } | 1909 | } |
1654 | 1910 | ||
1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1911 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1656 | * Therefore, we inject an #UD. | 1912 | * Therefore, we inject an #UD. |
1657 | */ | 1913 | */ |
1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
1659 | return X86EMUL_UNHANDLEABLE; | 1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1916 | return X86EMUL_PROPAGATE_FAULT; | ||
1917 | } | ||
1660 | 1918 | ||
1661 | setup_syscalls_segments(ctxt, &cs, &ss); | 1919 | setup_syscalls_segments(ctxt, &cs, &ss); |
1662 | 1920 | ||
@@ -1711,7 +1969,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1711 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1969 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1712 | ctxt->mode == X86EMUL_MODE_VM86) { | 1970 | ctxt->mode == X86EMUL_MODE_VM86) { |
1713 | kvm_inject_gp(ctxt->vcpu, 0); | 1971 | kvm_inject_gp(ctxt->vcpu, 0); |
1714 | return X86EMUL_UNHANDLEABLE; | 1972 | return X86EMUL_PROPAGATE_FAULT; |
1715 | } | 1973 | } |
1716 | 1974 | ||
1717 | setup_syscalls_segments(ctxt, &cs, &ss); | 1975 | setup_syscalls_segments(ctxt, &cs, &ss); |
@@ -1756,7 +2014,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1756 | return X86EMUL_CONTINUE; | 2014 | return X86EMUL_CONTINUE; |
1757 | } | 2015 | } |
1758 | 2016 | ||
1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | 2017 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, |
2018 | struct x86_emulate_ops *ops) | ||
1760 | { | 2019 | { |
1761 | int iopl; | 2020 | int iopl; |
1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2021 | if (ctxt->mode == X86EMUL_MODE_REAL) |
@@ -1764,7 +2023,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2023 | if (ctxt->mode == X86EMUL_MODE_VM86) |
1765 | return true; | 2024 | return true; |
1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2025 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | 2026 | return ops->cpl(ctxt->vcpu) > iopl; |
1768 | } | 2027 | } |
1769 | 2028 | ||
1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2029 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
@@ -1801,22 +2060,419 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
1801 | struct x86_emulate_ops *ops, | 2060 | struct x86_emulate_ops *ops, |
1802 | u16 port, u16 len) | 2061 | u16 port, u16 len) |
1803 | { | 2062 | { |
1804 | if (emulator_bad_iopl(ctxt)) | 2063 | if (emulator_bad_iopl(ctxt, ops)) |
1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | 2064 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) |
1806 | return false; | 2065 | return false; |
1807 | return true; | 2066 | return true; |
1808 | } | 2067 | } |
1809 | 2068 | ||
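Editorial note: the port-permission logic above decides in two stages: emulator_bad_iopl() answers "does CPL outrank IOPL (or are we in VM86)?", and only then is the TSS I/O permission bitmap consulted. A condensed sketch of that policy (the mode enum and bitmap callback are placeholders, not the emulator's types):

    enum mode { MODE_REAL, MODE_VM86, MODE_PROT };

    static int io_permitted(enum mode m, unsigned int cpl, unsigned int iopl,
                            int (*bitmap_allows)(unsigned short port, unsigned int len),
                            unsigned short port, unsigned int len)
    {
            int needs_bitmap;

            if (m == MODE_REAL)
                    needs_bitmap = 0;        /* real mode: always allowed     */
            else if (m == MODE_VM86)
                    needs_bitmap = 1;        /* VM86: bitmap always decides   */
            else
                    needs_bitmap = cpl > iopl;

            return needs_bitmap ? bitmap_allows(port, len) : 1;
    }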
2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
2070 | struct x86_emulate_ops *ops, | ||
2071 | int seg) | ||
2072 | { | ||
2073 | struct desc_struct desc; | ||
2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
2075 | return get_desc_base(&desc); | ||
2076 | else | ||
2077 | return ~0; | ||
2078 | } | ||
2079 | |||
2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | ||
2081 | struct x86_emulate_ops *ops, | ||
2082 | struct tss_segment_16 *tss) | ||
2083 | { | ||
2084 | struct decode_cache *c = &ctxt->decode; | ||
2085 | |||
2086 | tss->ip = c->eip; | ||
2087 | tss->flag = ctxt->eflags; | ||
2088 | tss->ax = c->regs[VCPU_REGS_RAX]; | ||
2089 | tss->cx = c->regs[VCPU_REGS_RCX]; | ||
2090 | tss->dx = c->regs[VCPU_REGS_RDX]; | ||
2091 | tss->bx = c->regs[VCPU_REGS_RBX]; | ||
2092 | tss->sp = c->regs[VCPU_REGS_RSP]; | ||
2093 | tss->bp = c->regs[VCPU_REGS_RBP]; | ||
2094 | tss->si = c->regs[VCPU_REGS_RSI]; | ||
2095 | tss->di = c->regs[VCPU_REGS_RDI]; | ||
2096 | |||
2097 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
2098 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
2099 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
2100 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
2101 | tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
2102 | } | ||
2103 | |||
2104 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | ||
2105 | struct x86_emulate_ops *ops, | ||
2106 | struct tss_segment_16 *tss) | ||
2107 | { | ||
2108 | struct decode_cache *c = &ctxt->decode; | ||
2109 | int ret; | ||
2110 | |||
2111 | c->eip = tss->ip; | ||
2112 | ctxt->eflags = tss->flag | 2; | ||
2113 | c->regs[VCPU_REGS_RAX] = tss->ax; | ||
2114 | c->regs[VCPU_REGS_RCX] = tss->cx; | ||
2115 | c->regs[VCPU_REGS_RDX] = tss->dx; | ||
2116 | c->regs[VCPU_REGS_RBX] = tss->bx; | ||
2117 | c->regs[VCPU_REGS_RSP] = tss->sp; | ||
2118 | c->regs[VCPU_REGS_RBP] = tss->bp; | ||
2119 | c->regs[VCPU_REGS_RSI] = tss->si; | ||
2120 | c->regs[VCPU_REGS_RDI] = tss->di; | ||
2121 | |||
2122 | /* | ||
2123 | * SDM says that segment selectors are loaded before segment | ||
2124 | * descriptors | ||
2125 | */ | ||
2126 | ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); | ||
2127 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
2128 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
2129 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2130 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
2131 | |||
2132 | /* | ||
2133 | * Now load segment descriptors. If a fault happens at this stage ||
2134 | * it is handled in the context of the new task ||
2135 | */ | ||
2136 | ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); | ||
2137 | if (ret != X86EMUL_CONTINUE) | ||
2138 | return ret; | ||
2139 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
2140 | if (ret != X86EMUL_CONTINUE) | ||
2141 | return ret; | ||
2142 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
2143 | if (ret != X86EMUL_CONTINUE) | ||
2144 | return ret; | ||
2145 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
2146 | if (ret != X86EMUL_CONTINUE) | ||
2147 | return ret; | ||
2148 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
2149 | if (ret != X86EMUL_CONTINUE) | ||
2150 | return ret; | ||
2151 | |||
2152 | return X86EMUL_CONTINUE; | ||
2153 | } | ||
2154 | |||
2155 | static int task_switch_16(struct x86_emulate_ctxt *ctxt, | ||
2156 | struct x86_emulate_ops *ops, | ||
2157 | u16 tss_selector, u16 old_tss_sel, | ||
2158 | ulong old_tss_base, struct desc_struct *new_desc) | ||
2159 | { | ||
2160 | struct tss_segment_16 tss_seg; | ||
2161 | int ret; | ||
2162 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
2163 | |||
2164 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2165 | &err); | ||
2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2167 | /* FIXME: need to provide precise fault address */ | ||
2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2169 | return ret; | ||
2170 | } | ||
2171 | |||
2172 | save_state_to_tss16(ctxt, ops, &tss_seg); | ||
2173 | |||
2174 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2175 | &err); | ||
2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2177 | /* FIXME: need to provide precise fault address */ | ||
2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2179 | return ret; | ||
2180 | } | ||
2181 | |||
2182 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2183 | &err); | ||
2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2185 | /* FIXME: need to provide precise fault address */ | ||
2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2187 | return ret; | ||
2188 | } | ||
2189 | |||
2190 | if (old_tss_sel != 0xffff) { | ||
2191 | tss_seg.prev_task_link = old_tss_sel; | ||
2192 | |||
2193 | ret = ops->write_std(new_tss_base, | ||
2194 | &tss_seg.prev_task_link, | ||
2195 | sizeof tss_seg.prev_task_link, | ||
2196 | ctxt->vcpu, &err); | ||
2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2198 | /* FIXME: need to provide precise fault address */ | ||
2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2200 | return ret; | ||
2201 | } | ||
2202 | } | ||
2203 | |||
2204 | return load_state_from_tss16(ctxt, ops, &tss_seg); | ||
2205 | } | ||
2206 | |||
2207 | static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | ||
2208 | struct x86_emulate_ops *ops, | ||
2209 | struct tss_segment_32 *tss) | ||
2210 | { | ||
2211 | struct decode_cache *c = &ctxt->decode; | ||
2212 | |||
2213 | tss->cr3 = ops->get_cr(3, ctxt->vcpu); | ||
2214 | tss->eip = c->eip; | ||
2215 | tss->eflags = ctxt->eflags; | ||
2216 | tss->eax = c->regs[VCPU_REGS_RAX]; | ||
2217 | tss->ecx = c->regs[VCPU_REGS_RCX]; | ||
2218 | tss->edx = c->regs[VCPU_REGS_RDX]; | ||
2219 | tss->ebx = c->regs[VCPU_REGS_RBX]; | ||
2220 | tss->esp = c->regs[VCPU_REGS_RSP]; | ||
2221 | tss->ebp = c->regs[VCPU_REGS_RBP]; | ||
2222 | tss->esi = c->regs[VCPU_REGS_RSI]; | ||
2223 | tss->edi = c->regs[VCPU_REGS_RDI]; | ||
2224 | |||
2225 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
2226 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
2227 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
2228 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
2229 | tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); | ||
2230 | tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); | ||
2231 | tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
2232 | } | ||
2233 | |||
2234 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | ||
2235 | struct x86_emulate_ops *ops, | ||
2236 | struct tss_segment_32 *tss) | ||
2237 | { | ||
2238 | struct decode_cache *c = &ctxt->decode; | ||
2239 | int ret; | ||
2240 | |||
2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | ||
2242 | c->eip = tss->eip; | ||
2243 | ctxt->eflags = tss->eflags | 2; | ||
2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | ||
2245 | c->regs[VCPU_REGS_RCX] = tss->ecx; | ||
2246 | c->regs[VCPU_REGS_RDX] = tss->edx; | ||
2247 | c->regs[VCPU_REGS_RBX] = tss->ebx; | ||
2248 | c->regs[VCPU_REGS_RSP] = tss->esp; | ||
2249 | c->regs[VCPU_REGS_RBP] = tss->ebp; | ||
2250 | c->regs[VCPU_REGS_RSI] = tss->esi; | ||
2251 | c->regs[VCPU_REGS_RDI] = tss->edi; | ||
2252 | |||
2253 | /* | ||
2254 | * SDM says that segment selectors are loaded before segment | ||
2255 | * descriptors | ||
2256 | */ | ||
2257 | ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); | ||
2258 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
2259 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
2260 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2261 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
2262 | ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); | ||
2263 | ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); | ||
2264 | |||
2265 | /* | ||
2266 | * Now load segment descriptors. If a fault happens at this stage ||
2267 | * it is handled in the context of the new task ||
2268 | */ | ||
2269 | ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); | ||
2270 | if (ret != X86EMUL_CONTINUE) | ||
2271 | return ret; | ||
2272 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
2273 | if (ret != X86EMUL_CONTINUE) | ||
2274 | return ret; | ||
2275 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
2276 | if (ret != X86EMUL_CONTINUE) | ||
2277 | return ret; | ||
2278 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
2279 | if (ret != X86EMUL_CONTINUE) | ||
2280 | return ret; | ||
2281 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
2282 | if (ret != X86EMUL_CONTINUE) | ||
2283 | return ret; | ||
2284 | ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); | ||
2285 | if (ret != X86EMUL_CONTINUE) | ||
2286 | return ret; | ||
2287 | ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); | ||
2288 | if (ret != X86EMUL_CONTINUE) | ||
2289 | return ret; | ||
2290 | |||
2291 | return X86EMUL_CONTINUE; | ||
2292 | } | ||
2293 | |||
2294 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, | ||
2295 | struct x86_emulate_ops *ops, | ||
2296 | u16 tss_selector, u16 old_tss_sel, | ||
2297 | ulong old_tss_base, struct desc_struct *new_desc) | ||
2298 | { | ||
2299 | struct tss_segment_32 tss_seg; | ||
2300 | int ret; | ||
2301 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
2302 | |||
2303 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2304 | &err); | ||
2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2306 | /* FIXME: need to provide precise fault address */ | ||
2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2308 | return ret; | ||
2309 | } | ||
2310 | |||
2311 | save_state_to_tss32(ctxt, ops, &tss_seg); | ||
2312 | |||
2313 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2314 | &err); | ||
2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2316 | /* FIXME: need to provide precise fault address */ | ||
2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2318 | return ret; | ||
2319 | } | ||
2320 | |||
2321 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2322 | &err); | ||
2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2324 | /* FIXME: need to provide precise fault address */ | ||
2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2326 | return ret; | ||
2327 | } | ||
2328 | |||
2329 | if (old_tss_sel != 0xffff) { | ||
2330 | tss_seg.prev_task_link = old_tss_sel; | ||
2331 | |||
2332 | ret = ops->write_std(new_tss_base, | ||
2333 | &tss_seg.prev_task_link, | ||
2334 | sizeof tss_seg.prev_task_link, | ||
2335 | ctxt->vcpu, &err); | ||
2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2337 | /* FIXME: need to provide precise fault address */ | ||
2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2339 | return ret; | ||
2340 | } | ||
2341 | } | ||
2342 | |||
2343 | return load_state_from_tss32(ctxt, ops, &tss_seg); | ||
2344 | } | ||
2345 | |||
2346 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | ||
2347 | struct x86_emulate_ops *ops, | ||
2348 | u16 tss_selector, int reason, | ||
2349 | bool has_error_code, u32 error_code) | ||
2350 | { | ||
2351 | struct desc_struct curr_tss_desc, next_tss_desc; | ||
2352 | int ret; | ||
2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | ||
2354 | ulong old_tss_base = | ||
2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | ||
2356 | u32 desc_limit; | ||
2357 | |||
2358 | /* FIXME: old_tss_base == ~0 ? */ | ||
2359 | |||
2360 | ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); | ||
2361 | if (ret != X86EMUL_CONTINUE) | ||
2362 | return ret; | ||
2363 | ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); | ||
2364 | if (ret != X86EMUL_CONTINUE) | ||
2365 | return ret; | ||
2366 | |||
2367 | /* FIXME: check that next_tss_desc is tss */ | ||
2368 | |||
2369 | if (reason != TASK_SWITCH_IRET) { | ||
2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | ||
2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | ||
2372 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2373 | return X86EMUL_PROPAGATE_FAULT; | ||
2374 | } | ||
2375 | } | ||
2376 | |||
2377 | desc_limit = desc_limit_scaled(&next_tss_desc); | ||
2378 | if (!next_tss_desc.p || | ||
2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | ||
2380 | desc_limit < 0x2b)) { | ||
2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | ||
2382 | tss_selector & 0xfffc); | ||
2383 | return X86EMUL_PROPAGATE_FAULT; | ||
2384 | } | ||
2385 | |||
2386 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
2387 | curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ | ||
2388 | write_segment_descriptor(ctxt, ops, old_tss_sel, | ||
2389 | &curr_tss_desc); | ||
2390 | } | ||
2391 | |||
2392 | if (reason == TASK_SWITCH_IRET) | ||
2393 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; | ||
2394 | |||
2395 | /* set back link to prev task only if NT bit is set in eflags | ||
2396 | note that old_tss_sel is not used after this point */ ||
2397 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
2398 | old_tss_sel = 0xffff; | ||
2399 | |||
2400 | if (next_tss_desc.type & 8) | ||
2401 | ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, | ||
2402 | old_tss_base, &next_tss_desc); | ||
2403 | else | ||
2404 | ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, | ||
2405 | old_tss_base, &next_tss_desc); | ||
2406 | if (ret != X86EMUL_CONTINUE) | ||
2407 | return ret; | ||
2408 | |||
2409 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) | ||
2410 | ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; | ||
2411 | |||
2412 | if (reason != TASK_SWITCH_IRET) { | ||
2413 | next_tss_desc.type |= (1 << 1); /* set busy flag */ | ||
2414 | write_segment_descriptor(ctxt, ops, tss_selector, | ||
2415 | &next_tss_desc); | ||
2416 | } | ||
2417 | |||
2418 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | ||
2419 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | ||
2420 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | ||
2421 | |||
2422 | if (has_error_code) { | ||
2423 | struct decode_cache *c = &ctxt->decode; | ||
2424 | |||
2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | ||
2426 | c->lock_prefix = 0; | ||
2427 | c->src.val = (unsigned long) error_code; | ||
2428 | emulate_push(ctxt); | ||
2429 | } | ||
2430 | |||
2431 | return ret; | ||
2432 | } | ||
2433 | |||
2434 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
2435 | struct x86_emulate_ops *ops, | ||
2436 | u16 tss_selector, int reason, | ||
2437 | bool has_error_code, u32 error_code) | ||
2438 | { | ||
2439 | struct decode_cache *c = &ctxt->decode; | ||
2440 | int rc; | ||
2441 | |||
2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
2443 | c->eip = ctxt->eip; | ||
2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
2445 | c->dst.type = OP_NONE; | ||
2446 | |||
2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | ||
2448 | has_error_code, error_code); | ||
2449 | |||
2450 | if (rc == X86EMUL_CONTINUE) { | ||
2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
2453 | rc = writeback(ctxt, ops); | ||
2454 | } | ||
2455 | |||
2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | ||
2457 | } | ||
2458 | |||
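Editorial note: emulator_do_task_switch() above is long; the overall sequence it implements can be condensed into the following commented outline (a sketch of the flow, with all helpers and error handling elided — not a replacement for the code):

    static int task_switch_outline(void)
    {
            /* 1. read the old and new TSS descriptors; unless IRET, check   */
            /*    selector RPL and current CPL against the new TSS DPL       */
            /* 2. require the new TSS to be present with a limit of at least */
            /*    0x67 (32-bit) or 0x2b (16-bit), else raise #TS(selector)   */
            /* 3. on IRET or JMP, clear the busy bit in the old TSS desc     */
            /* 4. save outgoing state into the old TSS and load state from   */
            /*    the new one, picking the 16- or 32-bit layout by type & 8  */
            /* 5. on CALL or interrupt gate, write the back link and set     */
            /*    EFLAGS.NT; on IRET, clear NT instead                       */
            /* 6. mark the new TSS busy, set CR0.TS, load TR, and push any   */
            /*    error code onto the new task's stack                       */
            return 0;
    }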
2459 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, | ||
2460 | int reg, struct operand *op) | ||
2461 | { | ||
2462 | struct decode_cache *c = &ctxt->decode; | ||
2463 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | ||
2464 | |||
2465 | register_address_increment(c, &c->regs[reg], df * op->bytes); | ||
2466 | op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); | ||
2467 | } | ||
2468 | |||
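Editorial note: string_addr_inc() above advances RSI/RDI after each string iteration, with EFLAGS.DF choosing the direction and the operand width giving the step. A stripped-down sketch using plain values instead of the register file (and ignoring the address-size mask the emulator applies):

    static void string_step(unsigned long *index_reg, unsigned long *ptr,
                            unsigned long seg_base, int df, unsigned int bytes)
    {
            *index_reg += df ? -(long)bytes : (long)bytes;  /* DF=1 walks down */
            *ptr = seg_base + *index_reg;   /* linear address of next element  */
    }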
1810 | int | 2469 | int |
1811 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 2470 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1812 | { | 2471 | { |
1813 | unsigned long memop = 0; | ||
1814 | u64 msr_data; | 2472 | u64 msr_data; |
1815 | unsigned long saved_eip = 0; | ||
1816 | struct decode_cache *c = &ctxt->decode; | 2473 | struct decode_cache *c = &ctxt->decode; |
1817 | unsigned int port; | 2474 | int rc = X86EMUL_CONTINUE; |
1818 | int io_dir_in; | 2475 | int saved_dst_type = c->dst.type; |
1819 | int rc = 0; | ||
1820 | 2476 | ||
1821 | ctxt->interruptibility = 0; | 2477 | ctxt->interruptibility = 0; |
1822 | 2478 | ||
@@ -1826,26 +2482,30 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1826 | */ | 2482 | */ |
1827 | 2483 | ||
1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
1829 | saved_eip = c->eip; | 2485 | |
2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
2488 | goto done; | ||
2489 | } | ||
1830 | 2490 | ||
1831 | /* LOCK prefix is allowed only with some instructions */ | 2491 | /* LOCK prefix is allowed only with some instructions */ |
1832 | if (c->lock_prefix && !(c->d & Lock)) { | 2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1834 | goto done; | 2494 | goto done; |
1835 | } | 2495 | } |
1836 | 2496 | ||
1837 | /* Privileged instruction can be executed only in CPL=0 */ | 2497 | /* Privileged instruction can be executed only in CPL=0 */ |
1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | 2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
1839 | kvm_inject_gp(ctxt->vcpu, 0); | 2499 | kvm_inject_gp(ctxt->vcpu, 0); |
1840 | goto done; | 2500 | goto done; |
1841 | } | 2501 | } |
1842 | 2502 | ||
1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | ||
1844 | memop = c->modrm_ea; | ||
1845 | |||
1846 | if (c->rep_prefix && (c->d & String)) { | 2503 | if (c->rep_prefix && (c->d & String)) { |
2504 | ctxt->restart = true; | ||
1847 | /* All REP prefixes have the same first termination condition */ | 2505 | /* All REP prefixes have the same first termination condition */ |
1848 | if (c->regs[VCPU_REGS_RCX] == 0) { | 2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
2507 | string_done: | ||
2508 | ctxt->restart = false; | ||
1849 | kvm_rip_write(ctxt->vcpu, c->eip); | 2509 | kvm_rip_write(ctxt->vcpu, c->eip); |
1850 | goto done; | 2510 | goto done; |
1851 | } | 2511 | } |
@@ -1857,25 +2517,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1857 | * - if REPNE/REPNZ and ZF = 1 then done | 2517 | * - if REPNE/REPNZ and ZF = 1 then done |
1858 | */ | 2518 | */ |
1859 | if ((c->b == 0xa6) || (c->b == 0xa7) || | 2519 | if ((c->b == 0xa6) || (c->b == 0xa7) || |
1860 | (c->b == 0xae) || (c->b == 0xaf)) { | 2520 | (c->b == 0xae) || (c->b == 0xaf)) { |
1861 | if ((c->rep_prefix == REPE_PREFIX) && | 2521 | if ((c->rep_prefix == REPE_PREFIX) && |
1862 | ((ctxt->eflags & EFLG_ZF) == 0)) { | 2522 | ((ctxt->eflags & EFLG_ZF) == 0)) |
1863 | kvm_rip_write(ctxt->vcpu, c->eip); | 2523 | goto string_done; |
1864 | goto done; | ||
1865 | } | ||
1866 | if ((c->rep_prefix == REPNE_PREFIX) && | 2524 | if ((c->rep_prefix == REPNE_PREFIX) && |
1867 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { | 2525 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) |
1868 | kvm_rip_write(ctxt->vcpu, c->eip); | 2526 | goto string_done; |
1869 | goto done; | ||
1870 | } | ||
1871 | } | 2527 | } |
1872 | c->regs[VCPU_REGS_RCX]--; | 2528 | c->eip = ctxt->eip; |
1873 | c->eip = kvm_rip_read(ctxt->vcpu); | ||
1874 | } | 2529 | } |
1875 | 2530 | ||
1876 | if (c->src.type == OP_MEM) { | 2531 | if (c->src.type == OP_MEM) { |
1877 | c->src.ptr = (unsigned long *)memop; | ||
1878 | c->src.val = 0; | ||
1879 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
1880 | &c->src.val, | 2533 | &c->src.val, |
1881 | c->src.bytes, | 2534 | c->src.bytes, |
@@ -1885,29 +2538,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1885 | c->src.orig_val = c->src.val; | 2538 | c->src.orig_val = c->src.val; |
1886 | } | 2539 | } |
1887 | 2540 | ||
2541 | if (c->src2.type == OP_MEM) { | ||
2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | ||
2543 | &c->src2.val, | ||
2544 | c->src2.bytes, | ||
2545 | ctxt->vcpu); | ||
2546 | if (rc != X86EMUL_CONTINUE) | ||
2547 | goto done; | ||
2548 | } | ||
2549 | |||
1888 | if ((c->d & DstMask) == ImplicitOps) | 2550 | if ((c->d & DstMask) == ImplicitOps) |
1889 | goto special_insn; | 2551 | goto special_insn; |
1890 | 2552 | ||
1891 | 2553 | ||
1892 | if (c->dst.type == OP_MEM) { | 2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
1893 | c->dst.ptr = (unsigned long *)memop; | 2555 | /* optimisation - avoid slow emulated read if Mov */ |
1894 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, |
1895 | c->dst.val = 0; | 2557 | c->dst.bytes, ctxt->vcpu); |
1896 | if (c->d & BitOp) { | 2558 | if (rc != X86EMUL_CONTINUE) |
1897 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | 2559 | goto done; |
1898 | |||
1899 | c->dst.ptr = (void *)c->dst.ptr + | ||
1900 | (c->src.val & mask) / 8; | ||
1901 | } | ||
1902 | if (!(c->d & Mov)) { | ||
1903 | /* optimisation - avoid slow emulated read */ | ||
1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
1905 | &c->dst.val, | ||
1906 | c->dst.bytes, | ||
1907 | ctxt->vcpu); | ||
1908 | if (rc != X86EMUL_CONTINUE) | ||
1909 | goto done; | ||
1910 | } | ||
1911 | } | 2560 | } |
1912 | c->dst.orig_val = c->dst.val; | 2561 | c->dst.orig_val = c->dst.val; |
1913 | 2562 | ||
@@ -1926,7 +2575,7 @@ special_insn: | |||
1926 | break; | 2575 | break; |
1927 | case 0x07: /* pop es */ | 2576 | case 0x07: /* pop es */ |
1928 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
1929 | if (rc != 0) | 2578 | if (rc != X86EMUL_CONTINUE) |
1930 | goto done; | 2579 | goto done; |
1931 | break; | 2580 | break; |
1932 | case 0x08 ... 0x0d: | 2581 | case 0x08 ... 0x0d: |
@@ -1945,7 +2594,7 @@ special_insn: | |||
1945 | break; | 2594 | break; |
1946 | case 0x17: /* pop ss */ | 2595 | case 0x17: /* pop ss */ |
1947 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
1948 | if (rc != 0) | 2597 | if (rc != X86EMUL_CONTINUE) |
1949 | goto done; | 2598 | goto done; |
1950 | break; | 2599 | break; |
1951 | case 0x18 ... 0x1d: | 2600 | case 0x18 ... 0x1d: |
@@ -1957,7 +2606,7 @@ special_insn: | |||
1957 | break; | 2606 | break; |
1958 | case 0x1f: /* pop ds */ | 2607 | case 0x1f: /* pop ds */ |
1959 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
1960 | if (rc != 0) | 2609 | if (rc != X86EMUL_CONTINUE) |
1961 | goto done; | 2610 | goto done; |
1962 | break; | 2611 | break; |
1963 | case 0x20 ... 0x25: | 2612 | case 0x20 ... 0x25: |
@@ -1988,7 +2637,7 @@ special_insn: | |||
1988 | case 0x58 ... 0x5f: /* pop reg */ | 2637 | case 0x58 ... 0x5f: /* pop reg */ |
1989 | pop_instruction: | 2638 | pop_instruction: |
1990 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); | 2639 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); |
1991 | if (rc != 0) | 2640 | if (rc != X86EMUL_CONTINUE) |
1992 | goto done; | 2641 | goto done; |
1993 | break; | 2642 | break; |
1994 | case 0x60: /* pusha */ | 2643 | case 0x60: /* pusha */ |
@@ -1996,7 +2645,7 @@ special_insn: | |||
1996 | break; | 2645 | break; |
1997 | case 0x61: /* popa */ | 2646 | case 0x61: /* popa */ |
1998 | rc = emulate_popa(ctxt, ops); | 2647 | rc = emulate_popa(ctxt, ops); |
1999 | if (rc != 0) | 2648 | if (rc != X86EMUL_CONTINUE) |
2000 | goto done; | 2649 | goto done; |
2001 | break; | 2650 | break; |
2002 | case 0x63: /* movsxd */ | 2651 | case 0x63: /* movsxd */ |
@@ -2010,47 +2659,29 @@ special_insn: | |||
2010 | break; | 2659 | break; |
2011 | case 0x6c: /* insb */ | 2660 | case 0x6c: /* insb */ |
2012 | case 0x6d: /* insw/insd */ | 2661 | case 0x6d: /* insw/insd */ |
2662 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2664 | c->dst.bytes)) { |
2015 | kvm_inject_gp(ctxt->vcpu, 0); | 2665 | kvm_inject_gp(ctxt->vcpu, 0); |
2016 | goto done; | 2666 | goto done; |
2017 | } | 2667 | } |
2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
2019 | 1, | 2669 | c->regs[VCPU_REGS_RDX], &c->dst.val)) |
2020 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2670 | goto done; /* IO is needed, skip writeback */ |
2021 | c->rep_prefix ? | 2671 | break; |
2022 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | ||
2023 | (ctxt->eflags & EFLG_DF), | ||
2024 | register_address(c, es_base(ctxt), | ||
2025 | c->regs[VCPU_REGS_RDI]), | ||
2026 | c->rep_prefix, | ||
2027 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
2028 | c->eip = saved_eip; | ||
2029 | return -1; | ||
2030 | } | ||
2031 | return 0; | ||
2032 | case 0x6e: /* outsb */ | 2672 | case 0x6e: /* outsb */ |
2033 | case 0x6f: /* outsw/outsd */ | 2673 | case 0x6f: /* outsw/outsd */ |
2674 | c->src.bytes = min(c->src.bytes, 4u); | ||
2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2676 | c->src.bytes)) { |
2036 | kvm_inject_gp(ctxt->vcpu, 0); | 2677 | kvm_inject_gp(ctxt->vcpu, 0); |
2037 | goto done; | 2678 | goto done; |
2038 | } | 2679 | } |
2039 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
2040 | 0, | 2681 | &c->src.val, 1, ctxt->vcpu); |
2041 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2682 | |
2042 | c->rep_prefix ? | 2683 | c->dst.type = OP_NONE; /* nothing to writeback */ |
2043 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | 2684 | break; |
2044 | (ctxt->eflags & EFLG_DF), | ||
2045 | register_address(c, | ||
2046 | seg_override_base(ctxt, c), | ||
2047 | c->regs[VCPU_REGS_RSI]), | ||
2048 | c->rep_prefix, | ||
2049 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
2050 | c->eip = saved_eip; | ||
2051 | return -1; | ||
2052 | } | ||
2053 | return 0; | ||
2054 | case 0x70 ... 0x7f: /* jcc (short) */ | 2685 | case 0x70 ... 0x7f: /* jcc (short) */ |
2055 | if (test_cc(c->b, ctxt->eflags)) | 2686 | if (test_cc(c->b, ctxt->eflags)) |
2056 | jmp_rel(c, c->src.val); | 2687 | jmp_rel(c, c->src.val); |
@@ -2107,12 +2738,11 @@ special_insn: | |||
2107 | case 0x8c: { /* mov r/m, sreg */ | 2738 | case 0x8c: { /* mov r/m, sreg */ |
2108 | struct kvm_segment segreg; | 2739 | struct kvm_segment segreg; |
2109 | 2740 | ||
2110 | if (c->modrm_reg <= 5) | 2741 | if (c->modrm_reg <= VCPU_SREG_GS) |
2111 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | 2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); |
2112 | else { | 2743 | else { |
2113 | printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", | 2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2114 | c->modrm); | 2745 | goto done; |
2115 | goto cannot_emulate; | ||
2116 | } | 2746 | } |
2117 | c->dst.val = segreg.selector; | 2747 | c->dst.val = segreg.selector; |
2118 | break; | 2748 | break; |
@@ -2132,16 +2762,16 @@ special_insn: | |||
2132 | } | 2762 | } |
2133 | 2763 | ||
2134 | if (c->modrm_reg == VCPU_SREG_SS) | 2764 | if (c->modrm_reg == VCPU_SREG_SS) |
2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | 2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); |
2136 | 2766 | ||
2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | 2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
2138 | 2768 | ||
2139 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2769 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2140 | break; | 2770 | break; |
2141 | } | 2771 | } |
2142 | case 0x8f: /* pop (sole member of Grp1a) */ | 2772 | case 0x8f: /* pop (sole member of Grp1a) */ |
2143 | rc = emulate_grp1a(ctxt, ops); | 2773 | rc = emulate_grp1a(ctxt, ops); |
2144 | if (rc != 0) | 2774 | if (rc != X86EMUL_CONTINUE) |
2145 | goto done; | 2775 | goto done; |
2146 | break; | 2776 | break; |
2147 | case 0x90: /* nop / xchg r8,rax */ | 2777 | case 0x90: /* nop / xchg r8,rax */ |
@@ -2175,89 +2805,16 @@ special_insn: | |||
2175 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | 2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; |
2176 | break; | 2806 | break; |
2177 | case 0xa4 ... 0xa5: /* movs */ | 2807 | case 0xa4 ... 0xa5: /* movs */ |
2178 | c->dst.type = OP_MEM; | 2808 | goto mov; |
2179 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2180 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2181 | es_base(ctxt), | ||
2182 | c->regs[VCPU_REGS_RDI]); | ||
2183 | rc = ops->read_emulated(register_address(c, | ||
2184 | seg_override_base(ctxt, c), | ||
2185 | c->regs[VCPU_REGS_RSI]), | ||
2186 | &c->dst.val, | ||
2187 | c->dst.bytes, ctxt->vcpu); | ||
2188 | if (rc != X86EMUL_CONTINUE) | ||
2189 | goto done; | ||
2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2192 | : c->dst.bytes); | ||
2193 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2194 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2195 | : c->dst.bytes); | ||
2196 | break; | ||
2197 | case 0xa6 ... 0xa7: /* cmps */ | 2809 | case 0xa6 ... 0xa7: /* cmps */ |
2198 | c->src.type = OP_NONE; /* Disable writeback. */ | ||
2199 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2200 | c->src.ptr = (unsigned long *)register_address(c, | ||
2201 | seg_override_base(ctxt, c), | ||
2202 | c->regs[VCPU_REGS_RSI]); | ||
2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, | ||
2204 | &c->src.val, | ||
2205 | c->src.bytes, | ||
2206 | ctxt->vcpu); | ||
2207 | if (rc != X86EMUL_CONTINUE) | ||
2208 | goto done; | ||
2209 | |||
2210 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2810 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2211 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2212 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2213 | es_base(ctxt), | ||
2214 | c->regs[VCPU_REGS_RDI]); | ||
2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
2216 | &c->dst.val, | ||
2217 | c->dst.bytes, | ||
2218 | ctxt->vcpu); | ||
2219 | if (rc != X86EMUL_CONTINUE) | ||
2220 | goto done; | ||
2221 | |||
2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
2223 | 2812 | goto cmp; | |
2224 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
2225 | |||
2226 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2227 | (ctxt->eflags & EFLG_DF) ? -c->src.bytes | ||
2228 | : c->src.bytes); | ||
2229 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2230 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2231 | : c->dst.bytes); | ||
2232 | |||
2233 | break; | ||
2234 | case 0xaa ... 0xab: /* stos */ | 2813 | case 0xaa ... 0xab: /* stos */ |
2235 | c->dst.type = OP_MEM; | ||
2236 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2237 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2238 | es_base(ctxt), | ||
2239 | c->regs[VCPU_REGS_RDI]); | ||
2240 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
2241 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2242 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2243 | : c->dst.bytes); | ||
2244 | break; | 2815 | break; |
2245 | case 0xac ... 0xad: /* lods */ | 2816 | case 0xac ... 0xad: /* lods */ |
2246 | c->dst.type = OP_REG; | 2817 | goto mov; |
2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
2249 | rc = ops->read_emulated(register_address(c, | ||
2250 | seg_override_base(ctxt, c), | ||
2251 | c->regs[VCPU_REGS_RSI]), | ||
2252 | &c->dst.val, | ||
2253 | c->dst.bytes, | ||
2254 | ctxt->vcpu); | ||
2255 | if (rc != X86EMUL_CONTINUE) | ||
2256 | goto done; | ||
2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2259 | : c->dst.bytes); | ||
2260 | break; | ||
2261 | case 0xae ... 0xaf: /* scas */ | 2818 | case 0xae ... 0xaf: /* scas */ |
2262 | DPRINTF("Urk! I don't handle SCAS.\n"); | 2819 | DPRINTF("Urk! I don't handle SCAS.\n"); |
2263 | goto cannot_emulate; | 2820 | goto cannot_emulate; |
@@ -2277,7 +2834,7 @@ special_insn: | |||
2277 | break; | 2834 | break; |
2278 | case 0xcb: /* ret far */ | 2835 | case 0xcb: /* ret far */ |
2279 | rc = emulate_ret_far(ctxt, ops); | 2836 | rc = emulate_ret_far(ctxt, ops); |
2280 | if (rc) | 2837 | if (rc != X86EMUL_CONTINUE) |
2281 | goto done; | 2838 | goto done; |
2282 | break; | 2839 | break; |
2283 | case 0xd0 ... 0xd1: /* Grp2 */ | 2840 | case 0xd0 ... 0xd1: /* Grp2 */ |
@@ -2290,14 +2847,10 @@ special_insn: | |||
2290 | break; | 2847 | break; |
2291 | case 0xe4: /* inb */ | 2848 | case 0xe4: /* inb */ |
2292 | case 0xe5: /* in */ | 2849 | case 0xe5: /* in */ |
2293 | port = c->src.val; | 2850 | goto do_io_in; |
2294 | io_dir_in = 1; | ||
2295 | goto do_io; | ||
2296 | case 0xe6: /* outb */ | 2851 | case 0xe6: /* outb */ |
2297 | case 0xe7: /* out */ | 2852 | case 0xe7: /* out */ |
2298 | port = c->src.val; | 2853 | goto do_io_out; |
2299 | io_dir_in = 0; | ||
2300 | goto do_io; | ||
2301 | case 0xe8: /* call (near) */ { | 2854 | case 0xe8: /* call (near) */ { |
2302 | long int rel = c->src.val; | 2855 | long int rel = c->src.val; |
2303 | c->src.val = (unsigned long) c->eip; | 2856 | c->src.val = (unsigned long) c->eip; |
@@ -2308,8 +2861,9 @@ special_insn: | |||
2308 | case 0xe9: /* jmp rel */ | 2861 | case 0xe9: /* jmp rel */ |
2309 | goto jmp; | 2862 | goto jmp; |
2310 | case 0xea: /* jmp far */ | 2863 | case 0xea: /* jmp far */ |
2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, | 2864 | jump_far: |
2312 | VCPU_SREG_CS)) | 2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, |
2866 | VCPU_SREG_CS)) | ||
2313 | goto done; | 2867 | goto done; |
2314 | 2868 | ||
2315 | c->eip = c->src.val; | 2869 | c->eip = c->src.val; |
@@ -2321,25 +2875,29 @@ special_insn: | |||
2321 | break; | 2875 | break; |
2322 | case 0xec: /* in al,dx */ | 2876 | case 0xec: /* in al,dx */ |
2323 | case 0xed: /* in (e/r)ax,dx */ | 2877 | case 0xed: /* in (e/r)ax,dx */ |
2324 | port = c->regs[VCPU_REGS_RDX]; | 2878 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2325 | io_dir_in = 1; | 2879 | do_io_in: |
2326 | goto do_io; | 2880 | c->dst.bytes = min(c->dst.bytes, 4u); |
2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | ||
2882 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2883 | goto done; | ||
2884 | } | ||
2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | ||
2886 | &c->dst.val)) | ||
2887 | goto done; /* IO is needed */ | ||
2888 | break; | ||
2327 | case 0xee: /* out al,dx */ | 2889 | case 0xee: /* out al,dx */ |
2328 | case 0xef: /* out (e/r)ax,dx */ | 2890 | case 0xef: /* out (e/r)ax,dx */ |
2329 | port = c->regs[VCPU_REGS_RDX]; | 2891 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2330 | io_dir_in = 0; | 2892 | do_io_out: |
2331 | do_io: | 2893 | c->dst.bytes = min(c->dst.bytes, 4u); |
2332 | if (!emulator_io_permited(ctxt, ops, port, | 2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2334 | kvm_inject_gp(ctxt->vcpu, 0); | 2895 | kvm_inject_gp(ctxt->vcpu, 0); |
2335 | goto done; | 2896 | goto done; |
2336 | } | 2897 | } |
2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | 2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
2338 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2899 | ctxt->vcpu); |
2339 | port) != 0) { | 2900 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2340 | c->eip = saved_eip; | ||
2341 | goto cannot_emulate; | ||
2342 | } | ||
2343 | break; | 2901 | break; |
2344 | case 0xf4: /* hlt */ | 2902 | case 0xf4: /* hlt */ |
2345 | ctxt->vcpu->arch.halt_request = 1; | 2903 | ctxt->vcpu->arch.halt_request = 1; |
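
Note on the rewritten in/out handling above: each path first clamps the access width to 4 bytes, then performs the I/O permission check and injects #GP(0) on failure, and only then does the port access, leaving emulation when the data is not yet available. A rough, standalone sketch of that ordering follows; emu_ctxt, io_permitted, pio_in and inject_gp are hypothetical stand-ins, not the emulator's real callbacks.

#include <stdbool.h>
#include <stdint.h>

struct emu_ctxt;                               /* stand-in for the emulation context */

/* Hypothetical stand-ins for emulator_io_permited(), pio_in_emulated() and
 * the #GP injection helper. */
bool io_permitted(struct emu_ctxt *ctxt, uint16_t port, unsigned bytes);
bool pio_in(struct emu_ctxt *ctxt, unsigned bytes, uint16_t port, uint32_t *val);
void inject_gp(struct emu_ctxt *ctxt, uint32_t error_code);

/* Returns 0 on success, -1 when emulation must stop (fault injected or the
 * port data still has to be fetched before the instruction can complete). */
int emulate_in(struct emu_ctxt *ctxt, uint16_t port, unsigned bytes, uint32_t *val)
{
	if (bytes > 4)
		bytes = 4;                     /* port I/O is at most 32 bits wide */

	if (!io_permitted(ctxt, port, bytes)) {
		inject_gp(ctxt, 0);            /* IOPL/TSS bitmap denied the access */
		return -1;
	}

	if (!pio_in(ctxt, bytes, port, val))
		return -1;                     /* I/O still pending, re-enter later */

	return 0;
}

The out direction differs only in that the value is written to the port and register writeback is disabled.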
@@ -2350,16 +2908,15 @@ special_insn: | |||
2350 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2908 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2351 | break; | 2909 | break; |
2352 | case 0xf6 ... 0xf7: /* Grp3 */ | 2910 | case 0xf6 ... 0xf7: /* Grp3 */ |
2353 | rc = emulate_grp3(ctxt, ops); | 2911 | if (!emulate_grp3(ctxt, ops)) |
2354 | if (rc != 0) | 2912 | goto cannot_emulate; |
2355 | goto done; | ||
2356 | break; | 2913 | break; |
2357 | case 0xf8: /* clc */ | 2914 | case 0xf8: /* clc */ |
2358 | ctxt->eflags &= ~EFLG_CF; | 2915 | ctxt->eflags &= ~EFLG_CF; |
2359 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2916 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2360 | break; | 2917 | break; |
2361 | case 0xfa: /* cli */ | 2918 | case 0xfa: /* cli */ |
2362 | if (emulator_bad_iopl(ctxt)) | 2919 | if (emulator_bad_iopl(ctxt, ops)) |
2363 | kvm_inject_gp(ctxt->vcpu, 0); | 2920 | kvm_inject_gp(ctxt->vcpu, 0); |
2364 | else { | 2921 | else { |
2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2922 | ctxt->eflags &= ~X86_EFLAGS_IF; |
@@ -2367,10 +2924,10 @@ special_insn: | |||
2367 | } | 2924 | } |
2368 | break; | 2925 | break; |
2369 | case 0xfb: /* sti */ | 2926 | case 0xfb: /* sti */ |
2370 | if (emulator_bad_iopl(ctxt)) | 2927 | if (emulator_bad_iopl(ctxt, ops)) |
2371 | kvm_inject_gp(ctxt->vcpu, 0); | 2928 | kvm_inject_gp(ctxt->vcpu, 0); |
2372 | else { | 2929 | else { |
2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); |
2374 | ctxt->eflags |= X86_EFLAGS_IF; | 2931 | ctxt->eflags |= X86_EFLAGS_IF; |
2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2932 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2376 | } | 2933 | } |
@@ -2383,28 +2940,55 @@ special_insn: | |||
2383 | ctxt->eflags |= EFLG_DF; | 2940 | ctxt->eflags |= EFLG_DF; |
2384 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2941 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2385 | break; | 2942 | break; |
2386 | case 0xfe ... 0xff: /* Grp4/Grp5 */ | 2943 | case 0xfe: /* Grp4 */ |
2944 | grp45: | ||
2387 | rc = emulate_grp45(ctxt, ops); | 2945 | rc = emulate_grp45(ctxt, ops); |
2388 | if (rc != 0) | 2946 | if (rc != X86EMUL_CONTINUE) |
2389 | goto done; | 2947 | goto done; |
2390 | break; | 2948 | break; |
2949 | case 0xff: /* Grp5 */ | ||
2950 | if (c->modrm_reg == 5) | ||
2951 | goto jump_far; | ||
2952 | goto grp45; | ||
2391 | } | 2953 | } |
2392 | 2954 | ||
2393 | writeback: | 2955 | writeback: |
2394 | rc = writeback(ctxt, ops); | 2956 | rc = writeback(ctxt, ops); |
2395 | if (rc != 0) | 2957 | if (rc != X86EMUL_CONTINUE) |
2396 | goto done; | 2958 | goto done; |
2397 | 2959 | ||
2960 | /* | ||
2961 | * restore dst type in case the decoding will be reused | ||
2962 | * (happens for string instructions) | ||
2963 | */ | ||
2964 | c->dst.type = saved_dst_type; | ||
2965 | |||
2966 | if ((c->d & SrcMask) == SrcSI) | ||
2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | ||
2968 | &c->src); | ||
2969 | |||
2970 | if ((c->d & DstMask) == DstDI) | ||
2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | ||
2972 | |||
2973 | if (c->rep_prefix && (c->d & String)) { | ||
2974 | struct read_cache *rc = &ctxt->decode.io_read; | ||
2975 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | ||
2976 | /* | ||
2977 | * Re-enter guest when pio read ahead buffer is empty or, | ||
2978 | * if it is not used, after every 1024 iterations. | ||
2979 | */ | ||
2980 | if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || | ||
2981 | (rc->end != 0 && rc->end == rc->pos)) | ||
2982 | ctxt->restart = false; | ||
2983 | } | ||
2984 | |||
2398 | /* Commit shadow register state. */ | 2985 | /* Commit shadow register state. */ |
2399 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); |
2400 | kvm_rip_write(ctxt->vcpu, c->eip); | 2987 | kvm_rip_write(ctxt->vcpu, c->eip); |
2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | ||
2401 | 2989 | ||
2402 | done: | 2990 | done: |
2403 | if (rc == X86EMUL_UNHANDLEABLE) { | 2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
2404 | c->eip = saved_eip; | ||
2405 | return -1; | ||
2406 | } | ||
2407 | return 0; | ||
2408 | 2992 | ||
2409 | twobyte_insn: | 2993 | twobyte_insn: |
2410 | switch (c->b) { | 2994 | switch (c->b) { |
@@ -2418,18 +3002,18 @@ twobyte_insn: | |||
2418 | goto cannot_emulate; | 3002 | goto cannot_emulate; |
2419 | 3003 | ||
2420 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3004 | rc = kvm_fix_hypercall(ctxt->vcpu); |
2421 | if (rc) | 3005 | if (rc != X86EMUL_CONTINUE) |
2422 | goto done; | 3006 | goto done; |
2423 | 3007 | ||
2424 | /* Let the processor re-execute the fixed hypercall */ | 3008 | /* Let the processor re-execute the fixed hypercall */ |
2425 | c->eip = kvm_rip_read(ctxt->vcpu); | 3009 | c->eip = ctxt->eip; |
2426 | /* Disable writeback. */ | 3010 | /* Disable writeback. */ |
2427 | c->dst.type = OP_NONE; | 3011 | c->dst.type = OP_NONE; |
2428 | break; | 3012 | break; |
2429 | case 2: /* lgdt */ | 3013 | case 2: /* lgdt */ |
2430 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3014 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
2431 | &size, &address, c->op_bytes); | 3015 | &size, &address, c->op_bytes); |
2432 | if (rc) | 3016 | if (rc != X86EMUL_CONTINUE) |
2433 | goto done; | 3017 | goto done; |
2434 | realmode_lgdt(ctxt->vcpu, size, address); | 3018 | realmode_lgdt(ctxt->vcpu, size, address); |
2435 | /* Disable writeback. */ | 3019 | /* Disable writeback. */ |
@@ -2440,7 +3024,7 @@ twobyte_insn: | |||
2440 | switch (c->modrm_rm) { | 3024 | switch (c->modrm_rm) { |
2441 | case 1: | 3025 | case 1: |
2442 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3026 | rc = kvm_fix_hypercall(ctxt->vcpu); |
2443 | if (rc) | 3027 | if (rc != X86EMUL_CONTINUE) |
2444 | goto done; | 3028 | goto done; |
2445 | break; | 3029 | break; |
2446 | default: | 3030 | default: |
@@ -2450,7 +3034,7 @@ twobyte_insn: | |||
2450 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3034 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
2451 | &size, &address, | 3035 | &size, &address, |
2452 | c->op_bytes); | 3036 | c->op_bytes); |
2453 | if (rc) | 3037 | if (rc != X86EMUL_CONTINUE) |
2454 | goto done; | 3038 | goto done; |
2455 | realmode_lidt(ctxt->vcpu, size, address); | 3039 | realmode_lidt(ctxt->vcpu, size, address); |
2456 | } | 3040 | } |
@@ -2459,15 +3043,18 @@ twobyte_insn: | |||
2459 | break; | 3043 | break; |
2460 | case 4: /* smsw */ | 3044 | case 4: /* smsw */ |
2461 | c->dst.bytes = 2; | 3045 | c->dst.bytes = 2; |
2462 | c->dst.val = realmode_get_cr(ctxt->vcpu, 0); | 3046 | c->dst.val = ops->get_cr(0, ctxt->vcpu); |
2463 | break; | 3047 | break; |
2464 | case 6: /* lmsw */ | 3048 | case 6: /* lmsw */ |
2465 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 3049 | ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | |
2466 | &ctxt->eflags); | 3050 | (c->src.val & 0x0f), ctxt->vcpu); |
2467 | c->dst.type = OP_NONE; | 3051 | c->dst.type = OP_NONE; |
2468 | break; | 3052 | break; |
3053 | case 5: /* not defined */ | ||
3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
3055 | goto done; | ||
2469 | case 7: /* invlpg*/ | 3056 | case 7: /* invlpg*/ |
2470 | emulate_invlpg(ctxt->vcpu, memop); | 3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
2471 | /* Disable writeback. */ | 3058 | /* Disable writeback. */ |
2472 | c->dst.type = OP_NONE; | 3059 | c->dst.type = OP_NONE; |
2473 | break; | 3060 | break; |
@@ -2493,54 +3080,54 @@ twobyte_insn: | |||
2493 | c->dst.type = OP_NONE; | 3080 | c->dst.type = OP_NONE; |
2494 | break; | 3081 | break; |
2495 | case 0x20: /* mov cr, reg */ | 3082 | case 0x20: /* mov cr, reg */ |
2496 | if (c->modrm_mod != 3) | 3083 | switch (c->modrm_reg) { |
2497 | goto cannot_emulate; | 3084 | case 1: |
2498 | c->regs[c->modrm_rm] = | 3085 | case 5 ... 7: |
2499 | realmode_get_cr(ctxt->vcpu, c->modrm_reg); | 3086 | case 9 ... 15: |
3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
3088 | goto done; | ||
3089 | } | ||
3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | ||
2500 | c->dst.type = OP_NONE; /* no writeback */ | 3091 | c->dst.type = OP_NONE; /* no writeback */ |
2501 | break; | 3092 | break; |
2502 | case 0x21: /* mov from dr to reg */ | 3093 | case 0x21: /* mov from dr to reg */ |
2503 | if (c->modrm_mod != 3) | 3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
2504 | goto cannot_emulate; | 3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
2505 | rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2506 | if (rc) | 3097 | goto done; |
2507 | goto cannot_emulate; | 3098 | } |
3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | ||
2508 | c->dst.type = OP_NONE; /* no writeback */ | 3100 | c->dst.type = OP_NONE; /* no writeback */ |
2509 | break; | 3101 | break; |
2510 | case 0x22: /* mov reg, cr */ | 3102 | case 0x22: /* mov reg, cr */ |
2511 | if (c->modrm_mod != 3) | 3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); |
2512 | goto cannot_emulate; | ||
2513 | realmode_set_cr(ctxt->vcpu, | ||
2514 | c->modrm_reg, c->modrm_val, &ctxt->eflags); | ||
2515 | c->dst.type = OP_NONE; | 3104 | c->dst.type = OP_NONE; |
2516 | break; | 3105 | break; |
2517 | case 0x23: /* mov from reg to dr */ | 3106 | case 0x23: /* mov from reg to dr */ |
2518 | if (c->modrm_mod != 3) | 3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
2519 | goto cannot_emulate; | 3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
2520 | rc = emulator_set_dr(ctxt, c->modrm_reg, | 3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2521 | c->regs[c->modrm_rm]); | 3110 | goto done; |
2522 | if (rc) | 3111 | } |
2523 | goto cannot_emulate; | 3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); |
2524 | c->dst.type = OP_NONE; /* no writeback */ | 3113 | c->dst.type = OP_NONE; /* no writeback */ |
2525 | break; | 3114 | break; |
2526 | case 0x30: | 3115 | case 0x30: |
2527 | /* wrmsr */ | 3116 | /* wrmsr */ |
2528 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
2529 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
2530 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); | 3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
2531 | if (rc) { | ||
2532 | kvm_inject_gp(ctxt->vcpu, 0); | 3120 | kvm_inject_gp(ctxt->vcpu, 0); |
2533 | c->eip = kvm_rip_read(ctxt->vcpu); | 3121 | goto done; |
2534 | } | 3122 | } |
2535 | rc = X86EMUL_CONTINUE; | 3123 | rc = X86EMUL_CONTINUE; |
2536 | c->dst.type = OP_NONE; | 3124 | c->dst.type = OP_NONE; |
2537 | break; | 3125 | break; |
2538 | case 0x32: | 3126 | case 0x32: |
2539 | /* rdmsr */ | 3127 | /* rdmsr */ |
2540 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); | 3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
2541 | if (rc) { | ||
2542 | kvm_inject_gp(ctxt->vcpu, 0); | 3129 | kvm_inject_gp(ctxt->vcpu, 0); |
2543 | c->eip = kvm_rip_read(ctxt->vcpu); | 3130 | goto done; |
2544 | } else { | 3131 | } else { |
2545 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
2546 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 3133 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; |
@@ -2577,7 +3164,7 @@ twobyte_insn: | |||
2577 | break; | 3164 | break; |
2578 | case 0xa1: /* pop fs */ | 3165 | case 0xa1: /* pop fs */ |
2579 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
2580 | if (rc != 0) | 3167 | if (rc != X86EMUL_CONTINUE) |
2581 | goto done; | 3168 | goto done; |
2582 | break; | 3169 | break; |
2583 | case 0xa3: | 3170 | case 0xa3: |
@@ -2596,7 +3183,7 @@ twobyte_insn: | |||
2596 | break; | 3183 | break; |
2597 | case 0xa9: /* pop gs */ | 3184 | case 0xa9: /* pop gs */ |
2598 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
2599 | if (rc != 0) | 3186 | if (rc != X86EMUL_CONTINUE) |
2600 | goto done; | 3187 | goto done; |
2601 | break; | 3188 | break; |
2602 | case 0xab: | 3189 | case 0xab: |
@@ -2668,16 +3255,14 @@ twobyte_insn: | |||
2668 | (u64) c->src.val; | 3255 | (u64) c->src.val; |
2669 | break; | 3256 | break; |
2670 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 3257 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
2671 | rc = emulate_grp9(ctxt, ops, memop); | 3258 | rc = emulate_grp9(ctxt, ops); |
2672 | if (rc != 0) | 3259 | if (rc != X86EMUL_CONTINUE) |
2673 | goto done; | 3260 | goto done; |
2674 | c->dst.type = OP_NONE; | ||
2675 | break; | 3261 | break; |
2676 | } | 3262 | } |
2677 | goto writeback; | 3263 | goto writeback; |
2678 | 3264 | ||
2679 | cannot_emulate: | 3265 | cannot_emulate: |
2680 | DPRINTF("Cannot emulate %02x\n", c->b); | 3266 | DPRINTF("Cannot emulate %02x\n", c->b); |
2681 | c->eip = saved_eip; | ||
2682 | return -1; | 3267 | return -1; |
2683 | } | 3268 | } |
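
The writeback changes earlier in this file's diff replace the per-opcode REP loops with generic string handling: SrcSI/DstDI operands are advanced according to EFLAGS.DF, RCX is decremented, and the restart flag is cleared either when the pio read-ahead cache has been drained or, if no cache is in use, every 1024 iterations so the guest can be re-entered. A rough standalone sketch of that loop control, using hypothetical simplified names (rep_state, string_step) and advancing both pointers as a movs-style instruction would:

#include <stdbool.h>
#include <stdint.h>

#define EFLG_DF (1u << 10)                /* EFLAGS direction flag */

struct rep_state {
	uint64_t rcx, rsi, rdi;           /* guest count and string pointers */
	uint32_t eflags;
	unsigned bytes;                   /* element size: 1, 2, 4 or 8 */
	unsigned cache_pos, cache_end;    /* pio read-ahead cache (end == 0: unused) */
	bool restart;                     /* keep emulating without re-entering the guest */
};

static void string_step(struct rep_state *s)
{
	int64_t step = (s->eflags & EFLG_DF) ? -(int64_t)s->bytes : s->bytes;

	s->rsi += step;
	s->rdi += step;
	s->rcx--;

	/*
	 * Re-enter the guest when the read-ahead cache is empty or, if it is
	 * not used, every 1024 iterations, instead of looping inside the
	 * emulator until RCX reaches zero.
	 */
	if ((s->cache_end == 0 && !(s->rcx & 0x3ff)) ||
	    (s->cache_end != 0 && s->cache_end == s->cache_pos))
		s->restart = false;
}

The real emulator advances RSI only for SrcSI operands and RDI only for DstDI operands, and keeps the read-ahead cache in ctxt->decode.io_read.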
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index a790fa128a9f..93825ff3338f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -33,6 +33,29 @@ | |||
33 | #include <linux/kvm_host.h> | 33 | #include <linux/kvm_host.h> |
34 | #include "trace.h" | 34 | #include "trace.h" |
35 | 35 | ||
36 | static void pic_lock(struct kvm_pic *s) | ||
37 | __acquires(&s->lock) | ||
38 | { | ||
39 | raw_spin_lock(&s->lock); | ||
40 | } | ||
41 | |||
42 | static void pic_unlock(struct kvm_pic *s) | ||
43 | __releases(&s->lock) | ||
44 | { | ||
45 | bool wakeup = s->wakeup_needed; | ||
46 | struct kvm_vcpu *vcpu; | ||
47 | |||
48 | s->wakeup_needed = false; | ||
49 | |||
50 | raw_spin_unlock(&s->lock); | ||
51 | |||
52 | if (wakeup) { | ||
53 | vcpu = s->kvm->bsp_vcpu; | ||
54 | if (vcpu) | ||
55 | kvm_vcpu_kick(vcpu); | ||
56 | } | ||
57 | } | ||
58 | |||
36 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 59 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
37 | { | 60 | { |
38 | s->isr &= ~(1 << irq); | 61 | s->isr &= ~(1 << irq); |
@@ -45,19 +68,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
45 | * Other interrupt may be delivered to PIC while lock is dropped but | 68 | * Other interrupt may be delivered to PIC while lock is dropped but |
46 | * it should be safe since PIC state is already updated at this stage. | 69 | * it should be safe since PIC state is already updated at this stage. |
47 | */ | 70 | */ |
48 | raw_spin_unlock(&s->pics_state->lock); | 71 | pic_unlock(s->pics_state); |
49 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 72 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
50 | raw_spin_lock(&s->pics_state->lock); | 73 | pic_lock(s->pics_state); |
51 | } | 74 | } |
52 | 75 | ||
53 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 76 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
54 | { | 77 | { |
55 | struct kvm_pic *s = pic_irqchip(kvm); | 78 | struct kvm_pic *s = pic_irqchip(kvm); |
56 | 79 | ||
57 | raw_spin_lock(&s->lock); | 80 | pic_lock(s); |
58 | s->pics[0].isr_ack = 0xff; | 81 | s->pics[0].isr_ack = 0xff; |
59 | s->pics[1].isr_ack = 0xff; | 82 | s->pics[1].isr_ack = 0xff; |
60 | raw_spin_unlock(&s->lock); | 83 | pic_unlock(s); |
61 | } | 84 | } |
62 | 85 | ||
63 | /* | 86 | /* |
@@ -158,9 +181,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
158 | 181 | ||
159 | void kvm_pic_update_irq(struct kvm_pic *s) | 182 | void kvm_pic_update_irq(struct kvm_pic *s) |
160 | { | 183 | { |
161 | raw_spin_lock(&s->lock); | 184 | pic_lock(s); |
162 | pic_update_irq(s); | 185 | pic_update_irq(s); |
163 | raw_spin_unlock(&s->lock); | 186 | pic_unlock(s); |
164 | } | 187 | } |
165 | 188 | ||
166 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 189 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
@@ -168,14 +191,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
168 | struct kvm_pic *s = opaque; | 191 | struct kvm_pic *s = opaque; |
169 | int ret = -1; | 192 | int ret = -1; |
170 | 193 | ||
171 | raw_spin_lock(&s->lock); | 194 | pic_lock(s); |
172 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 195 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
173 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 196 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
174 | pic_update_irq(s); | 197 | pic_update_irq(s); |
175 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 198 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
176 | s->pics[irq >> 3].imr, ret == 0); | 199 | s->pics[irq >> 3].imr, ret == 0); |
177 | } | 200 | } |
178 | raw_spin_unlock(&s->lock); | 201 | pic_unlock(s); |
179 | 202 | ||
180 | return ret; | 203 | return ret; |
181 | } | 204 | } |
@@ -205,7 +228,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
205 | int irq, irq2, intno; | 228 | int irq, irq2, intno; |
206 | struct kvm_pic *s = pic_irqchip(kvm); | 229 | struct kvm_pic *s = pic_irqchip(kvm); |
207 | 230 | ||
208 | raw_spin_lock(&s->lock); | 231 | pic_lock(s); |
209 | irq = pic_get_irq(&s->pics[0]); | 232 | irq = pic_get_irq(&s->pics[0]); |
210 | if (irq >= 0) { | 233 | if (irq >= 0) { |
211 | pic_intack(&s->pics[0], irq); | 234 | pic_intack(&s->pics[0], irq); |
@@ -230,7 +253,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
230 | intno = s->pics[0].irq_base + irq; | 253 | intno = s->pics[0].irq_base + irq; |
231 | } | 254 | } |
232 | pic_update_irq(s); | 255 | pic_update_irq(s); |
233 | raw_spin_unlock(&s->lock); | 256 | pic_unlock(s); |
234 | 257 | ||
235 | return intno; | 258 | return intno; |
236 | } | 259 | } |
@@ -444,7 +467,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
444 | printk(KERN_ERR "PIC: non byte write\n"); | 467 | printk(KERN_ERR "PIC: non byte write\n"); |
445 | return 0; | 468 | return 0; |
446 | } | 469 | } |
447 | raw_spin_lock(&s->lock); | 470 | pic_lock(s); |
448 | switch (addr) { | 471 | switch (addr) { |
449 | case 0x20: | 472 | case 0x20: |
450 | case 0x21: | 473 | case 0x21: |
@@ -457,7 +480,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 480 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
458 | break; | 481 | break; |
459 | } | 482 | } |
460 | raw_spin_unlock(&s->lock); | 483 | pic_unlock(s); |
461 | return 0; | 484 | return 0; |
462 | } | 485 | } |
463 | 486 | ||
@@ -474,7 +497,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
474 | printk(KERN_ERR "PIC: non byte read\n"); | 497 | printk(KERN_ERR "PIC: non byte read\n"); |
475 | return 0; | 498 | return 0; |
476 | } | 499 | } |
477 | raw_spin_lock(&s->lock); | 500 | pic_lock(s); |
478 | switch (addr) { | 501 | switch (addr) { |
479 | case 0x20: | 502 | case 0x20: |
480 | case 0x21: | 503 | case 0x21: |
@@ -488,7 +511,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
488 | break; | 511 | break; |
489 | } | 512 | } |
490 | *(unsigned char *)val = data; | 513 | *(unsigned char *)val = data; |
491 | raw_spin_unlock(&s->lock); | 514 | pic_unlock(s); |
492 | return 0; | 515 | return 0; |
493 | } | 516 | } |
494 | 517 | ||
@@ -505,7 +528,7 @@ static void pic_irq_request(void *opaque, int level) | |||
505 | s->output = level; | 528 | s->output = level; |
506 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | 529 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
507 | s->pics[0].isr_ack &= ~(1 << irq); | 530 | s->pics[0].isr_ack &= ~(1 << irq); |
508 | kvm_vcpu_kick(vcpu); | 531 | s->wakeup_needed = true; |
509 | } | 532 | } |
510 | } | 533 | } |
511 | 534 | ||
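
The pic_lock()/pic_unlock() wrappers above implement a deferred wakeup: pic_irq_request() only records wakeup_needed while the raw spinlock is held, and the kvm_vcpu_kick() of the BSP vcpu happens in pic_unlock() after the lock has been dropped. A small sketch of the same pattern with generic placeholder types (a pthread mutex standing in for the raw spinlock, notify_vcpu() for kvm_vcpu_kick()):

#include <pthread.h>
#include <stdbool.h>

struct pic {
	pthread_mutex_t lock;             /* plays the role of the raw spinlock */
	bool wakeup_needed;               /* set under the lock, consumed on unlock */
};

void notify_vcpu(struct pic *s);          /* stand-in for kvm_vcpu_kick(bsp_vcpu) */

static void pic_lock(struct pic *s)
{
	pthread_mutex_lock(&s->lock);
}

static void pic_unlock(struct pic *s)
{
	bool wakeup = s->wakeup_needed;

	s->wakeup_needed = false;
	pthread_mutex_unlock(&s->lock);

	/* The notification runs only after the lock has been released. */
	if (wakeup)
		notify_vcpu(s);
}

static void pic_raise_irq(struct pic *s)
{
	pic_lock(s);
	s->wakeup_needed = true;          /* defer the kick instead of calling it here */
	pic_unlock(s);
}

Doing the kick outside the critical section keeps the lock hold time short and avoids performing wakeup work while interrupt delivery paths contend on the PIC lock.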
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 34b15915754d..cd1f362f413d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -63,6 +63,7 @@ struct kvm_kpic_state { | |||
63 | 63 | ||
64 | struct kvm_pic { | 64 | struct kvm_pic { |
65 | raw_spinlock_t lock; | 65 | raw_spinlock_t lock; |
66 | bool wakeup_needed; | ||
66 | unsigned pending_acks; | 67 | unsigned pending_acks; |
67 | struct kvm *kvm; | 68 | struct kvm *kvm; |
68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 55c7524dda54..64bc6ea78d90 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h | |||
@@ -10,9 +10,7 @@ struct kvm_timer { | |||
10 | }; | 10 | }; |
11 | 11 | ||
12 | struct kvm_timer_ops { | 12 | struct kvm_timer_ops { |
13 | bool (*is_periodic)(struct kvm_timer *); | 13 | bool (*is_periodic)(struct kvm_timer *); |
14 | }; | 14 | }; |
15 | 15 | ||
16 | |||
17 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | 16 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); |
18 | |||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 19a8906bcaa2..81563e76e28f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644); | |||
148 | 148 | ||
149 | #include <trace/events/kvm.h> | 149 | #include <trace/events/kvm.h> |
150 | 150 | ||
151 | #undef TRACE_INCLUDE_FILE | ||
152 | #define CREATE_TRACE_POINTS | 151 | #define CREATE_TRACE_POINTS |
153 | #include "mmutrace.h" | 152 | #include "mmutrace.h" |
154 | 153 | ||
@@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
174 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
175 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
176 | 175 | ||
177 | 176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | |
178 | struct kvm_unsync_walk { | ||
179 | int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); | ||
180 | }; | ||
181 | |||
182 | typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); | ||
183 | 177 | ||
184 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
185 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
@@ -223,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
223 | } | 217 | } |
224 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 218 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
225 | 219 | ||
226 | static int is_write_protection(struct kvm_vcpu *vcpu) | 220 | static bool is_write_protection(struct kvm_vcpu *vcpu) |
227 | { | 221 | { |
228 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | 222 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
229 | } | 223 | } |
@@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | |||
327 | page = alloc_page(GFP_KERNEL); | 321 | page = alloc_page(GFP_KERNEL); |
328 | if (!page) | 322 | if (!page) |
329 | return -ENOMEM; | 323 | return -ENOMEM; |
330 | set_page_private(page, 0); | ||
331 | cache->objects[cache->nobjs++] = page_address(page); | 324 | cache->objects[cache->nobjs++] = page_address(page); |
332 | } | 325 | } |
333 | return 0; | 326 | return 0; |
@@ -438,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
438 | int i; | 431 | int i; |
439 | 432 | ||
440 | gfn = unalias_gfn(kvm, gfn); | 433 | gfn = unalias_gfn(kvm, gfn); |
434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
441 | for (i = PT_DIRECTORY_LEVEL; | 435 | for (i = PT_DIRECTORY_LEVEL; |
442 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
443 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
444 | write_count = slot_largepage_idx(gfn, slot, i); | 437 | write_count = slot_largepage_idx(gfn, slot, i); |
445 | *write_count -= 1; | 438 | *write_count -= 1; |
446 | WARN_ON(*write_count < 0); | 439 | WARN_ON(*write_count < 0); |
@@ -654,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
654 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
655 | { | 648 | { |
656 | struct kvm_rmap_desc *desc; | 649 | struct kvm_rmap_desc *desc; |
657 | struct kvm_rmap_desc *prev_desc; | ||
658 | u64 *prev_spte; | 650 | u64 *prev_spte; |
659 | int i; | 651 | int i; |
660 | 652 | ||
@@ -666,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | |||
666 | return NULL; | 658 | return NULL; |
667 | } | 659 | } |
668 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 660 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
669 | prev_desc = NULL; | ||
670 | prev_spte = NULL; | 661 | prev_spte = NULL; |
671 | while (desc) { | 662 | while (desc) { |
672 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { | 663 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { |
@@ -794,7 +785,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
794 | int retval = 0; | 785 | int retval = 0; |
795 | struct kvm_memslots *slots; | 786 | struct kvm_memslots *slots; |
796 | 787 | ||
797 | slots = rcu_dereference(kvm->memslots); | 788 | slots = kvm_memslots(kvm); |
798 | 789 | ||
799 | for (i = 0; i < slots->nmemslots; i++) { | 790 | for (i = 0; i < slots->nmemslots; i++) { |
800 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 791 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
@@ -925,7 +916,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
925 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
926 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
927 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
928 | INIT_LIST_HEAD(&sp->oos_link); | ||
929 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
930 | sp->multimapped = 0; | 920 | sp->multimapped = 0; |
931 | sp->parent_pte = parent_pte; | 921 | sp->parent_pte = parent_pte; |
@@ -1009,8 +999,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
1009 | } | 999 | } |
1010 | 1000 | ||
1011 | 1001 | ||
1012 | static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
1013 | mmu_parent_walk_fn fn) | ||
1014 | { | 1003 | { |
1015 | struct kvm_pte_chain *pte_chain; | 1004 | struct kvm_pte_chain *pte_chain; |
1016 | struct hlist_node *node; | 1005 | struct hlist_node *node; |
@@ -1019,8 +1008,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1019 | 1008 | ||
1020 | if (!sp->multimapped && sp->parent_pte) { | 1009 | if (!sp->multimapped && sp->parent_pte) { |
1021 | parent_sp = page_header(__pa(sp->parent_pte)); | 1010 | parent_sp = page_header(__pa(sp->parent_pte)); |
1022 | fn(vcpu, parent_sp); | 1011 | fn(parent_sp); |
1023 | mmu_parent_walk(vcpu, parent_sp, fn); | 1012 | mmu_parent_walk(parent_sp, fn); |
1024 | return; | 1013 | return; |
1025 | } | 1014 | } |
1026 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
@@ -1028,8 +1017,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1028 | if (!pte_chain->parent_ptes[i]) | 1017 | if (!pte_chain->parent_ptes[i]) |
1029 | break; | 1018 | break; |
1030 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); |
1031 | fn(vcpu, parent_sp); | 1020 | fn(parent_sp); |
1032 | mmu_parent_walk(vcpu, parent_sp, fn); | 1021 | mmu_parent_walk(parent_sp, fn); |
1033 | } | 1022 | } |
1034 | } | 1023 | } |
1035 | 1024 | ||
@@ -1066,16 +1055,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | |||
1066 | } | 1055 | } |
1067 | } | 1056 | } |
1068 | 1057 | ||
1069 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) |
1070 | { | 1059 | { |
1071 | kvm_mmu_update_parents_unsync(sp); | 1060 | kvm_mmu_update_parents_unsync(sp); |
1072 | return 1; | 1061 | return 1; |
1073 | } | 1062 | } |
1074 | 1063 | ||
1075 | static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, | 1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) |
1076 | struct kvm_mmu_page *sp) | ||
1077 | { | 1065 | { |
1078 | mmu_parent_walk(vcpu, sp, unsync_walk_fn); | 1066 | mmu_parent_walk(sp, unsync_walk_fn); |
1079 | kvm_mmu_update_parents_unsync(sp); | 1067 | kvm_mmu_update_parents_unsync(sp); |
1080 | } | 1068 | } |
1081 | 1069 | ||
@@ -1201,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
1201 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1189 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1202 | { | 1190 | { |
1203 | WARN_ON(!sp->unsync); | 1191 | WARN_ON(!sp->unsync); |
1192 | trace_kvm_mmu_sync_page(sp); | ||
1204 | sp->unsync = 0; | 1193 | sp->unsync = 0; |
1205 | --kvm->stat.mmu_unsync; | 1194 | --kvm->stat.mmu_unsync; |
1206 | } | 1195 | } |
@@ -1209,12 +1198,11 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | |||
1209 | 1198 | ||
1210 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1199 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1211 | { | 1200 | { |
1212 | if (sp->role.glevels != vcpu->arch.mmu.root_level) { | 1201 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
1213 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1202 | kvm_mmu_zap_page(vcpu->kvm, sp); |
1214 | return 1; | 1203 | return 1; |
1215 | } | 1204 | } |
1216 | 1205 | ||
1217 | trace_kvm_mmu_sync_page(sp); | ||
1218 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) | 1206 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) |
1219 | kvm_flush_remote_tlbs(vcpu->kvm); | 1207 | kvm_flush_remote_tlbs(vcpu->kvm); |
1220 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1208 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
@@ -1331,6 +1319,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1331 | role = vcpu->arch.mmu.base_role; | 1319 | role = vcpu->arch.mmu.base_role; |
1332 | role.level = level; | 1320 | role.level = level; |
1333 | role.direct = direct; | 1321 | role.direct = direct; |
1322 | if (role.direct) | ||
1323 | role.cr4_pae = 0; | ||
1334 | role.access = access; | 1324 | role.access = access; |
1335 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
1336 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
@@ -1351,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1351 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1352 | if (sp->unsync_children) { | 1342 | if (sp->unsync_children) { |
1353 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); |
1354 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1344 | kvm_mmu_mark_parents_unsync(sp); |
1355 | } | 1345 | } |
1356 | trace_kvm_mmu_get_page(sp, false); | 1346 | trace_kvm_mmu_get_page(sp, false); |
1357 | return sp; | 1347 | return sp; |
@@ -1573,13 +1563,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
1573 | r = 0; | 1563 | r = 0; |
1574 | index = kvm_page_table_hashfn(gfn); | 1564 | index = kvm_page_table_hashfn(gfn); |
1575 | bucket = &kvm->arch.mmu_page_hash[index]; | 1565 | bucket = &kvm->arch.mmu_page_hash[index]; |
1566 | restart: | ||
1576 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) |
1577 | if (sp->gfn == gfn && !sp->role.direct) { | 1568 | if (sp->gfn == gfn && !sp->role.direct) { |
1578 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
1579 | sp->role.word); | 1570 | sp->role.word); |
1580 | r = 1; | 1571 | r = 1; |
1581 | if (kvm_mmu_zap_page(kvm, sp)) | 1572 | if (kvm_mmu_zap_page(kvm, sp)) |
1582 | n = bucket->first; | 1573 | goto restart; |
1583 | } | 1574 | } |
1584 | return r; | 1575 | return r; |
1585 | } | 1576 | } |
@@ -1593,13 +1584,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
1593 | 1584 | ||
1594 | index = kvm_page_table_hashfn(gfn); | 1585 | index = kvm_page_table_hashfn(gfn); |
1595 | bucket = &kvm->arch.mmu_page_hash[index]; | 1586 | bucket = &kvm->arch.mmu_page_hash[index]; |
1587 | restart: | ||
1596 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { |
1597 | if (sp->gfn == gfn && !sp->role.direct | 1589 | if (sp->gfn == gfn && !sp->role.direct |
1598 | && !sp->role.invalid) { | 1590 | && !sp->role.invalid) { |
1599 | pgprintk("%s: zap %lx %x\n", | 1591 | pgprintk("%s: zap %lx %x\n", |
1600 | __func__, gfn, sp->role.word); | 1592 | __func__, gfn, sp->role.word); |
1601 | if (kvm_mmu_zap_page(kvm, sp)) | 1593 | if (kvm_mmu_zap_page(kvm, sp)) |
1602 | nn = bucket->first; | 1594 | goto restart; |
1603 | } | 1595 | } |
1604 | } | 1596 | } |
1605 | } | 1597 | } |
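
Both hunks above (and the similar ones later in kvm_mmu_pte_write() and kvm_mmu_zap_all()) replace the "n = bucket->first" trick with a restart label: when kvm_mmu_zap_page() reports that it removed more than the current entry, the hash-bucket walk simply starts over rather than trying to patch up the iterator. A generic standalone sketch of that restart-on-deletion idiom over a singly linked list, with hypothetical names:

#include <stdbool.h>

struct page {
	struct page *next;
	unsigned long gfn;
	bool direct;
};

/* Stand-in for kvm_mmu_zap_page(): returns true when entries other than @p
 * may also have been removed, invalidating the saved iterator. */
bool zap_page(struct page **bucket, struct page *p);

static int unprotect_gfn(struct page **bucket, unsigned long gfn)
{
	struct page *p, *next;
	int zapped = 0;

restart:
	for (p = *bucket; p; p = next) {
		next = p->next;               /* _safe-style walk: remember successor */
		if (p->gfn != gfn || p->direct)
			continue;
		zapped++;
		if (zap_page(bucket, p))
			goto restart;         /* iterator may be stale: start over */
	}
	return zapped;
}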
@@ -1626,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp) | |||
1626 | } | 1618 | } |
1627 | } | 1619 | } |
1628 | 1620 | ||
1629 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | ||
1630 | { | ||
1631 | struct page *page; | ||
1632 | |||
1633 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | ||
1634 | |||
1635 | if (gpa == UNMAPPED_GVA) | ||
1636 | return NULL; | ||
1637 | |||
1638 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
1639 | |||
1640 | return page; | ||
1641 | } | ||
1642 | |||
1643 | /* | 1621 | /* |
1644 | * The function is based on mtrr_type_lookup() in | 1622 | * The function is based on mtrr_type_lookup() in |
1645 | * arch/x86/kernel/cpu/mtrr/generic.c | 1623 | * arch/x86/kernel/cpu/mtrr/generic.c |
@@ -1752,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1752 | struct kvm_mmu_page *s; | 1730 | struct kvm_mmu_page *s; |
1753 | struct hlist_node *node, *n; | 1731 | struct hlist_node *node, *n; |
1754 | 1732 | ||
1755 | trace_kvm_mmu_unsync_page(sp); | ||
1756 | index = kvm_page_table_hashfn(sp->gfn); | 1733 | index = kvm_page_table_hashfn(sp->gfn); |
1757 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1734 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
1758 | /* don't unsync if pagetable is shadowed with multiple roles */ | 1735 | /* don't unsync if pagetable is shadowed with multiple roles */ |
@@ -1762,10 +1739,11 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1762 | if (s->role.word != sp->role.word) | 1739 | if (s->role.word != sp->role.word) |
1763 | return 1; | 1740 | return 1; |
1764 | } | 1741 | } |
1742 | trace_kvm_mmu_unsync_page(sp); | ||
1765 | ++vcpu->kvm->stat.mmu_unsync; | 1743 | ++vcpu->kvm->stat.mmu_unsync; |
1766 | sp->unsync = 1; | 1744 | sp->unsync = 1; |
1767 | 1745 | ||
1768 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1746 | kvm_mmu_mark_parents_unsync(sp); |
1769 | 1747 | ||
1770 | mmu_convert_notrap(sp); | 1748 | mmu_convert_notrap(sp); |
1771 | return 0; | 1749 | return 0; |
@@ -2081,21 +2059,23 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2081 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2059 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2082 | 2060 | ||
2083 | ASSERT(!VALID_PAGE(root)); | 2061 | ASSERT(!VALID_PAGE(root)); |
2084 | if (tdp_enabled) | ||
2085 | direct = 1; | ||
2086 | if (mmu_check_root(vcpu, root_gfn)) | 2062 | if (mmu_check_root(vcpu, root_gfn)) |
2087 | return 1; | 2063 | return 1; |
2064 | if (tdp_enabled) { | ||
2065 | direct = 1; | ||
2066 | root_gfn = 0; | ||
2067 | } | ||
2068 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2088 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2069 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
2089 | PT64_ROOT_LEVEL, direct, | 2070 | PT64_ROOT_LEVEL, direct, |
2090 | ACC_ALL, NULL); | 2071 | ACC_ALL, NULL); |
2091 | root = __pa(sp->spt); | 2072 | root = __pa(sp->spt); |
2092 | ++sp->root_count; | 2073 | ++sp->root_count; |
2074 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2093 | vcpu->arch.mmu.root_hpa = root; | 2075 | vcpu->arch.mmu.root_hpa = root; |
2094 | return 0; | 2076 | return 0; |
2095 | } | 2077 | } |
2096 | direct = !is_paging(vcpu); | 2078 | direct = !is_paging(vcpu); |
2097 | if (tdp_enabled) | ||
2098 | direct = 1; | ||
2099 | for (i = 0; i < 4; ++i) { | 2079 | for (i = 0; i < 4; ++i) { |
2100 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2080 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2101 | 2081 | ||
@@ -2111,11 +2091,18 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2111 | root_gfn = 0; | 2091 | root_gfn = 0; |
2112 | if (mmu_check_root(vcpu, root_gfn)) | 2092 | if (mmu_check_root(vcpu, root_gfn)) |
2113 | return 1; | 2093 | return 1; |
2094 | if (tdp_enabled) { | ||
2095 | direct = 1; | ||
2096 | root_gfn = i << 30; | ||
2097 | } | ||
2098 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2114 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2099 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
2115 | PT32_ROOT_LEVEL, direct, | 2100 | PT32_ROOT_LEVEL, direct, |
2116 | ACC_ALL, NULL); | 2101 | ACC_ALL, NULL); |
2117 | root = __pa(sp->spt); | 2102 | root = __pa(sp->spt); |
2118 | ++sp->root_count; | 2103 | ++sp->root_count; |
2104 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2105 | |||
2119 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 2106 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
2120 | } | 2107 | } |
2121 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 2108 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
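
In the two mmu_alloc_roots() hunks above, mmu_lock is now taken inside the function and only around kvm_mmu_get_page() and the root_count increment; the root_gfn checks stay outside it, and a later hunk shows kvm_mmu_load() dropping the lock before calling mmu_alloc_roots(). A minimal sketch of that narrowed lock scope, with a pthread mutex standing in for mmu_lock and hypothetical helpers:

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;

int  check_root(uint64_t root_gfn);        /* validation done without the lock */
void *get_shadow_root(uint64_t root_gfn);  /* touches shadow MMU state: needs the lock */

static int alloc_root(uint64_t root_gfn, void **root)
{
	if (check_root(root_gfn))          /* performed before the lock is taken */
		return 1;

	pthread_mutex_lock(&mmu_lock);
	*root = get_shadow_root(root_gfn); /* shadow page allocation under the lock */
	pthread_mutex_unlock(&mmu_lock);
	return 0;
}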
@@ -2299,13 +2286,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2299 | /* no rsvd bits for 2 level 4K page table entries */ | 2286 | /* no rsvd bits for 2 level 4K page table entries */ |
2300 | context->rsvd_bits_mask[0][1] = 0; | 2287 | context->rsvd_bits_mask[0][1] = 0; |
2301 | context->rsvd_bits_mask[0][0] = 0; | 2288 | context->rsvd_bits_mask[0][0] = 0; |
2289 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
2290 | |||
2291 | if (!is_pse(vcpu)) { | ||
2292 | context->rsvd_bits_mask[1][1] = 0; | ||
2293 | break; | ||
2294 | } | ||
2295 | |||
2302 | if (is_cpuid_PSE36()) | 2296 | if (is_cpuid_PSE36()) |
2303 | /* 36bits PSE 4MB page */ | 2297 | /* 36bits PSE 4MB page */ |
2304 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | 2298 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); |
2305 | else | 2299 | else |
2306 | /* 32 bits PSE 4MB page */ | 2300 | /* 32 bits PSE 4MB page */ |
2307 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2301 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
2308 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | ||
2309 | break; | 2302 | break; |
2310 | case PT32E_ROOT_LEVEL: | 2303 | case PT32E_ROOT_LEVEL: |
2311 | context->rsvd_bits_mask[0][2] = | 2304 | context->rsvd_bits_mask[0][2] = |
@@ -2318,7 +2311,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2318 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2311 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2319 | rsvd_bits(maxphyaddr, 62) | | 2312 | rsvd_bits(maxphyaddr, 62) | |
2320 | rsvd_bits(13, 20); /* large page */ | 2313 | rsvd_bits(13, 20); /* large page */ |
2321 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2314 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2322 | break; | 2315 | break; |
2323 | case PT64_ROOT_LEVEL: | 2316 | case PT64_ROOT_LEVEL: |
2324 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2317 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
@@ -2336,7 +2329,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2336 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2329 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2337 | rsvd_bits(maxphyaddr, 51) | | 2330 | rsvd_bits(maxphyaddr, 51) | |
2338 | rsvd_bits(13, 20); /* large page */ | 2331 | rsvd_bits(13, 20); /* large page */ |
2339 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2332 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2340 | break; | 2333 | break; |
2341 | } | 2334 | } |
2342 | } | 2335 | } |
@@ -2438,7 +2431,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | |||
2438 | else | 2431 | else |
2439 | r = paging32_init_context(vcpu); | 2432 | r = paging32_init_context(vcpu); |
2440 | 2433 | ||
2441 | vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; | 2434 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
2435 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | ||
2442 | 2436 | ||
2443 | return r; | 2437 | return r; |
2444 | } | 2438 | } |
@@ -2478,7 +2472,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
2478 | goto out; | 2472 | goto out; |
2479 | spin_lock(&vcpu->kvm->mmu_lock); | 2473 | spin_lock(&vcpu->kvm->mmu_lock); |
2480 | kvm_mmu_free_some_pages(vcpu); | 2474 | kvm_mmu_free_some_pages(vcpu); |
2475 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2481 | r = mmu_alloc_roots(vcpu); | 2476 | r = mmu_alloc_roots(vcpu); |
2477 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2482 | mmu_sync_roots(vcpu); | 2478 | mmu_sync_roots(vcpu); |
2483 | spin_unlock(&vcpu->kvm->mmu_lock); | 2479 | spin_unlock(&vcpu->kvm->mmu_lock); |
2484 | if (r) | 2480 | if (r) |
@@ -2527,7 +2523,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
2527 | } | 2523 | } |
2528 | 2524 | ||
2529 | ++vcpu->kvm->stat.mmu_pte_updated; | 2525 | ++vcpu->kvm->stat.mmu_pte_updated; |
2530 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 2526 | if (!sp->role.cr4_pae) |
2531 | paging32_update_pte(vcpu, sp, spte, new); | 2527 | paging32_update_pte(vcpu, sp, spte, new); |
2532 | else | 2528 | else |
2533 | paging64_update_pte(vcpu, sp, spte, new); | 2529 | paging64_update_pte(vcpu, sp, spte, new); |
@@ -2562,36 +2558,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
2562 | } | 2558 | } |
2563 | 2559 | ||
2564 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2560 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
2565 | const u8 *new, int bytes) | 2561 | u64 gpte) |
2566 | { | 2562 | { |
2567 | gfn_t gfn; | 2563 | gfn_t gfn; |
2568 | int r; | ||
2569 | u64 gpte = 0; | ||
2570 | pfn_t pfn; | 2564 | pfn_t pfn; |
2571 | 2565 | ||
2572 | if (bytes != 4 && bytes != 8) | ||
2573 | return; | ||
2574 | |||
2575 | /* | ||
2576 | * Assume that the pte write on a page table of the same type | ||
2577 | * as the current vcpu paging mode. This is nearly always true | ||
2578 | * (might be false while changing modes). Note it is verified later | ||
2579 | * by update_pte(). | ||
2580 | */ | ||
2581 | if (is_pae(vcpu)) { | ||
2582 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
2583 | if ((bytes == 4) && (gpa % 4 == 0)) { | ||
2584 | r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); | ||
2585 | if (r) | ||
2586 | return; | ||
2587 | memcpy((void *)&gpte + (gpa % 8), new, 4); | ||
2588 | } else if ((bytes == 8) && (gpa % 8 == 0)) { | ||
2589 | memcpy((void *)&gpte, new, 8); | ||
2590 | } | ||
2591 | } else { | ||
2592 | if ((bytes == 4) && (gpa % 4 == 0)) | ||
2593 | memcpy((void *)&gpte, new, 4); | ||
2594 | } | ||
2595 | if (!is_present_gpte(gpte)) | 2566 | if (!is_present_gpte(gpte)) |
2596 | return; | 2567 | return; |
2597 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2568 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
@@ -2640,10 +2611,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2640 | int flooded = 0; | 2611 | int flooded = 0; |
2641 | int npte; | 2612 | int npte; |
2642 | int r; | 2613 | int r; |
2614 | int invlpg_counter; | ||
2643 | 2615 | ||
2644 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2616 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
2645 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 2617 | |
2618 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | ||
2619 | |||
2620 | /* | ||
2621 | * Assume that the pte write on a page table of the same type | ||
2622 | * as the current vcpu paging mode. This is nearly always true | ||
2623 | * (might be false while changing modes). Note it is verified later | ||
2624 | * by update_pte(). | ||
2625 | */ | ||
2626 | if ((is_pae(vcpu) && bytes == 4) || !new) { | ||
2627 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
2628 | if (is_pae(vcpu)) { | ||
2629 | gpa &= ~(gpa_t)7; | ||
2630 | bytes = 8; | ||
2631 | } | ||
2632 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
2633 | if (r) | ||
2634 | gentry = 0; | ||
2635 | new = (const u8 *)&gentry; | ||
2636 | } | ||
2637 | |||
2638 | switch (bytes) { | ||
2639 | case 4: | ||
2640 | gentry = *(const u32 *)new; | ||
2641 | break; | ||
2642 | case 8: | ||
2643 | gentry = *(const u64 *)new; | ||
2644 | break; | ||
2645 | default: | ||
2646 | gentry = 0; | ||
2647 | break; | ||
2648 | } | ||
2649 | |||
2650 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | ||
2646 | spin_lock(&vcpu->kvm->mmu_lock); | 2651 | spin_lock(&vcpu->kvm->mmu_lock); |
2652 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
2653 | gentry = 0; | ||
2647 | kvm_mmu_access_page(vcpu, gfn); | 2654 | kvm_mmu_access_page(vcpu, gfn); |
2648 | kvm_mmu_free_some_pages(vcpu); | 2655 | kvm_mmu_free_some_pages(vcpu); |
2649 | ++vcpu->kvm->stat.mmu_pte_write; | 2656 | ++vcpu->kvm->stat.mmu_pte_write; |
@@ -2662,10 +2669,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2662 | } | 2669 | } |
2663 | index = kvm_page_table_hashfn(gfn); | 2670 | index = kvm_page_table_hashfn(gfn); |
2664 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2671 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
2672 | |||
2673 | restart: | ||
2665 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2674 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
2666 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | 2675 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) |
2667 | continue; | 2676 | continue; |
2668 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | 2677 | pte_size = sp->role.cr4_pae ? 8 : 4; |
2669 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2678 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
2670 | misaligned |= bytes < 4; | 2679 | misaligned |= bytes < 4; |
2671 | if (misaligned || flooded) { | 2680 | if (misaligned || flooded) { |
@@ -2682,14 +2691,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2682 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2691 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
2683 | gpa, bytes, sp->role.word); | 2692 | gpa, bytes, sp->role.word); |
2684 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2693 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) |
2685 | n = bucket->first; | 2694 | goto restart; |
2686 | ++vcpu->kvm->stat.mmu_flooded; | 2695 | ++vcpu->kvm->stat.mmu_flooded; |
2687 | continue; | 2696 | continue; |
2688 | } | 2697 | } |
2689 | page_offset = offset; | 2698 | page_offset = offset; |
2690 | level = sp->role.level; | 2699 | level = sp->role.level; |
2691 | npte = 1; | 2700 | npte = 1; |
2692 | if (sp->role.glevels == PT32_ROOT_LEVEL) { | 2701 | if (!sp->role.cr4_pae) { |
2693 | page_offset <<= 1; /* 32->64 */ | 2702 | page_offset <<= 1; /* 32->64 */ |
2694 | /* | 2703 | /* |
2695 | * A 32-bit pde maps 4MB while the shadow pdes map | 2704 | * A 32-bit pde maps 4MB while the shadow pdes map |
@@ -2707,20 +2716,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2707 | continue; | 2716 | continue; |
2708 | } | 2717 | } |
2709 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2718 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
2710 | if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { | ||
2711 | gentry = 0; | ||
2712 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
2713 | gpa & ~(u64)(pte_size - 1), | ||
2714 | &gentry, pte_size); | ||
2715 | new = (const void *)&gentry; | ||
2716 | if (r < 0) | ||
2717 | new = NULL; | ||
2718 | } | ||
2719 | while (npte--) { | 2719 | while (npte--) { |
2720 | entry = *spte; | 2720 | entry = *spte; |
2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
2722 | if (new) | 2722 | if (gentry) |
2723 | mmu_pte_write_new_pte(vcpu, sp, spte, new); | 2723 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); |
2725 | ++spte; | 2725 | ++spte; |
2726 | } | 2726 | } |
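
The kvm_mmu_pte_write() rework above builds the guest pte before taking mmu_lock: a 4-byte write by a PAE guest is widened to the containing 8-byte entry by re-reading it from guest memory, the assembled value is used to guess the target page, and it is discarded if the invlpg counter changed while the lock was not held. A simplified standalone sketch of the widening step; read_guest() and reconstruct_gpte() are hypothetical names:

#include <stdint.h>
#include <string.h>

/* Stand-in for kvm_read_guest(): returns 0 on success. */
int read_guest(uint64_t gpa, void *data, unsigned len);

/*
 * Reconstruct the full guest pte being written.  A PAE guest updating one
 * half of a 64-bit pte is handled by re-reading the whole, 8-byte aligned
 * entry from guest memory.
 */
static uint64_t reconstruct_gpte(uint64_t gpa, const void *new, unsigned bytes,
				 int guest_is_pae)
{
	uint64_t gentry = 0;

	if (guest_is_pae && bytes == 4) {
		gpa &= ~(uint64_t)7;          /* align to the 64-bit entry */
		if (read_guest(gpa, &gentry, 8))
			return 0;             /* unreadable: treat as not present */
		return gentry;
	}

	if (bytes == 4 || bytes == 8)
		memcpy(&gentry, new, bytes);  /* natural-width write, use it as-is */
	return gentry;
}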
@@ -2900,22 +2900,23 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
2900 | struct kvm_mmu_page *sp, *node; | 2900 | struct kvm_mmu_page *sp, *node; |
2901 | 2901 | ||
2902 | spin_lock(&kvm->mmu_lock); | 2902 | spin_lock(&kvm->mmu_lock); |
2903 | restart: | ||
2903 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 2904 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
2904 | if (kvm_mmu_zap_page(kvm, sp)) | 2905 | if (kvm_mmu_zap_page(kvm, sp)) |
2905 | node = container_of(kvm->arch.active_mmu_pages.next, | 2906 | goto restart; |
2906 | struct kvm_mmu_page, link); | 2907 | |
2907 | spin_unlock(&kvm->mmu_lock); | 2908 | spin_unlock(&kvm->mmu_lock); |
2908 | 2909 | ||
2909 | kvm_flush_remote_tlbs(kvm); | 2910 | kvm_flush_remote_tlbs(kvm); |
2910 | } | 2911 | } |
2911 | 2912 | ||
2912 | static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) | 2913 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) |
2913 | { | 2914 | { |
2914 | struct kvm_mmu_page *page; | 2915 | struct kvm_mmu_page *page; |
2915 | 2916 | ||
2916 | page = container_of(kvm->arch.active_mmu_pages.prev, | 2917 | page = container_of(kvm->arch.active_mmu_pages.prev, |
2917 | struct kvm_mmu_page, link); | 2918 | struct kvm_mmu_page, link); |
2918 | kvm_mmu_zap_page(kvm, page); | 2919 | return kvm_mmu_zap_page(kvm, page) + 1; |
2919 | } | 2920 | } |
2920 | 2921 | ||
2921 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | 2922 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) |
@@ -2927,7 +2928,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2927 | spin_lock(&kvm_lock); | 2928 | spin_lock(&kvm_lock); |
2928 | 2929 | ||
2929 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2930 | list_for_each_entry(kvm, &vm_list, vm_list) { |
2930 | int npages, idx; | 2931 | int npages, idx, freed_pages; |
2931 | 2932 | ||
2932 | idx = srcu_read_lock(&kvm->srcu); | 2933 | idx = srcu_read_lock(&kvm->srcu); |
2933 | spin_lock(&kvm->mmu_lock); | 2934 | spin_lock(&kvm->mmu_lock); |
@@ -2935,8 +2936,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2935 | kvm->arch.n_free_mmu_pages; | 2936 | kvm->arch.n_free_mmu_pages; |
2936 | cache_count += npages; | 2937 | cache_count += npages; |
2937 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 2938 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
2938 | kvm_mmu_remove_one_alloc_mmu_page(kvm); | 2939 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); |
2939 | cache_count--; | 2940 | cache_count -= freed_pages; |
2940 | kvm_freed = kvm; | 2941 | kvm_freed = kvm; |
2941 | } | 2942 | } |
2942 | nr_to_scan--; | 2943 | nr_to_scan--; |
@@ -3011,7 +3012,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3011 | unsigned int nr_pages = 0; | 3012 | unsigned int nr_pages = 0; |
3012 | struct kvm_memslots *slots; | 3013 | struct kvm_memslots *slots; |
3013 | 3014 | ||
3014 | slots = rcu_dereference(kvm->memslots); | 3015 | slots = kvm_memslots(kvm); |
3016 | |||
3015 | for (i = 0; i < slots->nmemslots; i++) | 3017 | for (i = 0; i < slots->nmemslots; i++) |
3016 | nr_pages += slots->memslots[i].npages; | 3018 | nr_pages += slots->memslots[i].npages; |
3017 | 3019 | ||
@@ -3174,8 +3176,7 @@ static gva_t canonicalize(gva_t gva) | |||
3174 | } | 3176 | } |
3175 | 3177 | ||
3176 | 3178 | ||
3177 | typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, | 3179 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); |
3178 | u64 *sptep); | ||
3179 | 3180 | ||
3180 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | 3181 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, |
3181 | inspect_spte_fn fn) | 3182 | inspect_spte_fn fn) |
@@ -3191,7 +3192,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
3191 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 3192 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
3192 | __mmu_spte_walk(kvm, child, fn); | 3193 | __mmu_spte_walk(kvm, child, fn); |
3193 | } else | 3194 | } else |
3194 | fn(kvm, sp, &sp->spt[i]); | 3195 | fn(kvm, &sp->spt[i]); |
3195 | } | 3196 | } |
3196 | } | 3197 | } |
3197 | } | 3198 | } |
@@ -3282,11 +3283,13 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
3282 | 3283 | ||
3283 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3284 | static int count_rmaps(struct kvm_vcpu *vcpu) |
3284 | { | 3285 | { |
3286 | struct kvm *kvm = vcpu->kvm; | ||
3287 | struct kvm_memslots *slots; | ||
3285 | int nmaps = 0; | 3288 | int nmaps = 0; |
3286 | int i, j, k, idx; | 3289 | int i, j, k, idx; |
3287 | 3290 | ||
3288 | idx = srcu_read_lock(&kvm->srcu); | 3291 | idx = srcu_read_lock(&kvm->srcu); |
3289 | slots = rcu_dereference(kvm->memslots); | 3292 | slots = kvm_memslots(kvm); |
3290 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3293 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
3291 | struct kvm_memory_slot *m = &slots->memslots[i]; | 3294 | struct kvm_memory_slot *m = &slots->memslots[i]; |
3292 | struct kvm_rmap_desc *d; | 3295 | struct kvm_rmap_desc *d; |
@@ -3315,7 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
3315 | return nmaps; | 3318 | return nmaps; |
3316 | } | 3319 | } |
3317 | 3320 | ||
3318 | void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | 3321 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
3319 | { | 3322 | { |
3320 | unsigned long *rmapp; | 3323 | unsigned long *rmapp; |
3321 | struct kvm_mmu_page *rev_sp; | 3324 | struct kvm_mmu_page *rev_sp; |
@@ -3331,14 +3334,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | |||
3331 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", | 3334 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", |
3332 | audit_msg, gfn); | 3335 | audit_msg, gfn); |
3333 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", | 3336 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", |
3334 | audit_msg, sptep - rev_sp->spt, | 3337 | audit_msg, (long int)(sptep - rev_sp->spt), |
3335 | rev_sp->gfn); | 3338 | rev_sp->gfn); |
3336 | dump_stack(); | 3339 | dump_stack(); |
3337 | return; | 3340 | return; |
3338 | } | 3341 | } |
3339 | 3342 | ||
3340 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3343 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], |
3341 | is_large_pte(*sptep)); | 3344 | rev_sp->role.level); |
3342 | if (!*rmapp) { | 3345 | if (!*rmapp) { |
3343 | if (!printk_ratelimit()) | 3346 | if (!printk_ratelimit()) |
3344 | return; | 3347 | return; |
@@ -3373,7 +3376,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
3373 | continue; | 3376 | continue; |
3374 | if (!(ent & PT_WRITABLE_MASK)) | 3377 | if (!(ent & PT_WRITABLE_MASK)) |
3375 | continue; | 3378 | continue; |
3376 | inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); | 3379 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
3377 | } | 3380 | } |
3378 | } | 3381 | } |
3379 | return; | 3382 | return; |
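The mmu.c hunks above replace the manual successor bookkeeping in kvm_mmu_pte_write() and kvm_mmu_zap_all() with a restart label: zapping one shadow page can tear down further pages, so a saved "next" cursor may dangle and the only safe recovery is to restart the walk. A minimal userspace sketch of that pattern (illustration only; the node/zap names and types are invented here, not kernel code):

/*
 * Sketch of the "goto restart" pattern: the callee may remove more
 * entries than the one the cursor points at, so a pre-saved successor
 * can go stale and the walk restarts from the head.
 */
#include <stdio.h>
#include <stdlib.h>

struct node {
	int id;
	int doomed;		/* removing any node also removes doomed peers */
	struct node *next;
};

static struct node *head;

/* Remove 'victim' and every doomed node; return nonzero if extras went too. */
static int zap(struct node *victim)
{
	struct node **pp = &head;
	int extra = 0;

	while (*pp) {
		struct node *cur = *pp;

		if (cur == victim || cur->doomed) {
			*pp = cur->next;
			extra += (cur != victim);
			free(cur);
		} else {
			pp = &cur->next;
		}
	}
	return extra;
}

static void zap_matching(int id)
{
	struct node *n, *next;

restart:
	for (n = head; n; n = next) {
		next = n->next;		/* "safe" iteration saves the successor */
		if (n->id != id)
			continue;
		if (zap(n))		/* zap() may have freed 'next' as well */
			goto restart;
	}
}

static void push(int id, int doomed)
{
	struct node *n = malloc(sizeof(*n));

	n->id = id;
	n->doomed = doomed;
	n->next = head;
	head = n;
}

int main(void)
{
	push(1, 0); push(2, 1); push(1, 0); push(3, 0);
	zap_matching(1);
	for (struct node *n = head; n; n = n->next)
		printf("left: id=%d\n", n->id);	/* only id=3 survives */
	return 0;
}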
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 3e4a5c6ca2a9..42f07b1bfbc9 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -6,14 +6,12 @@ | |||
6 | 6 | ||
7 | #undef TRACE_SYSTEM | 7 | #undef TRACE_SYSTEM |
8 | #define TRACE_SYSTEM kvmmmu | 8 | #define TRACE_SYSTEM kvmmmu |
9 | #define TRACE_INCLUDE_PATH . | ||
10 | #define TRACE_INCLUDE_FILE mmutrace | ||
11 | 9 | ||
12 | #define KVM_MMU_PAGE_FIELDS \ | 10 | #define KVM_MMU_PAGE_FIELDS \ |
13 | __field(__u64, gfn) \ | 11 | __field(__u64, gfn) \ |
14 | __field(__u32, role) \ | 12 | __field(__u32, role) \ |
15 | __field(__u32, root_count) \ | 13 | __field(__u32, root_count) \ |
16 | __field(__u32, unsync) | 14 | __field(bool, unsync) |
17 | 15 | ||
18 | #define KVM_MMU_PAGE_ASSIGN(sp) \ | 16 | #define KVM_MMU_PAGE_ASSIGN(sp) \ |
19 | __entry->gfn = sp->gfn; \ | 17 | __entry->gfn = sp->gfn; \ |
@@ -30,14 +28,14 @@ | |||
30 | \ | 28 | \ |
31 | role.word = __entry->role; \ | 29 | role.word = __entry->role; \ |
32 | \ | 30 | \ |
33 | trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ | 31 | trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ |
34 | " %snxe root %u %s%c", \ | 32 | " %snxe root %u %s%c", \ |
35 | __entry->gfn, role.level, role.glevels, \ | 33 | __entry->gfn, role.level, \ |
34 | role.cr4_pae ? " pae" : "", \ | ||
36 | role.quadrant, \ | 35 | role.quadrant, \ |
37 | role.direct ? " direct" : "", \ | 36 | role.direct ? " direct" : "", \ |
38 | access_str[role.access], \ | 37 | access_str[role.access], \ |
39 | role.invalid ? " invalid" : "", \ | 38 | role.invalid ? " invalid" : "", \ |
40 | role.cr4_pge ? "" : "!", \ | ||
41 | role.nxe ? "" : "!", \ | 39 | role.nxe ? "" : "!", \ |
42 | __entry->root_count, \ | 40 | __entry->root_count, \ |
43 | __entry->unsync ? "unsync" : "sync", 0); \ | 41 | __entry->unsync ? "unsync" : "sync", 0); \ |
@@ -94,15 +92,15 @@ TRACE_EVENT( | |||
94 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) | 92 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) |
95 | ); | 93 | ); |
96 | 94 | ||
97 | /* We set a pte accessed bit */ | 95 | DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, |
98 | TRACE_EVENT( | 96 | |
99 | kvm_mmu_set_accessed_bit, | ||
100 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 97 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
98 | |||
101 | TP_ARGS(table_gfn, index, size), | 99 | TP_ARGS(table_gfn, index, size), |
102 | 100 | ||
103 | TP_STRUCT__entry( | 101 | TP_STRUCT__entry( |
104 | __field(__u64, gpa) | 102 | __field(__u64, gpa) |
105 | ), | 103 | ), |
106 | 104 | ||
107 | TP_fast_assign( | 105 | TP_fast_assign( |
108 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 106 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) |
@@ -112,22 +110,20 @@ TRACE_EVENT( | |||
112 | TP_printk("gpa %llx", __entry->gpa) | 110 | TP_printk("gpa %llx", __entry->gpa) |
113 | ); | 111 | ); |
114 | 112 | ||
115 | /* We set a pte dirty bit */ | 113 | /* We set a pte accessed bit */ |
116 | TRACE_EVENT( | 114 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, |
117 | kvm_mmu_set_dirty_bit, | 115 | |
118 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 116 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
119 | TP_ARGS(table_gfn, index, size), | ||
120 | 117 | ||
121 | TP_STRUCT__entry( | 118 | TP_ARGS(table_gfn, index, size) |
122 | __field(__u64, gpa) | 119 | ); |
123 | ), | ||
124 | 120 | ||
125 | TP_fast_assign( | 121 | /* We set a pte dirty bit */ |
126 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 122 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, |
127 | + index * size; | ||
128 | ), | ||
129 | 123 | ||
130 | TP_printk("gpa %llx", __entry->gpa) | 124 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
125 | |||
126 | TP_ARGS(table_gfn, index, size) | ||
131 | ); | 127 | ); |
132 | 128 | ||
133 | TRACE_EVENT( | 129 | TRACE_EVENT( |
@@ -166,55 +162,45 @@ TRACE_EVENT( | |||
166 | __entry->created ? "new" : "existing") | 162 | __entry->created ? "new" : "existing") |
167 | ); | 163 | ); |
168 | 164 | ||
169 | TRACE_EVENT( | 165 | DECLARE_EVENT_CLASS(kvm_mmu_page_class, |
170 | kvm_mmu_sync_page, | 166 | |
171 | TP_PROTO(struct kvm_mmu_page *sp), | 167 | TP_PROTO(struct kvm_mmu_page *sp), |
172 | TP_ARGS(sp), | 168 | TP_ARGS(sp), |
173 | 169 | ||
174 | TP_STRUCT__entry( | 170 | TP_STRUCT__entry( |
175 | KVM_MMU_PAGE_FIELDS | 171 | KVM_MMU_PAGE_FIELDS |
176 | ), | 172 | ), |
177 | 173 | ||
178 | TP_fast_assign( | 174 | TP_fast_assign( |
179 | KVM_MMU_PAGE_ASSIGN(sp) | 175 | KVM_MMU_PAGE_ASSIGN(sp) |
180 | ), | 176 | ), |
181 | 177 | ||
182 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 178 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) |
183 | ); | 179 | ); |
184 | 180 | ||
185 | TRACE_EVENT( | 181 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, |
186 | kvm_mmu_unsync_page, | ||
187 | TP_PROTO(struct kvm_mmu_page *sp), | 182 | TP_PROTO(struct kvm_mmu_page *sp), |
188 | TP_ARGS(sp), | ||
189 | |||
190 | TP_STRUCT__entry( | ||
191 | KVM_MMU_PAGE_FIELDS | ||
192 | ), | ||
193 | 183 | ||
194 | TP_fast_assign( | 184 | TP_ARGS(sp) |
195 | KVM_MMU_PAGE_ASSIGN(sp) | ||
196 | ), | ||
197 | |||
198 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | ||
199 | ); | 185 | ); |
200 | 186 | ||
201 | TRACE_EVENT( | 187 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, |
202 | kvm_mmu_zap_page, | ||
203 | TP_PROTO(struct kvm_mmu_page *sp), | 188 | TP_PROTO(struct kvm_mmu_page *sp), |
204 | TP_ARGS(sp), | ||
205 | 189 | ||
206 | TP_STRUCT__entry( | 190 | TP_ARGS(sp) |
207 | KVM_MMU_PAGE_FIELDS | 191 | ); |
208 | ), | ||
209 | 192 | ||
210 | TP_fast_assign( | 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, |
211 | KVM_MMU_PAGE_ASSIGN(sp) | 194 | TP_PROTO(struct kvm_mmu_page *sp), |
212 | ), | ||
213 | 195 | ||
214 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 196 | TP_ARGS(sp) |
215 | ); | 197 | ); |
216 | |||
217 | #endif /* _TRACE_KVMMMU_H */ | 198 | #endif /* _TRACE_KVMMMU_H */ |
218 | 199 | ||
200 | #undef TRACE_INCLUDE_PATH | ||
201 | #define TRACE_INCLUDE_PATH . | ||
202 | #undef TRACE_INCLUDE_FILE | ||
203 | #define TRACE_INCLUDE_FILE mmutrace | ||
204 | |||
219 | /* This part must be outside protection */ | 205 | /* This part must be outside protection */ |
220 | #include <trace/define_trace.h> | 206 | #include <trace/define_trace.h> |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 81eab9a50e6a..89d66ca4d87c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -170,7 +170,7 @@ walk: | |||
170 | goto access_error; | 170 | goto access_error; |
171 | 171 | ||
172 | #if PTTYPE == 64 | 172 | #if PTTYPE == 64 |
173 | if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) | 173 | if (fetch_fault && (pte & PT64_NX_MASK)) |
174 | goto access_error; | 174 | goto access_error; |
175 | #endif | 175 | #endif |
176 | 176 | ||
@@ -190,10 +190,10 @@ walk: | |||
190 | 190 | ||
191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || | 191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || |
192 | ((walker->level == PT_DIRECTORY_LEVEL) && | 192 | ((walker->level == PT_DIRECTORY_LEVEL) && |
193 | (pte & PT_PAGE_SIZE_MASK) && | 193 | is_large_pte(pte) && |
194 | (PTTYPE == 64 || is_pse(vcpu))) || | 194 | (PTTYPE == 64 || is_pse(vcpu))) || |
195 | ((walker->level == PT_PDPE_LEVEL) && | 195 | ((walker->level == PT_PDPE_LEVEL) && |
196 | (pte & PT_PAGE_SIZE_MASK) && | 196 | is_large_pte(pte) && |
197 | is_long_mode(vcpu))) { | 197 | is_long_mode(vcpu))) { |
198 | int lvl = walker->level; | 198 | int lvl = walker->level; |
199 | 199 | ||
@@ -258,11 +258,17 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
258 | pt_element_t gpte; | 258 | pt_element_t gpte; |
259 | unsigned pte_access; | 259 | unsigned pte_access; |
260 | pfn_t pfn; | 260 | pfn_t pfn; |
261 | u64 new_spte; | ||
261 | 262 | ||
262 | gpte = *(const pt_element_t *)pte; | 263 | gpte = *(const pt_element_t *)pte; |
263 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
264 | if (!is_present_gpte(gpte)) | 265 | if (!is_present_gpte(gpte)) { |
265 | __set_spte(spte, shadow_notrap_nonpresent_pte); | 266 | if (page->unsync) |
267 | new_spte = shadow_trap_nonpresent_pte; | ||
268 | else | ||
269 | new_spte = shadow_notrap_nonpresent_pte; | ||
270 | __set_spte(spte, new_spte); | ||
271 | } | ||
266 | return; | 272 | return; |
267 | } | 273 | } |
268 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
@@ -457,6 +463,7 @@ out_unlock: | |||
457 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 463 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
458 | { | 464 | { |
459 | struct kvm_shadow_walk_iterator iterator; | 465 | struct kvm_shadow_walk_iterator iterator; |
466 | gpa_t pte_gpa = -1; | ||
460 | int level; | 467 | int level; |
461 | u64 *sptep; | 468 | u64 *sptep; |
462 | int need_flush = 0; | 469 | int need_flush = 0; |
@@ -467,9 +474,16 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
467 | level = iterator.level; | 474 | level = iterator.level; |
468 | sptep = iterator.sptep; | 475 | sptep = iterator.sptep; |
469 | 476 | ||
470 | if (level == PT_PAGE_TABLE_LEVEL || | 477 | if (is_last_spte(*sptep, level)) { |
471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 478 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); |
472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 479 | int offset, shift; |
480 | |||
481 | shift = PAGE_SHIFT - | ||
482 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | ||
483 | offset = sp->role.quadrant << shift; | ||
484 | |||
485 | pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; | ||
486 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
473 | 487 | ||
474 | if (is_shadow_present_pte(*sptep)) { | 488 | if (is_shadow_present_pte(*sptep)) { |
475 | rmap_remove(vcpu->kvm, sptep); | 489 | rmap_remove(vcpu->kvm, sptep); |
@@ -487,7 +501,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
487 | 501 | ||
488 | if (need_flush) | 502 | if (need_flush) |
489 | kvm_flush_remote_tlbs(vcpu->kvm); | 503 | kvm_flush_remote_tlbs(vcpu->kvm); |
504 | |||
505 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
506 | |||
490 | spin_unlock(&vcpu->kvm->mmu_lock); | 507 | spin_unlock(&vcpu->kvm->mmu_lock); |
508 | |||
509 | if (pte_gpa == -1) | ||
510 | return; | ||
511 | |||
512 | if (mmu_topup_memory_caches(vcpu)) | ||
513 | return; | ||
514 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
491 | } | 515 | } |
492 | 516 | ||
493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 517 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
@@ -551,12 +575,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
551 | { | 575 | { |
552 | int i, offset, nr_present; | 576 | int i, offset, nr_present; |
553 | bool reset_host_protection; | 577 | bool reset_host_protection; |
578 | gpa_t first_pte_gpa; | ||
554 | 579 | ||
555 | offset = nr_present = 0; | 580 | offset = nr_present = 0; |
556 | 581 | ||
557 | if (PTTYPE == 32) | 582 | if (PTTYPE == 32) |
558 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 583 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
559 | 584 | ||
585 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
586 | |||
560 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 587 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
561 | unsigned pte_access; | 588 | unsigned pte_access; |
562 | pt_element_t gpte; | 589 | pt_element_t gpte; |
@@ -566,8 +593,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
566 | if (!is_shadow_present_pte(sp->spt[i])) | 593 | if (!is_shadow_present_pte(sp->spt[i])) |
567 | continue; | 594 | continue; |
568 | 595 | ||
569 | pte_gpa = gfn_to_gpa(sp->gfn); | 596 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); |
570 | pte_gpa += (i+offset) * sizeof(pt_element_t); | ||
571 | 597 | ||
572 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | 598 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, |
573 | sizeof(pt_element_t))) | 599 | sizeof(pt_element_t))) |
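The paging_tmpl.h hunks above derive the guest PTE address backing a shadow slot from the shadow page's gfn, its role.quadrant, and the slot index, both in FNAME(invlpg) and in FNAME(sync_page). A small standalone sketch of the same arithmetic (simplified illustration only; guest_pte_gpa() and its parameters are invented here, and only the last-level case is shown):

/*
 * For a 32-bit (non-PAE) guest, one 4 KB guest page table holds 1024
 * 4-byte PTEs but a shadow page holds 512 sptes, so role.quadrant
 * selects which half of the guest page this shadow page covers.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PT64_LEVEL_BITS 9	/* 512 sptes per shadow page */

typedef uint64_t gpa_t;

static gpa_t guest_pte_gpa(uint64_t gfn, unsigned quadrant,
			   int guest_pae, unsigned index)
{
	unsigned pte_size = guest_pae ? 8 : 4;		/* bytes per guest PTE */
	unsigned offset = guest_pae ? 0 : (quadrant << PT64_LEVEL_BITS);
	gpa_t first = ((gpa_t)gfn << PAGE_SHIFT) + offset * pte_size;

	return first + index * pte_size;
}

int main(void)
{
	/* Shadow page for quadrant 1 of a 32-bit guest page table at gfn 0x1234. */
	printf("pte gpa = 0x%llx\n",
	       (unsigned long long)guest_pte_gpa(0x1234, 1, 0, 7));
	return 0;
}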
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 737361fcd503..96dc232bfc56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -44,10 +44,11 @@ MODULE_LICENSE("GPL"); | |||
44 | #define SEG_TYPE_LDT 2 | 44 | #define SEG_TYPE_LDT 2 |
45 | #define SEG_TYPE_BUSY_TSS16 3 | 45 | #define SEG_TYPE_BUSY_TSS16 3 |
46 | 46 | ||
47 | #define SVM_FEATURE_NPT (1 << 0) | 47 | #define SVM_FEATURE_NPT (1 << 0) |
48 | #define SVM_FEATURE_LBRV (1 << 1) | 48 | #define SVM_FEATURE_LBRV (1 << 1) |
49 | #define SVM_FEATURE_SVML (1 << 2) | 49 | #define SVM_FEATURE_SVML (1 << 2) |
50 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 50 | #define SVM_FEATURE_NRIP (1 << 3) |
51 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
51 | 52 | ||
52 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 53 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
53 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 54 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
@@ -70,6 +71,7 @@ struct kvm_vcpu; | |||
70 | struct nested_state { | 71 | struct nested_state { |
71 | struct vmcb *hsave; | 72 | struct vmcb *hsave; |
72 | u64 hsave_msr; | 73 | u64 hsave_msr; |
74 | u64 vm_cr_msr; | ||
73 | u64 vmcb; | 75 | u64 vmcb; |
74 | 76 | ||
75 | /* These are the merged vectors */ | 77 | /* These are the merged vectors */ |
@@ -77,6 +79,7 @@ struct nested_state { | |||
77 | 79 | ||
78 | /* gpa pointers to the real vectors */ | 80 | /* gpa pointers to the real vectors */ |
79 | u64 vmcb_msrpm; | 81 | u64 vmcb_msrpm; |
82 | u64 vmcb_iopm; | ||
80 | 83 | ||
81 | /* A VMEXIT is required but not yet emulated */ | 84 | /* A VMEXIT is required but not yet emulated */ |
82 | bool exit_required; | 85 | bool exit_required; |
@@ -91,6 +94,9 @@ struct nested_state { | |||
91 | 94 | ||
92 | }; | 95 | }; |
93 | 96 | ||
97 | #define MSRPM_OFFSETS 16 | ||
98 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; | ||
99 | |||
94 | struct vcpu_svm { | 100 | struct vcpu_svm { |
95 | struct kvm_vcpu vcpu; | 101 | struct kvm_vcpu vcpu; |
96 | struct vmcb *vmcb; | 102 | struct vmcb *vmcb; |
@@ -110,13 +116,39 @@ struct vcpu_svm { | |||
110 | struct nested_state nested; | 116 | struct nested_state nested; |
111 | 117 | ||
112 | bool nmi_singlestep; | 118 | bool nmi_singlestep; |
119 | |||
120 | unsigned int3_injected; | ||
121 | unsigned long int3_rip; | ||
122 | }; | ||
123 | |||
124 | #define MSR_INVALID 0xffffffffU | ||
125 | |||
126 | static struct svm_direct_access_msrs { | ||
127 | u32 index; /* Index of the MSR */ | ||
128 | bool always; /* True if intercept is always on */ | ||
129 | } direct_access_msrs[] = { | ||
130 | { .index = MSR_K6_STAR, .always = true }, | ||
131 | { .index = MSR_IA32_SYSENTER_CS, .always = true }, | ||
132 | #ifdef CONFIG_X86_64 | ||
133 | { .index = MSR_GS_BASE, .always = true }, | ||
134 | { .index = MSR_FS_BASE, .always = true }, | ||
135 | { .index = MSR_KERNEL_GS_BASE, .always = true }, | ||
136 | { .index = MSR_LSTAR, .always = true }, | ||
137 | { .index = MSR_CSTAR, .always = true }, | ||
138 | { .index = MSR_SYSCALL_MASK, .always = true }, | ||
139 | #endif | ||
140 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, | ||
141 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, | ||
142 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, | ||
143 | { .index = MSR_IA32_LASTINTTOIP, .always = false }, | ||
144 | { .index = MSR_INVALID, .always = false }, | ||
113 | }; | 145 | }; |
114 | 146 | ||
115 | /* enable NPT for AMD64 and X86 with PAE */ | 147 | /* enable NPT for AMD64 and X86 with PAE */ |
116 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 148 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
117 | static bool npt_enabled = true; | 149 | static bool npt_enabled = true; |
118 | #else | 150 | #else |
119 | static bool npt_enabled = false; | 151 | static bool npt_enabled; |
120 | #endif | 152 | #endif |
121 | static int npt = 1; | 153 | static int npt = 1; |
122 | 154 | ||
@@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu); | |||
129 | static void svm_complete_interrupts(struct vcpu_svm *svm); | 161 | static void svm_complete_interrupts(struct vcpu_svm *svm); |
130 | 162 | ||
131 | static int nested_svm_exit_handled(struct vcpu_svm *svm); | 163 | static int nested_svm_exit_handled(struct vcpu_svm *svm); |
164 | static int nested_svm_intercept(struct vcpu_svm *svm); | ||
132 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 165 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
133 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 166 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
134 | bool has_error_code, u32 error_code); | 167 | bool has_error_code, u32 error_code); |
@@ -163,8 +196,8 @@ static unsigned long iopm_base; | |||
163 | struct kvm_ldttss_desc { | 196 | struct kvm_ldttss_desc { |
164 | u16 limit0; | 197 | u16 limit0; |
165 | u16 base0; | 198 | u16 base0; |
166 | unsigned base1 : 8, type : 5, dpl : 2, p : 1; | 199 | unsigned base1:8, type:5, dpl:2, p:1; |
167 | unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; | 200 | unsigned limit1:4, zero0:3, g:1, base2:8; |
168 | u32 base3; | 201 | u32 base3; |
169 | u32 zero1; | 202 | u32 zero1; |
170 | } __attribute__((packed)); | 203 | } __attribute__((packed)); |
@@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; | |||
194 | #define MSRS_RANGE_SIZE 2048 | 227 | #define MSRS_RANGE_SIZE 2048 |
195 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) | 228 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) |
196 | 229 | ||
230 | static u32 svm_msrpm_offset(u32 msr) | ||
231 | { | ||
232 | u32 offset; | ||
233 | int i; | ||
234 | |||
235 | for (i = 0; i < NUM_MSR_MAPS; i++) { | ||
236 | if (msr < msrpm_ranges[i] || | ||
237 | msr >= msrpm_ranges[i] + MSRS_IN_RANGE) | ||
238 | continue; | ||
239 | |||
240 | offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ | ||
241 | offset += (i * MSRS_RANGE_SIZE); /* add range offset */ | ||
242 | |||
243 | /* Now we have the u8 offset - but need the u32 offset */ | ||
244 | return offset / 4; | ||
245 | } | ||
246 | |||
247 | /* MSR not in any range */ | ||
248 | return MSR_INVALID; | ||
249 | } | ||
250 | |||
197 | #define MAX_INST_SIZE 15 | 251 | #define MAX_INST_SIZE 15 |
198 | 252 | ||
199 | static inline u32 svm_has(u32 feat) | 253 | static inline u32 svm_has(u32 feat) |
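svm_msrpm_offset(), added above, maps an MSR number to a u32 index into the MSR permission bitmap; each MSR occupies two bits (read, write) in one of three 2 KB ranges. A standalone sketch of that addressing, compiled outside the kernel purely as an illustration (msrpm_offset() here mirrors the kernel helper but is not it):

#include <stdint.h>
#include <stdio.h>

#define MSR_INVALID	0xffffffffU
#define MSRS_RANGE_SIZE	2048
#define MSRS_IN_RANGE	(MSRS_RANGE_SIZE * 8 / 2)	/* 8192 MSRs per range */

static const uint32_t msrpm_ranges[] = { 0, 0xc0000000, 0xc0010000 };

static uint32_t msrpm_offset(uint32_t msr)
{
	unsigned i;

	for (i = 0; i < 3; i++) {
		if (msr < msrpm_ranges[i] || msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
			continue;

		/* byte offset: 2 bits per MSR, so 4 MSRs per byte */
		uint32_t offset = (msr - msrpm_ranges[i]) / 4;
		offset += i * MSRS_RANGE_SIZE;		/* skip earlier ranges */
		return offset / 4;			/* convert to u32 index */
	}
	return MSR_INVALID;				/* not covered by the bitmap */
}

int main(void)
{
	uint32_t msr = 0xc0000081;	/* STAR, an MSR in the 0xc0000000 range */
	uint32_t off = msrpm_offset(msr);

	printf("msr %#x -> u32 %u, read bit %u, write bit %u\n",
	       msr, off, 2 * (msr & 0x0f), 2 * (msr & 0x0f) + 1);
	return 0;
}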
@@ -213,7 +267,7 @@ static inline void stgi(void) | |||
213 | 267 | ||
214 | static inline void invlpga(unsigned long addr, u32 asid) | 268 | static inline void invlpga(unsigned long addr, u32 asid) |
215 | { | 269 | { |
216 | asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); | 270 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
217 | } | 271 | } |
218 | 272 | ||
219 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | 273 | static inline void force_new_asid(struct kvm_vcpu *vcpu) |
@@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
235 | vcpu->arch.efer = efer; | 289 | vcpu->arch.efer = efer; |
236 | } | 290 | } |
237 | 291 | ||
238 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
239 | bool has_error_code, u32 error_code) | ||
240 | { | ||
241 | struct vcpu_svm *svm = to_svm(vcpu); | ||
242 | |||
243 | /* If we are within a nested VM we'd better #VMEXIT and let the | ||
244 | guest handle the exception */ | ||
245 | if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
246 | return; | ||
247 | |||
248 | svm->vmcb->control.event_inj = nr | ||
249 | | SVM_EVTINJ_VALID | ||
250 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
251 | | SVM_EVTINJ_TYPE_EXEPT; | ||
252 | svm->vmcb->control.event_inj_err = error_code; | ||
253 | } | ||
254 | |||
255 | static int is_external_interrupt(u32 info) | 292 | static int is_external_interrupt(u32 info) |
256 | { | 293 | { |
257 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 294 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; |
@@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
264 | u32 ret = 0; | 301 | u32 ret = 0; |
265 | 302 | ||
266 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) | 303 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
267 | ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; | 304 | ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; |
268 | return ret & mask; | 305 | return ret & mask; |
269 | } | 306 | } |
270 | 307 | ||
@@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
283 | { | 320 | { |
284 | struct vcpu_svm *svm = to_svm(vcpu); | 321 | struct vcpu_svm *svm = to_svm(vcpu); |
285 | 322 | ||
323 | if (svm->vmcb->control.next_rip != 0) | ||
324 | svm->next_rip = svm->vmcb->control.next_rip; | ||
325 | |||
286 | if (!svm->next_rip) { | 326 | if (!svm->next_rip) { |
287 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != | 327 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
288 | EMULATE_DONE) | 328 | EMULATE_DONE) |
@@ -297,6 +337,43 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
297 | svm_set_interrupt_shadow(vcpu, 0); | 337 | svm_set_interrupt_shadow(vcpu, 0); |
298 | } | 338 | } |
299 | 339 | ||
340 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
341 | bool has_error_code, u32 error_code, | ||
342 | bool reinject) | ||
343 | { | ||
344 | struct vcpu_svm *svm = to_svm(vcpu); | ||
345 | |||
346 | /* | ||
347 | * If we are within a nested VM we'd better #VMEXIT and let the guest | ||
348 | * handle the exception | ||
349 | */ | ||
350 | if (!reinject && | ||
351 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
352 | return; | ||
353 | |||
354 | if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { | ||
355 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | ||
356 | |||
357 | /* | ||
358 | * For guest debugging where we have to reinject #BP if some | ||
359 | * INT3 is guest-owned: | ||
360 | * Emulate nRIP by moving RIP forward. Will fail if injection | ||
361 | * raises a fault that is not intercepted. Still better than | ||
362 | * failing in all cases. | ||
363 | */ | ||
364 | skip_emulated_instruction(&svm->vcpu); | ||
365 | rip = kvm_rip_read(&svm->vcpu); | ||
366 | svm->int3_rip = rip + svm->vmcb->save.cs.base; | ||
367 | svm->int3_injected = rip - old_rip; | ||
368 | } | ||
369 | |||
370 | svm->vmcb->control.event_inj = nr | ||
371 | | SVM_EVTINJ_VALID | ||
372 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
373 | | SVM_EVTINJ_TYPE_EXEPT; | ||
374 | svm->vmcb->control.event_inj_err = error_code; | ||
375 | } | ||
376 | |||
300 | static int has_svm(void) | 377 | static int has_svm(void) |
301 | { | 378 | { |
302 | const char *msg; | 379 | const char *msg; |
@@ -319,7 +396,7 @@ static int svm_hardware_enable(void *garbage) | |||
319 | 396 | ||
320 | struct svm_cpu_data *sd; | 397 | struct svm_cpu_data *sd; |
321 | uint64_t efer; | 398 | uint64_t efer; |
322 | struct descriptor_table gdt_descr; | 399 | struct desc_ptr gdt_descr; |
323 | struct desc_struct *gdt; | 400 | struct desc_struct *gdt; |
324 | int me = raw_smp_processor_id(); | 401 | int me = raw_smp_processor_id(); |
325 | 402 | ||
@@ -344,8 +421,8 @@ static int svm_hardware_enable(void *garbage) | |||
344 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 421 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
345 | sd->next_asid = sd->max_asid + 1; | 422 | sd->next_asid = sd->max_asid + 1; |
346 | 423 | ||
347 | kvm_get_gdt(&gdt_descr); | 424 | native_store_gdt(&gdt_descr); |
348 | gdt = (struct desc_struct *)gdt_descr.base; | 425 | gdt = (struct desc_struct *)gdt_descr.address; |
349 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 426 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
350 | 427 | ||
351 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 428 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
@@ -391,42 +468,98 @@ err_1: | |||
391 | 468 | ||
392 | } | 469 | } |
393 | 470 | ||
471 | static bool valid_msr_intercept(u32 index) | ||
472 | { | ||
473 | int i; | ||
474 | |||
475 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) | ||
476 | if (direct_access_msrs[i].index == index) | ||
477 | return true; | ||
478 | |||
479 | return false; | ||
480 | } | ||
481 | |||
394 | static void set_msr_interception(u32 *msrpm, unsigned msr, | 482 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
395 | int read, int write) | 483 | int read, int write) |
396 | { | 484 | { |
485 | u8 bit_read, bit_write; | ||
486 | unsigned long tmp; | ||
487 | u32 offset; | ||
488 | |||
489 | /* | ||
490 | * If this warning triggers extend the direct_access_msrs list at the | ||
491 | * beginning of the file | ||
492 | */ | ||
493 | WARN_ON(!valid_msr_intercept(msr)); | ||
494 | |||
495 | offset = svm_msrpm_offset(msr); | ||
496 | bit_read = 2 * (msr & 0x0f); | ||
497 | bit_write = 2 * (msr & 0x0f) + 1; | ||
498 | tmp = msrpm[offset]; | ||
499 | |||
500 | BUG_ON(offset == MSR_INVALID); | ||
501 | |||
502 | read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); | ||
503 | write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); | ||
504 | |||
505 | msrpm[offset] = tmp; | ||
506 | } | ||
507 | |||
508 | static void svm_vcpu_init_msrpm(u32 *msrpm) | ||
509 | { | ||
397 | int i; | 510 | int i; |
398 | 511 | ||
399 | for (i = 0; i < NUM_MSR_MAPS; i++) { | 512 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); |
400 | if (msr >= msrpm_ranges[i] && | 513 | |
401 | msr < msrpm_ranges[i] + MSRS_IN_RANGE) { | 514 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
402 | u32 msr_offset = (i * MSRS_IN_RANGE + msr - | 515 | if (!direct_access_msrs[i].always) |
403 | msrpm_ranges[i]) * 2; | 516 | continue; |
404 | 517 | ||
405 | u32 *base = msrpm + (msr_offset / 32); | 518 | set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); |
406 | u32 msr_shift = msr_offset % 32; | 519 | } |
407 | u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); | 520 | } |
408 | *base = (*base & ~(0x3 << msr_shift)) | | 521 | |
409 | (mask << msr_shift); | 522 | static void add_msr_offset(u32 offset) |
523 | { | ||
524 | int i; | ||
525 | |||
526 | for (i = 0; i < MSRPM_OFFSETS; ++i) { | ||
527 | |||
528 | /* Offset already in list? */ | ||
529 | if (msrpm_offsets[i] == offset) | ||
410 | return; | 530 | return; |
411 | } | 531 | |
532 | /* Slot used by another offset? */ | ||
533 | if (msrpm_offsets[i] != MSR_INVALID) | ||
534 | continue; | ||
535 | |||
536 | /* Add offset to list */ | ||
537 | msrpm_offsets[i] = offset; | ||
538 | |||
539 | return; | ||
412 | } | 540 | } |
541 | |||
542 | /* | ||
543 | * If this BUG triggers the msrpm_offsets table has an overflow. Just | ||
544 | * increase MSRPM_OFFSETS in this case. | ||
545 | */ | ||
413 | BUG(); | 546 | BUG(); |
414 | } | 547 | } |
415 | 548 | ||
416 | static void svm_vcpu_init_msrpm(u32 *msrpm) | 549 | static void init_msrpm_offsets(void) |
417 | { | 550 | { |
418 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); | 551 | int i; |
419 | 552 | ||
420 | #ifdef CONFIG_X86_64 | 553 | memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); |
421 | set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); | 554 | |
422 | set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); | 555 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
423 | set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); | 556 | u32 offset; |
424 | set_msr_interception(msrpm, MSR_LSTAR, 1, 1); | 557 | |
425 | set_msr_interception(msrpm, MSR_CSTAR, 1, 1); | 558 | offset = svm_msrpm_offset(direct_access_msrs[i].index); |
426 | set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); | 559 | BUG_ON(offset == MSR_INVALID); |
427 | #endif | 560 | |
428 | set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); | 561 | add_msr_offset(offset); |
429 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); | 562 | } |
430 | } | 563 | } |
431 | 564 | ||
432 | static void svm_enable_lbrv(struct vcpu_svm *svm) | 565 | static void svm_enable_lbrv(struct vcpu_svm *svm) |
@@ -467,6 +600,8 @@ static __init int svm_hardware_setup(void) | |||
467 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); | 600 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); |
468 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; | 601 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; |
469 | 602 | ||
603 | init_msrpm_offsets(); | ||
604 | |||
470 | if (boot_cpu_has(X86_FEATURE_NX)) | 605 | if (boot_cpu_has(X86_FEATURE_NX)) |
471 | kvm_enable_efer_bits(EFER_NX); | 606 | kvm_enable_efer_bits(EFER_NX); |
472 | 607 | ||
@@ -523,7 +658,7 @@ static void init_seg(struct vmcb_seg *seg) | |||
523 | { | 658 | { |
524 | seg->selector = 0; | 659 | seg->selector = 0; |
525 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | | 660 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | |
526 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ | 661 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ |
527 | seg->limit = 0xffff; | 662 | seg->limit = 0xffff; |
528 | seg->base = 0; | 663 | seg->base = 0; |
529 | } | 664 | } |
@@ -543,16 +678,16 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
543 | 678 | ||
544 | svm->vcpu.fpu_active = 1; | 679 | svm->vcpu.fpu_active = 1; |
545 | 680 | ||
546 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 681 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
547 | INTERCEPT_CR3_MASK | | 682 | INTERCEPT_CR3_MASK | |
548 | INTERCEPT_CR4_MASK; | 683 | INTERCEPT_CR4_MASK; |
549 | 684 | ||
550 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 685 | control->intercept_cr_write = INTERCEPT_CR0_MASK | |
551 | INTERCEPT_CR3_MASK | | 686 | INTERCEPT_CR3_MASK | |
552 | INTERCEPT_CR4_MASK | | 687 | INTERCEPT_CR4_MASK | |
553 | INTERCEPT_CR8_MASK; | 688 | INTERCEPT_CR8_MASK; |
554 | 689 | ||
555 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 690 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
556 | INTERCEPT_DR1_MASK | | 691 | INTERCEPT_DR1_MASK | |
557 | INTERCEPT_DR2_MASK | | 692 | INTERCEPT_DR2_MASK | |
558 | INTERCEPT_DR3_MASK | | 693 | INTERCEPT_DR3_MASK | |
@@ -561,7 +696,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
561 | INTERCEPT_DR6_MASK | | 696 | INTERCEPT_DR6_MASK | |
562 | INTERCEPT_DR7_MASK; | 697 | INTERCEPT_DR7_MASK; |
563 | 698 | ||
564 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 699 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
565 | INTERCEPT_DR1_MASK | | 700 | INTERCEPT_DR1_MASK | |
566 | INTERCEPT_DR2_MASK | | 701 | INTERCEPT_DR2_MASK | |
567 | INTERCEPT_DR3_MASK | | 702 | INTERCEPT_DR3_MASK | |
@@ -575,7 +710,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
575 | (1 << MC_VECTOR); | 710 | (1 << MC_VECTOR); |
576 | 711 | ||
577 | 712 | ||
578 | control->intercept = (1ULL << INTERCEPT_INTR) | | 713 | control->intercept = (1ULL << INTERCEPT_INTR) | |
579 | (1ULL << INTERCEPT_NMI) | | 714 | (1ULL << INTERCEPT_NMI) | |
580 | (1ULL << INTERCEPT_SMI) | | 715 | (1ULL << INTERCEPT_SMI) | |
581 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | 716 | (1ULL << INTERCEPT_SELECTIVE_CR0) | |
@@ -636,7 +771,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
636 | save->rip = 0x0000fff0; | 771 | save->rip = 0x0000fff0; |
637 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 772 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
638 | 773 | ||
639 | /* This is the guest-visible cr0 value. | 774 | /* |
775 | * This is the guest-visible cr0 value. | ||
640 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 776 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
641 | */ | 777 | */ |
642 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 778 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
@@ -729,6 +865,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
729 | svm_vcpu_init_msrpm(svm->msrpm); | 865 | svm_vcpu_init_msrpm(svm->msrpm); |
730 | 866 | ||
731 | svm->nested.msrpm = page_address(nested_msrpm_pages); | 867 | svm->nested.msrpm = page_address(nested_msrpm_pages); |
868 | svm_vcpu_init_msrpm(svm->nested.msrpm); | ||
732 | 869 | ||
733 | svm->vmcb = page_address(page); | 870 | svm->vmcb = page_address(page); |
734 | clear_page(svm->vmcb); | 871 | clear_page(svm->vmcb); |
@@ -882,7 +1019,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
882 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 1019 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
883 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 1020 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; |
884 | 1021 | ||
885 | /* AMD's VMCB does not have an explicit unusable field, so emulate it | 1022 | /* |
1023 | * AMD's VMCB does not have an explicit unusable field, so emulate it | ||
886 | * for cross vendor migration purposes by "not present" | 1024 | * for cross vendor migration purposes by "not present" |
887 | */ | 1025 | */ |
888 | var->unusable = !var->present || (var->type == 0); | 1026 | var->unusable = !var->present || (var->type == 0); |
@@ -918,7 +1056,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
918 | var->type |= 0x1; | 1056 | var->type |= 0x1; |
919 | break; | 1057 | break; |
920 | case VCPU_SREG_SS: | 1058 | case VCPU_SREG_SS: |
921 | /* On AMD CPUs sometimes the DB bit in the segment | 1059 | /* |
1060 | * On AMD CPUs sometimes the DB bit in the segment | ||
922 | * descriptor is left as 1, although the whole segment has | 1061 | * descriptor is left as 1, although the whole segment has |
923 | * been made unusable. Clear it here to pass an Intel VMX | 1062 | * been made unusable. Clear it here to pass an Intel VMX |
924 | * entry check when cross vendor migrating. | 1063 | * entry check when cross vendor migrating. |
@@ -936,36 +1075,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) | |||
936 | return save->cpl; | 1075 | return save->cpl; |
937 | } | 1076 | } |
938 | 1077 | ||
939 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1078 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
940 | { | 1079 | { |
941 | struct vcpu_svm *svm = to_svm(vcpu); | 1080 | struct vcpu_svm *svm = to_svm(vcpu); |
942 | 1081 | ||
943 | dt->limit = svm->vmcb->save.idtr.limit; | 1082 | dt->size = svm->vmcb->save.idtr.limit; |
944 | dt->base = svm->vmcb->save.idtr.base; | 1083 | dt->address = svm->vmcb->save.idtr.base; |
945 | } | 1084 | } |
946 | 1085 | ||
947 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1086 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
948 | { | 1087 | { |
949 | struct vcpu_svm *svm = to_svm(vcpu); | 1088 | struct vcpu_svm *svm = to_svm(vcpu); |
950 | 1089 | ||
951 | svm->vmcb->save.idtr.limit = dt->limit; | 1090 | svm->vmcb->save.idtr.limit = dt->size; |
952 | svm->vmcb->save.idtr.base = dt->base ; | 1091 | svm->vmcb->save.idtr.base = dt->address ; |
953 | } | 1092 | } |
954 | 1093 | ||
955 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1094 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
956 | { | 1095 | { |
957 | struct vcpu_svm *svm = to_svm(vcpu); | 1096 | struct vcpu_svm *svm = to_svm(vcpu); |
958 | 1097 | ||
959 | dt->limit = svm->vmcb->save.gdtr.limit; | 1098 | dt->size = svm->vmcb->save.gdtr.limit; |
960 | dt->base = svm->vmcb->save.gdtr.base; | 1099 | dt->address = svm->vmcb->save.gdtr.base; |
961 | } | 1100 | } |
962 | 1101 | ||
963 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1102 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
964 | { | 1103 | { |
965 | struct vcpu_svm *svm = to_svm(vcpu); | 1104 | struct vcpu_svm *svm = to_svm(vcpu); |
966 | 1105 | ||
967 | svm->vmcb->save.gdtr.limit = dt->limit; | 1106 | svm->vmcb->save.gdtr.limit = dt->size; |
968 | svm->vmcb->save.gdtr.base = dt->base ; | 1107 | svm->vmcb->save.gdtr.base = dt->address ; |
969 | } | 1108 | } |
970 | 1109 | ||
971 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1110 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
@@ -978,6 +1117,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
978 | 1117 | ||
979 | static void update_cr0_intercept(struct vcpu_svm *svm) | 1118 | static void update_cr0_intercept(struct vcpu_svm *svm) |
980 | { | 1119 | { |
1120 | struct vmcb *vmcb = svm->vmcb; | ||
981 | ulong gcr0 = svm->vcpu.arch.cr0; | 1121 | ulong gcr0 = svm->vcpu.arch.cr0; |
982 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1122 | u64 *hcr0 = &svm->vmcb->save.cr0; |
983 | 1123 | ||
@@ -989,11 +1129,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
989 | 1129 | ||
990 | 1130 | ||
991 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1131 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { |
992 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | 1132 | vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; |
993 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | 1133 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; |
1134 | if (is_nested(svm)) { | ||
1135 | struct vmcb *hsave = svm->nested.hsave; | ||
1136 | |||
1137 | hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
1138 | hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
1139 | vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; | ||
1140 | vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; | ||
1141 | } | ||
994 | } else { | 1142 | } else { |
995 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | 1143 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; |
996 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | 1144 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; |
1145 | if (is_nested(svm)) { | ||
1146 | struct vmcb *hsave = svm->nested.hsave; | ||
1147 | |||
1148 | hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
1149 | hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
1150 | } | ||
997 | } | 1151 | } |
998 | } | 1152 | } |
999 | 1153 | ||
@@ -1001,6 +1155,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1001 | { | 1155 | { |
1002 | struct vcpu_svm *svm = to_svm(vcpu); | 1156 | struct vcpu_svm *svm = to_svm(vcpu); |
1003 | 1157 | ||
1158 | if (is_nested(svm)) { | ||
1159 | /* | ||
1160 | * We are here because we run in nested mode, the host kvm | ||
1161 | * intercepts cr0 writes but the l1 hypervisor does not. | ||
1162 | * But the L1 hypervisor may intercept selective cr0 writes. | ||
1163 | * This needs to be checked here. | ||
1164 | */ | ||
1165 | unsigned long old, new; | ||
1166 | |||
1167 | /* Remove bits that would trigger a real cr0 write intercept */ | ||
1168 | old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1169 | new = cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1170 | |||
1171 | if (old == new) { | ||
1172 | /* cr0 write with ts and mp unchanged */ | ||
1173 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
1174 | if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) | ||
1175 | return; | ||
1176 | } | ||
1177 | } | ||
1178 | |||
1004 | #ifdef CONFIG_X86_64 | 1179 | #ifdef CONFIG_X86_64 |
1005 | if (vcpu->arch.efer & EFER_LME) { | 1180 | if (vcpu->arch.efer & EFER_LME) { |
1006 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1181 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
@@ -1134,70 +1309,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1134 | svm->vmcb->control.asid = sd->next_asid++; | 1309 | svm->vmcb->control.asid = sd->next_asid++; |
1135 | } | 1310 | } |
1136 | 1311 | ||
1137 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) | 1312 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
1138 | { | 1313 | { |
1139 | struct vcpu_svm *svm = to_svm(vcpu); | 1314 | struct vcpu_svm *svm = to_svm(vcpu); |
1140 | 1315 | ||
1141 | switch (dr) { | 1316 | svm->vmcb->save.dr7 = value; |
1142 | case 0 ... 3: | ||
1143 | *dest = vcpu->arch.db[dr]; | ||
1144 | break; | ||
1145 | case 4: | ||
1146 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1147 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1148 | /* fall through */ | ||
1149 | case 6: | ||
1150 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1151 | *dest = vcpu->arch.dr6; | ||
1152 | else | ||
1153 | *dest = svm->vmcb->save.dr6; | ||
1154 | break; | ||
1155 | case 5: | ||
1156 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1157 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1158 | /* fall through */ | ||
1159 | case 7: | ||
1160 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1161 | *dest = vcpu->arch.dr7; | ||
1162 | else | ||
1163 | *dest = svm->vmcb->save.dr7; | ||
1164 | break; | ||
1165 | } | ||
1166 | |||
1167 | return EMULATE_DONE; | ||
1168 | } | ||
1169 | |||
1170 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) | ||
1171 | { | ||
1172 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1173 | |||
1174 | switch (dr) { | ||
1175 | case 0 ... 3: | ||
1176 | vcpu->arch.db[dr] = value; | ||
1177 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
1178 | vcpu->arch.eff_db[dr] = value; | ||
1179 | break; | ||
1180 | case 4: | ||
1181 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1182 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1183 | /* fall through */ | ||
1184 | case 6: | ||
1185 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | ||
1186 | break; | ||
1187 | case 5: | ||
1188 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1189 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1190 | /* fall through */ | ||
1191 | case 7: | ||
1192 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | ||
1193 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
1194 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | ||
1195 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | ||
1196 | } | ||
1197 | break; | ||
1198 | } | ||
1199 | |||
1200 | return EMULATE_DONE; | ||
1201 | } | 1317 | } |
1202 | 1318 | ||
1203 | static int pf_interception(struct vcpu_svm *svm) | 1319 | static int pf_interception(struct vcpu_svm *svm) |
@@ -1234,7 +1350,7 @@ static int db_interception(struct vcpu_svm *svm) | |||
1234 | } | 1350 | } |
1235 | 1351 | ||
1236 | if (svm->vcpu.guest_debug & | 1352 | if (svm->vcpu.guest_debug & |
1237 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ | 1353 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { |
1238 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1354 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
1239 | kvm_run->debug.arch.pc = | 1355 | kvm_run->debug.arch.pc = |
1240 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1356 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
@@ -1268,7 +1384,22 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1268 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1384 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1269 | { | 1385 | { |
1270 | struct vcpu_svm *svm = to_svm(vcpu); | 1386 | struct vcpu_svm *svm = to_svm(vcpu); |
1271 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1387 | u32 excp; |
1388 | |||
1389 | if (is_nested(svm)) { | ||
1390 | u32 h_excp, n_excp; | ||
1391 | |||
1392 | h_excp = svm->nested.hsave->control.intercept_exceptions; | ||
1393 | n_excp = svm->nested.intercept_exceptions; | ||
1394 | h_excp &= ~(1 << NM_VECTOR); | ||
1395 | excp = h_excp | n_excp; | ||
1396 | } else { | ||
1397 | excp = svm->vmcb->control.intercept_exceptions; | ||
1398 | excp &= ~(1 << NM_VECTOR); | ||
1399 | } | ||
1400 | |||
1401 | svm->vmcb->control.intercept_exceptions = excp; | ||
1402 | |||
1272 | svm->vcpu.fpu_active = 1; | 1403 | svm->vcpu.fpu_active = 1; |
1273 | update_cr0_intercept(svm); | 1404 | update_cr0_intercept(svm); |
1274 | } | 1405 | } |
@@ -1309,29 +1440,23 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
1309 | 1440 | ||
1310 | static int io_interception(struct vcpu_svm *svm) | 1441 | static int io_interception(struct vcpu_svm *svm) |
1311 | { | 1442 | { |
1443 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
1312 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1444 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
1313 | int size, in, string; | 1445 | int size, in, string; |
1314 | unsigned port; | 1446 | unsigned port; |
1315 | 1447 | ||
1316 | ++svm->vcpu.stat.io_exits; | 1448 | ++svm->vcpu.stat.io_exits; |
1317 | |||
1318 | svm->next_rip = svm->vmcb->control.exit_info_2; | ||
1319 | |||
1320 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1449 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1321 | |||
1322 | if (string) { | ||
1323 | if (emulate_instruction(&svm->vcpu, | ||
1324 | 0, 0, 0) == EMULATE_DO_MMIO) | ||
1325 | return 0; | ||
1326 | return 1; | ||
1327 | } | ||
1328 | |||
1329 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1450 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1451 | if (string || in) | ||
1452 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | ||
1453 | |||
1330 | port = io_info >> 16; | 1454 | port = io_info >> 16; |
1331 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1455 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
1332 | 1456 | svm->next_rip = svm->vmcb->control.exit_info_2; | |
1333 | skip_emulated_instruction(&svm->vcpu); | 1457 | skip_emulated_instruction(&svm->vcpu); |
1334 | return kvm_emulate_pio(&svm->vcpu, in, size, port); | 1458 | |
1459 | return kvm_fast_pio_out(vcpu, size, port); | ||
1335 | } | 1460 | } |
1336 | 1461 | ||
1337 | static int nmi_interception(struct vcpu_svm *svm) | 1462 | static int nmi_interception(struct vcpu_svm *svm) |
@@ -1384,6 +1509,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) | |||
1384 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 1509 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
1385 | bool has_error_code, u32 error_code) | 1510 | bool has_error_code, u32 error_code) |
1386 | { | 1511 | { |
1512 | int vmexit; | ||
1513 | |||
1387 | if (!is_nested(svm)) | 1514 | if (!is_nested(svm)) |
1388 | return 0; | 1515 | return 0; |
1389 | 1516 | ||
@@ -1392,21 +1519,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
1392 | svm->vmcb->control.exit_info_1 = error_code; | 1519 | svm->vmcb->control.exit_info_1 = error_code; |
1393 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; | 1520 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; |
1394 | 1521 | ||
1395 | return nested_svm_exit_handled(svm); | 1522 | vmexit = nested_svm_intercept(svm); |
1523 | if (vmexit == NESTED_EXIT_DONE) | ||
1524 | svm->nested.exit_required = true; | ||
1525 | |||
1526 | return vmexit; | ||
1396 | } | 1527 | } |
1397 | 1528 | ||
1398 | static inline int nested_svm_intr(struct vcpu_svm *svm) | 1529 | /* This function returns true if it is safe to enable the irq window */ |
1530 | static inline bool nested_svm_intr(struct vcpu_svm *svm) | ||
1399 | { | 1531 | { |
1400 | if (!is_nested(svm)) | 1532 | if (!is_nested(svm)) |
1401 | return 0; | 1533 | return true; |
1402 | 1534 | ||
1403 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | 1535 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) |
1404 | return 0; | 1536 | return true; |
1405 | 1537 | ||
1406 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) | 1538 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) |
1407 | return 0; | 1539 | return false; |
1408 | 1540 | ||
1409 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1541 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
1542 | svm->vmcb->control.exit_info_1 = 0; | ||
1543 | svm->vmcb->control.exit_info_2 = 0; | ||
1410 | 1544 | ||
1411 | if (svm->nested.intercept & 1ULL) { | 1545 | if (svm->nested.intercept & 1ULL) { |
1412 | /* | 1546 | /* |
@@ -1417,21 +1551,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
1417 | */ | 1551 | */ |
1418 | svm->nested.exit_required = true; | 1552 | svm->nested.exit_required = true; |
1419 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | 1553 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); |
1420 | return 1; | 1554 | return false; |
1421 | } | 1555 | } |
1422 | 1556 | ||
1423 | return 0; | 1557 | return true; |
1558 | } | ||
1559 | |||
1560 | /* This function returns true if it is safe to enable the nmi window */ | ||
1561 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) | ||
1562 | { | ||
1563 | if (!is_nested(svm)) | ||
1564 | return true; | ||
1565 | |||
1566 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) | ||
1567 | return true; | ||
1568 | |||
1569 | svm->vmcb->control.exit_code = SVM_EXIT_NMI; | ||
1570 | svm->nested.exit_required = true; | ||
1571 | |||
1572 | return false; | ||
1424 | } | 1573 | } |
1425 | 1574 | ||
1426 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | 1575 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) |
1427 | { | 1576 | { |
1428 | struct page *page; | 1577 | struct page *page; |
1429 | 1578 | ||
1579 | might_sleep(); | ||
1580 | |||
1430 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1581 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
1431 | if (is_error_page(page)) | 1582 | if (is_error_page(page)) |
1432 | goto error; | 1583 | goto error; |
1433 | 1584 | ||
1434 | return kmap_atomic(page, idx); | 1585 | *_page = page; |
1586 | |||
1587 | return kmap(page); | ||
1435 | 1588 | ||
1436 | error: | 1589 | error: |
1437 | kvm_release_page_clean(page); | 1590 | kvm_release_page_clean(page); |
@@ -1440,61 +1593,55 @@ error: | |||
1440 | return NULL; | 1593 | return NULL; |
1441 | } | 1594 | } |
1442 | 1595 | ||
1443 | static void nested_svm_unmap(void *addr, enum km_type idx) | 1596 | static void nested_svm_unmap(struct page *page) |
1444 | { | 1597 | { |
1445 | struct page *page; | 1598 | kunmap(page); |
1599 | kvm_release_page_dirty(page); | ||
1600 | } | ||
1446 | 1601 | ||
1447 | if (!addr) | 1602 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) |
1448 | return; | 1603 | { |
1604 | unsigned port; | ||
1605 | u8 val, bit; | ||
1606 | u64 gpa; | ||
1449 | 1607 | ||
1450 | page = kmap_atomic_to_page(addr); | 1608 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) |
1609 | return NESTED_EXIT_HOST; | ||
1451 | 1610 | ||
1452 | kunmap_atomic(addr, idx); | 1611 | port = svm->vmcb->control.exit_info_1 >> 16; |
1453 | kvm_release_page_dirty(page); | 1612 | gpa = svm->nested.vmcb_iopm + (port / 8); |
1613 | bit = port % 8; | ||
1614 | val = 0; | ||
1615 | |||
1616 | if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) | ||
1617 | val &= (1 << bit); | ||
1618 | |||
1619 | return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | ||
1454 | } | 1620 | } |
1455 | 1621 | ||
1456 | static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) | 1622 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) |
1457 | { | 1623 | { |
1458 | u32 param = svm->vmcb->control.exit_info_1 & 1; | 1624 | u32 offset, msr, value; |
1459 | u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1625 | int write, mask; |
1460 | bool ret = false; | ||
1461 | u32 t0, t1; | ||
1462 | u8 *msrpm; | ||
1463 | 1626 | ||
1464 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) | 1627 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
1465 | return false; | 1628 | return NESTED_EXIT_HOST; |
1466 | 1629 | ||
1467 | msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1630 | msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
1631 | offset = svm_msrpm_offset(msr); | ||
1632 | write = svm->vmcb->control.exit_info_1 & 1; | ||
1633 | mask = 1 << ((2 * (msr & 0xf)) + write); | ||
1468 | 1634 | ||
1469 | if (!msrpm) | 1635 | if (offset == MSR_INVALID) |
1470 | goto out; | 1636 | return NESTED_EXIT_DONE; |
1471 | 1637 | ||
1472 | switch (msr) { | 1638 | /* Offset is in 32 bit units but we need it in 8 bit units */ 
1473 | case 0 ... 0x1fff: | 1639 | offset *= 4; |
1474 | t0 = (msr * 2) % 8; | ||
1475 | t1 = msr / 8; | ||
1476 | break; | ||
1477 | case 0xc0000000 ... 0xc0001fff: | ||
1478 | t0 = (8192 + msr - 0xc0000000) * 2; | ||
1479 | t1 = (t0 / 8); | ||
1480 | t0 %= 8; | ||
1481 | break; | ||
1482 | case 0xc0010000 ... 0xc0011fff: | ||
1483 | t0 = (16384 + msr - 0xc0010000) * 2; | ||
1484 | t1 = (t0 / 8); | ||
1485 | t0 %= 8; | ||
1486 | break; | ||
1487 | default: | ||
1488 | ret = true; | ||
1489 | goto out; | ||
1490 | } | ||
1491 | 1640 | ||
1492 | ret = msrpm[t1] & ((1 << param) << t0); | 1641 | if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) |
1493 | 1642 | return NESTED_EXIT_DONE; | |
1494 | out: | ||
1495 | nested_svm_unmap(msrpm, KM_USER0); | ||
1496 | 1643 | ||
1497 | return ret; | 1644 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
1498 | } | 1645 | } |
1499 | 1646 | ||
1500 | static int nested_svm_exit_special(struct vcpu_svm *svm) | 1647 | static int nested_svm_exit_special(struct vcpu_svm *svm) |
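The rewritten nested_svm_intercept_ioio()/nested_svm_exit_handled_msr() above stop mapping the whole permission page and instead compute a byte offset plus bit mask and read only that word from guest memory. A minimal, self-contained sketch of the underlying bitmap arithmetic follows; it assumes the usual SVM layout (two bits per MSR, three 2K regions covering the 0x0000..., 0xC0000000... and 0xC0010000... ranges), and the helper names here are illustrative rather than the kernel's.

#include <stdbool.h>
#include <stdint.h>

#define MSRPM_BIT_INVALID 0xffffffffu

/* Map an MSR number to its read-intercept bit inside the MSR permission map. */
static uint32_t msrpm_bit(uint32_t msr)
{
	uint32_t base_bit;

	if (msr <= 0x1fff)
		base_bit = 0;			/* first 2K region  */
	else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		base_bit = 0x0800 * 8;		/* second 2K region */
	else if (msr >= 0xc0010000 && msr <= 0xc0011fff)
		base_bit = 0x1000 * 8;		/* third 2K region  */
	else
		return MSRPM_BIT_INVALID;	/* uncovered MSRs are always intercepted */

	/* two bits per MSR: even bit = read intercept, odd bit = write intercept */
	return base_bit + 2 * (msr & 0x1fff);
}

/* Would an access to this MSR be intercepted according to the bitmap? */
static bool msrpm_intercepted(const uint8_t *msrpm, uint32_t msr, bool write)
{
	uint32_t bit = msrpm_bit(msr);

	if (bit == MSRPM_BIT_INVALID)
		return true;
	bit += write;
	return msrpm[bit / 8] & (1u << (bit % 8));
}

The kernel code in the hunk reads 4 bytes at a time and masks with 1 << ((2 * (msr & 0xf)) + write), which is the same lookup expressed per 32-bit word.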
@@ -1504,17 +1651,21 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1504 | switch (exit_code) { | 1651 | switch (exit_code) { |
1505 | case SVM_EXIT_INTR: | 1652 | case SVM_EXIT_INTR: |
1506 | case SVM_EXIT_NMI: | 1653 | case SVM_EXIT_NMI: |
1654 | case SVM_EXIT_EXCP_BASE + MC_VECTOR: | ||
1507 | return NESTED_EXIT_HOST; | 1655 | return NESTED_EXIT_HOST; |
1508 | /* For now we are always handling NPFs when using them */ | ||
1509 | case SVM_EXIT_NPF: | 1656 | case SVM_EXIT_NPF: |
1657 | /* For now we are always handling NPFs when using them */ | ||
1510 | if (npt_enabled) | 1658 | if (npt_enabled) |
1511 | return NESTED_EXIT_HOST; | 1659 | return NESTED_EXIT_HOST; |
1512 | break; | 1660 | break; |
1513 | /* When we're shadowing, trap PFs */ | ||
1514 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | 1661 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: |
1662 | /* When we're shadowing, trap PFs */ | ||
1515 | if (!npt_enabled) | 1663 | if (!npt_enabled) |
1516 | return NESTED_EXIT_HOST; | 1664 | return NESTED_EXIT_HOST; |
1517 | break; | 1665 | break; |
1666 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | ||
1667 | nm_interception(svm); | ||
1668 | break; | ||
1518 | default: | 1669 | default: |
1519 | break; | 1670 | break; |
1520 | } | 1671 | } |
@@ -1525,7 +1676,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1525 | /* | 1676 | /* |
1526 | * If this function returns true, this #vmexit was already handled | 1677 | * If this function returns true, this #vmexit was already handled |
1527 | */ | 1678 | */ |
1528 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | 1679 | static int nested_svm_intercept(struct vcpu_svm *svm) |
1529 | { | 1680 | { |
1530 | u32 exit_code = svm->vmcb->control.exit_code; | 1681 | u32 exit_code = svm->vmcb->control.exit_code; |
1531 | int vmexit = NESTED_EXIT_HOST; | 1682 | int vmexit = NESTED_EXIT_HOST; |
@@ -1534,6 +1685,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1534 | case SVM_EXIT_MSR: | 1685 | case SVM_EXIT_MSR: |
1535 | vmexit = nested_svm_exit_handled_msr(svm); | 1686 | vmexit = nested_svm_exit_handled_msr(svm); |
1536 | break; | 1687 | break; |
1688 | case SVM_EXIT_IOIO: | ||
1689 | vmexit = nested_svm_intercept_ioio(svm); | ||
1690 | break; | ||
1537 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | 1691 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { |
1538 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | 1692 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); |
1539 | if (svm->nested.intercept_cr_read & cr_bits) | 1693 | if (svm->nested.intercept_cr_read & cr_bits) |
@@ -1564,6 +1718,10 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1564 | vmexit = NESTED_EXIT_DONE; | 1718 | vmexit = NESTED_EXIT_DONE; |
1565 | break; | 1719 | break; |
1566 | } | 1720 | } |
1721 | case SVM_EXIT_ERR: { | ||
1722 | vmexit = NESTED_EXIT_DONE; | ||
1723 | break; | ||
1724 | } | ||
1567 | default: { | 1725 | default: { |
1568 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1726 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
1569 | if (svm->nested.intercept & exit_bits) | 1727 | if (svm->nested.intercept & exit_bits) |
@@ -1571,9 +1729,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1571 | } | 1729 | } |
1572 | } | 1730 | } |
1573 | 1731 | ||
1574 | if (vmexit == NESTED_EXIT_DONE) { | 1732 | return vmexit; |
1733 | } | ||
1734 | |||
1735 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | ||
1736 | { | ||
1737 | int vmexit; | ||
1738 | |||
1739 | vmexit = nested_svm_intercept(svm); | ||
1740 | |||
1741 | if (vmexit == NESTED_EXIT_DONE) | ||
1575 | nested_svm_vmexit(svm); | 1742 | nested_svm_vmexit(svm); |
1576 | } | ||
1577 | 1743 | ||
1578 | return vmexit; | 1744 | return vmexit; |
1579 | } | 1745 | } |
@@ -1615,6 +1781,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1615 | struct vmcb *nested_vmcb; | 1781 | struct vmcb *nested_vmcb; |
1616 | struct vmcb *hsave = svm->nested.hsave; | 1782 | struct vmcb *hsave = svm->nested.hsave; |
1617 | struct vmcb *vmcb = svm->vmcb; | 1783 | struct vmcb *vmcb = svm->vmcb; |
1784 | struct page *page; | ||
1618 | 1785 | ||
1619 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | 1786 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, |
1620 | vmcb->control.exit_info_1, | 1787 | vmcb->control.exit_info_1, |
@@ -1622,10 +1789,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1622 | vmcb->control.exit_int_info, | 1789 | vmcb->control.exit_int_info, |
1623 | vmcb->control.exit_int_info_err); | 1790 | vmcb->control.exit_int_info_err); |
1624 | 1791 | ||
1625 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1792 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); |
1626 | if (!nested_vmcb) | 1793 | if (!nested_vmcb) |
1627 | return 1; | 1794 | return 1; |
1628 | 1795 | ||
1796 | /* Exit nested SVM mode */ | ||
1797 | svm->nested.vmcb = 0; | ||
1798 | |||
1629 | /* Give the current vmcb to the guest */ | 1799 | /* Give the current vmcb to the guest */ |
1630 | disable_gif(svm); | 1800 | disable_gif(svm); |
1631 | 1801 | ||
@@ -1635,9 +1805,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1635 | nested_vmcb->save.ds = vmcb->save.ds; | 1805 | nested_vmcb->save.ds = vmcb->save.ds; |
1636 | nested_vmcb->save.gdtr = vmcb->save.gdtr; | 1806 | nested_vmcb->save.gdtr = vmcb->save.gdtr; |
1637 | nested_vmcb->save.idtr = vmcb->save.idtr; | 1807 | nested_vmcb->save.idtr = vmcb->save.idtr; |
1638 | if (npt_enabled) | 1808 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); |
1639 | nested_vmcb->save.cr3 = vmcb->save.cr3; | 1809 | nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; |
1640 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 1810 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
1811 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | ||
1641 | nested_vmcb->save.rflags = vmcb->save.rflags; | 1812 | nested_vmcb->save.rflags = vmcb->save.rflags; |
1642 | nested_vmcb->save.rip = vmcb->save.rip; | 1813 | nested_vmcb->save.rip = vmcb->save.rip; |
1643 | nested_vmcb->save.rsp = vmcb->save.rsp; | 1814 | nested_vmcb->save.rsp = vmcb->save.rsp; |
@@ -1709,10 +1880,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1709 | svm->vmcb->save.cpl = 0; | 1880 | svm->vmcb->save.cpl = 0; |
1710 | svm->vmcb->control.exit_int_info = 0; | 1881 | svm->vmcb->control.exit_int_info = 0; |
1711 | 1882 | ||
1712 | /* Exit nested SVM mode */ | 1883 | nested_svm_unmap(page); |
1713 | svm->nested.vmcb = 0; | ||
1714 | |||
1715 | nested_svm_unmap(nested_vmcb, KM_USER0); | ||
1716 | 1884 | ||
1717 | kvm_mmu_reset_context(&svm->vcpu); | 1885 | kvm_mmu_reset_context(&svm->vcpu); |
1718 | kvm_mmu_load(&svm->vcpu); | 1886 | kvm_mmu_load(&svm->vcpu); |
@@ -1722,19 +1890,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1722 | 1890 | ||
1723 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | 1891 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) |
1724 | { | 1892 | { |
1725 | u32 *nested_msrpm; | 1893 | /* |
1894 | * This function merges the msr permission bitmaps of kvm and the | ||
1895 | * nested vmcb. It is optimized in that it only merges the parts where || 
1896 | * the kvm msr permission bitmap may contain zero bits | ||
1897 | */ | ||
1726 | int i; | 1898 | int i; |
1727 | 1899 | ||
1728 | nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1900 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
1729 | if (!nested_msrpm) | 1901 | return true; |
1730 | return false; | ||
1731 | 1902 | ||
1732 | for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) | 1903 | for (i = 0; i < MSRPM_OFFSETS; i++) { |
1733 | svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; | 1904 | u32 value, p; |
1905 | u64 offset; | ||
1734 | 1906 | ||
1735 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); | 1907 | if (msrpm_offsets[i] == 0xffffffff) |
1908 | break; | ||
1909 | |||
1910 | p = msrpm_offsets[i]; | ||
1911 | offset = svm->nested.vmcb_msrpm + (p * 4); | ||
1912 | |||
1913 | if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) | ||
1914 | return false; | ||
1915 | |||
1916 | svm->nested.msrpm[p] = svm->msrpm[p] | value; | ||
1917 | } | ||
1736 | 1918 | ||
1737 | nested_svm_unmap(nested_msrpm, KM_USER0); | 1919 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); |
1738 | 1920 | ||
1739 | return true; | 1921 | return true; |
1740 | } | 1922 | } |
@@ -1744,26 +1926,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1744 | struct vmcb *nested_vmcb; | 1926 | struct vmcb *nested_vmcb; |
1745 | struct vmcb *hsave = svm->nested.hsave; | 1927 | struct vmcb *hsave = svm->nested.hsave; |
1746 | struct vmcb *vmcb = svm->vmcb; | 1928 | struct vmcb *vmcb = svm->vmcb; |
1929 | struct page *page; | ||
1930 | u64 vmcb_gpa; | ||
1747 | 1931 | ||
1748 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 1932 | vmcb_gpa = svm->vmcb->save.rax; |
1933 | |||
1934 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | ||
1749 | if (!nested_vmcb) | 1935 | if (!nested_vmcb) |
1750 | return false; | 1936 | return false; |
1751 | 1937 | ||
1752 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1938 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, |
1753 | svm->nested.vmcb = svm->vmcb->save.rax; | ||
1754 | |||
1755 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
1756 | nested_vmcb->save.rip, | 1939 | nested_vmcb->save.rip, |
1757 | nested_vmcb->control.int_ctl, | 1940 | nested_vmcb->control.int_ctl, |
1758 | nested_vmcb->control.event_inj, | 1941 | nested_vmcb->control.event_inj, |
1759 | nested_vmcb->control.nested_ctl); | 1942 | nested_vmcb->control.nested_ctl); |
1760 | 1943 | ||
1944 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, | ||
1945 | nested_vmcb->control.intercept_cr_write, | ||
1946 | nested_vmcb->control.intercept_exceptions, | ||
1947 | nested_vmcb->control.intercept); | ||
1948 | |||
1761 | /* Clear internal status */ | 1949 | /* Clear internal status */ |
1762 | kvm_clear_exception_queue(&svm->vcpu); | 1950 | kvm_clear_exception_queue(&svm->vcpu); |
1763 | kvm_clear_interrupt_queue(&svm->vcpu); | 1951 | kvm_clear_interrupt_queue(&svm->vcpu); |
1764 | 1952 | ||
1765 | /* Save the old vmcb, so we don't need to pick what we save, but | 1953 | /* |
1766 | can restore everything when a VMEXIT occurs */ | 1954 | * Save the old vmcb, so we don't need to pick what we save, but can |
1955 | * restore everything when a VMEXIT occurs | ||
1956 | */ | ||
1767 | hsave->save.es = vmcb->save.es; | 1957 | hsave->save.es = vmcb->save.es; |
1768 | hsave->save.cs = vmcb->save.cs; | 1958 | hsave->save.cs = vmcb->save.cs; |
1769 | hsave->save.ss = vmcb->save.ss; | 1959 | hsave->save.ss = vmcb->save.ss; |
@@ -1803,14 +1993,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1803 | if (npt_enabled) { | 1993 | if (npt_enabled) { |
1804 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 1994 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
1805 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 1995 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
1806 | } else { | 1996 | } else |
1807 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 1997 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
1808 | kvm_mmu_reset_context(&svm->vcpu); | 1998 | |
1809 | } | 1999 | /* Guest paging mode is active - reset mmu */ |
2000 | kvm_mmu_reset_context(&svm->vcpu); | ||
2001 | |||
1810 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; | 2002 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; |
1811 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); | 2003 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); |
1812 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); | 2004 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); |
1813 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); | 2005 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); |
2006 | |||
1814 | /* In case we don't even reach vcpu_run, the fields are not updated */ | 2007 | /* In case we don't even reach vcpu_run, the fields are not updated */ |
1815 | svm->vmcb->save.rax = nested_vmcb->save.rax; | 2008 | svm->vmcb->save.rax = nested_vmcb->save.rax; |
1816 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; | 2009 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; |
@@ -1819,22 +2012,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1819 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; | 2012 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; |
1820 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; | 2013 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; |
1821 | 2014 | ||
1822 | /* We don't want a nested guest to be more powerful than the guest, | 2015 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; |
1823 | so all intercepts are ORed */ | 2016 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; |
1824 | svm->vmcb->control.intercept_cr_read |= | ||
1825 | nested_vmcb->control.intercept_cr_read; | ||
1826 | svm->vmcb->control.intercept_cr_write |= | ||
1827 | nested_vmcb->control.intercept_cr_write; | ||
1828 | svm->vmcb->control.intercept_dr_read |= | ||
1829 | nested_vmcb->control.intercept_dr_read; | ||
1830 | svm->vmcb->control.intercept_dr_write |= | ||
1831 | nested_vmcb->control.intercept_dr_write; | ||
1832 | svm->vmcb->control.intercept_exceptions |= | ||
1833 | nested_vmcb->control.intercept_exceptions; | ||
1834 | |||
1835 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
1836 | |||
1837 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; | ||
1838 | 2017 | ||
1839 | /* cache intercepts */ | 2018 | /* cache intercepts */ |
1840 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; | 2019 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; |
@@ -1851,13 +2030,43 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1851 | else | 2030 | else |
1852 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 2031 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
1853 | 2032 | ||
2033 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { | ||
2034 | /* We only want the cr8 intercept bits of the guest */ | ||
2035 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; | ||
2036 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
2037 | } | ||
2038 | |||
2039 | /* We don't want to see VMMCALLs from a nested guest */ | ||
2040 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); | ||
2041 | |||
2042 | /* | ||
2043 | * We don't want a nested guest to be more powerful than the guest, so | ||
2044 | * all intercepts are ORed | ||
2045 | */ | ||
2046 | svm->vmcb->control.intercept_cr_read |= | ||
2047 | nested_vmcb->control.intercept_cr_read; | ||
2048 | svm->vmcb->control.intercept_cr_write |= | ||
2049 | nested_vmcb->control.intercept_cr_write; | ||
2050 | svm->vmcb->control.intercept_dr_read |= | ||
2051 | nested_vmcb->control.intercept_dr_read; | ||
2052 | svm->vmcb->control.intercept_dr_write |= | ||
2053 | nested_vmcb->control.intercept_dr_write; | ||
2054 | svm->vmcb->control.intercept_exceptions |= | ||
2055 | nested_vmcb->control.intercept_exceptions; | ||
2056 | |||
2057 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
2058 | |||
2059 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; | ||
1854 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 2060 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
1855 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 2061 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
1856 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 2062 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
1857 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 2063 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
1858 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 2064 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
1859 | 2065 | ||
1860 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2066 | nested_svm_unmap(page); |
2067 | |||
2068 | /* nested_vmcb is our indicator if nested SVM is activated */ | ||
2069 | svm->nested.vmcb = vmcb_gpa; | ||
1861 | 2070 | ||
1862 | enable_gif(svm); | 2071 | enable_gif(svm); |
1863 | 2072 | ||
@@ -1883,6 +2092,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
1883 | static int vmload_interception(struct vcpu_svm *svm) | 2092 | static int vmload_interception(struct vcpu_svm *svm) |
1884 | { | 2093 | { |
1885 | struct vmcb *nested_vmcb; | 2094 | struct vmcb *nested_vmcb; |
2095 | struct page *page; | ||
1886 | 2096 | ||
1887 | if (nested_svm_check_permissions(svm)) | 2097 | if (nested_svm_check_permissions(svm)) |
1888 | return 1; | 2098 | return 1; |
@@ -1890,12 +2100,12 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
1890 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2100 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1891 | skip_emulated_instruction(&svm->vcpu); | 2101 | skip_emulated_instruction(&svm->vcpu); |
1892 | 2102 | ||
1893 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2103 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1894 | if (!nested_vmcb) | 2104 | if (!nested_vmcb) |
1895 | return 1; | 2105 | return 1; |
1896 | 2106 | ||
1897 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 2107 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
1898 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2108 | nested_svm_unmap(page); |
1899 | 2109 | ||
1900 | return 1; | 2110 | return 1; |
1901 | } | 2111 | } |
@@ -1903,6 +2113,7 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
1903 | static int vmsave_interception(struct vcpu_svm *svm) | 2113 | static int vmsave_interception(struct vcpu_svm *svm) |
1904 | { | 2114 | { |
1905 | struct vmcb *nested_vmcb; | 2115 | struct vmcb *nested_vmcb; |
2116 | struct page *page; | ||
1906 | 2117 | ||
1907 | if (nested_svm_check_permissions(svm)) | 2118 | if (nested_svm_check_permissions(svm)) |
1908 | return 1; | 2119 | return 1; |
@@ -1910,12 +2121,12 @@ static int vmsave_interception(struct vcpu_svm *svm) | |||
1910 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2121 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1911 | skip_emulated_instruction(&svm->vcpu); | 2122 | skip_emulated_instruction(&svm->vcpu); |
1912 | 2123 | ||
1913 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2124 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1914 | if (!nested_vmcb) | 2125 | if (!nested_vmcb) |
1915 | return 1; | 2126 | return 1; |
1916 | 2127 | ||
1917 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 2128 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
1918 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2129 | nested_svm_unmap(page); |
1919 | 2130 | ||
1920 | return 1; | 2131 | return 1; |
1921 | } | 2132 | } |
@@ -2018,6 +2229,8 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2018 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; | 2229 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; |
2019 | uint32_t idt_v = | 2230 | uint32_t idt_v = |
2020 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; | 2231 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; |
2232 | bool has_error_code = false; | ||
2233 | u32 error_code = 0; | ||
2021 | 2234 | ||
2022 | tss_selector = (u16)svm->vmcb->control.exit_info_1; | 2235 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
2023 | 2236 | ||
@@ -2038,6 +2251,12 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2038 | svm->vcpu.arch.nmi_injected = false; | 2251 | svm->vcpu.arch.nmi_injected = false; |
2039 | break; | 2252 | break; |
2040 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2253 | case SVM_EXITINTINFO_TYPE_EXEPT: |
2254 | if (svm->vmcb->control.exit_info_2 & | ||
2255 | (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { | ||
2256 | has_error_code = true; | ||
2257 | error_code = | ||
2258 | (u32)svm->vmcb->control.exit_info_2; | ||
2259 | } | ||
2041 | kvm_clear_exception_queue(&svm->vcpu); | 2260 | kvm_clear_exception_queue(&svm->vcpu); |
2042 | break; | 2261 | break; |
2043 | case SVM_EXITINTINFO_TYPE_INTR: | 2262 | case SVM_EXITINTINFO_TYPE_INTR: |
@@ -2054,7 +2273,14 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2054 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | 2273 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) |
2055 | skip_emulated_instruction(&svm->vcpu); | 2274 | skip_emulated_instruction(&svm->vcpu); |
2056 | 2275 | ||
2057 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2276 | if (kvm_task_switch(&svm->vcpu, tss_selector, reason, |
2277 | has_error_code, error_code) == EMULATE_FAIL) { | ||
2278 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
2279 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
2280 | svm->vcpu.run->internal.ndata = 0; | ||
2281 | return 0; | ||
2282 | } | ||
2283 | return 1; | ||
2058 | } | 2284 | } |
2059 | 2285 | ||
2060 | static int cpuid_interception(struct vcpu_svm *svm) | 2286 | static int cpuid_interception(struct vcpu_svm *svm) |
@@ -2145,9 +2371,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2145 | case MSR_IA32_SYSENTER_ESP: | 2371 | case MSR_IA32_SYSENTER_ESP: |
2146 | *data = svm->sysenter_esp; | 2372 | *data = svm->sysenter_esp; |
2147 | break; | 2373 | break; |
2148 | /* Nobody will change the following 5 values in the VMCB so | 2374 | /* |
2149 | we can safely return them on rdmsr. They will always be 0 | 2375 | * Nobody will change the following 5 values in the VMCB so we can |
2150 | until LBRV is implemented. */ | 2376 | * safely return them on rdmsr. They will always be 0 until LBRV is |
2377 | * implemented. | ||
2378 | */ | ||
2151 | case MSR_IA32_DEBUGCTLMSR: | 2379 | case MSR_IA32_DEBUGCTLMSR: |
2152 | *data = svm->vmcb->save.dbgctl; | 2380 | *data = svm->vmcb->save.dbgctl; |
2153 | break; | 2381 | break; |
@@ -2167,7 +2395,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2167 | *data = svm->nested.hsave_msr; | 2395 | *data = svm->nested.hsave_msr; |
2168 | break; | 2396 | break; |
2169 | case MSR_VM_CR: | 2397 | case MSR_VM_CR: |
2170 | *data = 0; | 2398 | *data = svm->nested.vm_cr_msr; |
2171 | break; | 2399 | break; |
2172 | case MSR_IA32_UCODE_REV: | 2400 | case MSR_IA32_UCODE_REV: |
2173 | *data = 0x01000065; | 2401 | *data = 0x01000065; |
@@ -2197,6 +2425,31 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
2197 | return 1; | 2425 | return 1; |
2198 | } | 2426 | } |
2199 | 2427 | ||
2428 | static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) | ||
2429 | { | ||
2430 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2431 | int svm_dis, chg_mask; | ||
2432 | |||
2433 | if (data & ~SVM_VM_CR_VALID_MASK) | ||
2434 | return 1; | ||
2435 | |||
2436 | chg_mask = SVM_VM_CR_VALID_MASK; | ||
2437 | |||
2438 | if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) | ||
2439 | chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); | ||
2440 | |||
2441 | svm->nested.vm_cr_msr &= ~chg_mask; | ||
2442 | svm->nested.vm_cr_msr |= (data & chg_mask); | ||
2443 | |||
2444 | svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; | ||
2445 | |||
2446 | /* check for svm_disable while efer.svme is set */ | ||
2447 | if (svm_dis && (vcpu->arch.efer & EFER_SVME)) | ||
2448 | return 1; | ||
2449 | |||
2450 | return 0; | ||
2451 | } | ||
2452 | |||
2200 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | 2453 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) |
2201 | { | 2454 | { |
2202 | struct vcpu_svm *svm = to_svm(vcpu); | 2455 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -2263,6 +2516,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2263 | svm->nested.hsave_msr = data; | 2516 | svm->nested.hsave_msr = data; |
2264 | break; | 2517 | break; |
2265 | case MSR_VM_CR: | 2518 | case MSR_VM_CR: |
2519 | return svm_set_vm_cr(vcpu, data); | ||
2266 | case MSR_VM_IGNNE: | 2520 | case MSR_VM_IGNNE: |
2267 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 2521 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
2268 | break; | 2522 | break; |
@@ -2326,16 +2580,16 @@ static int pause_interception(struct vcpu_svm *svm) | |||
2326 | } | 2580 | } |
2327 | 2581 | ||
2328 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 2582 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { |
2329 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2583 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
2330 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2584 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2331 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2585 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
2332 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2586 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
2333 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 2587 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2334 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2588 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
2335 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2589 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
2336 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2590 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
2337 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 2591 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
2338 | [SVM_EXIT_READ_DR0] = emulate_on_interception, | 2592 | [SVM_EXIT_READ_DR0] = emulate_on_interception, |
2339 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2593 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
2340 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2594 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
2341 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2595 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
@@ -2354,15 +2608,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2354 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2608 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2355 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2609 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
2356 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 2610 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
2357 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 2611 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
2358 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 2612 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
2359 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 2613 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
2360 | [SVM_EXIT_INTR] = intr_interception, | 2614 | [SVM_EXIT_INTR] = intr_interception, |
2361 | [SVM_EXIT_NMI] = nmi_interception, | 2615 | [SVM_EXIT_NMI] = nmi_interception, |
2362 | [SVM_EXIT_SMI] = nop_on_interception, | 2616 | [SVM_EXIT_SMI] = nop_on_interception, |
2363 | [SVM_EXIT_INIT] = nop_on_interception, | 2617 | [SVM_EXIT_INIT] = nop_on_interception, |
2364 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 2618 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
2365 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ | ||
2366 | [SVM_EXIT_CPUID] = cpuid_interception, | 2619 | [SVM_EXIT_CPUID] = cpuid_interception, |
2367 | [SVM_EXIT_IRET] = iret_interception, | 2620 | [SVM_EXIT_IRET] = iret_interception, |
2368 | [SVM_EXIT_INVD] = emulate_on_interception, | 2621 | [SVM_EXIT_INVD] = emulate_on_interception, |
@@ -2370,7 +2623,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2370 | [SVM_EXIT_HLT] = halt_interception, | 2623 | [SVM_EXIT_HLT] = halt_interception, |
2371 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2624 | [SVM_EXIT_INVLPG] = invlpg_interception, |
2372 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2625 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
2373 | [SVM_EXIT_IOIO] = io_interception, | 2626 | [SVM_EXIT_IOIO] = io_interception, |
2374 | [SVM_EXIT_MSR] = msr_interception, | 2627 | [SVM_EXIT_MSR] = msr_interception, |
2375 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, | 2628 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, |
2376 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, | 2629 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, |
@@ -2393,7 +2646,12 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2393 | struct kvm_run *kvm_run = vcpu->run; | 2646 | struct kvm_run *kvm_run = vcpu->run; |
2394 | u32 exit_code = svm->vmcb->control.exit_code; | 2647 | u32 exit_code = svm->vmcb->control.exit_code; |
2395 | 2648 | ||
2396 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2649 | trace_kvm_exit(exit_code, vcpu); |
2650 | |||
2651 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
2652 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
2653 | if (npt_enabled) | ||
2654 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
2397 | 2655 | ||
2398 | if (unlikely(svm->nested.exit_required)) { | 2656 | if (unlikely(svm->nested.exit_required)) { |
2399 | nested_svm_vmexit(svm); | 2657 | nested_svm_vmexit(svm); |
@@ -2422,11 +2680,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2422 | 2680 | ||
2423 | svm_complete_interrupts(svm); | 2681 | svm_complete_interrupts(svm); |
2424 | 2682 | ||
2425 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
2426 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
2427 | if (npt_enabled) | ||
2428 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
2429 | |||
2430 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2683 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
2431 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2684 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2432 | kvm_run->fail_entry.hardware_entry_failure_reason | 2685 | kvm_run->fail_entry.hardware_entry_failure_reason |
@@ -2511,6 +2764,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
2511 | { | 2764 | { |
2512 | struct vcpu_svm *svm = to_svm(vcpu); | 2765 | struct vcpu_svm *svm = to_svm(vcpu); |
2513 | 2766 | ||
2767 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2768 | return; | ||
2769 | |||
2514 | if (irr == -1) | 2770 | if (irr == -1) |
2515 | return; | 2771 | return; |
2516 | 2772 | ||
@@ -2522,8 +2778,12 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2522 | { | 2778 | { |
2523 | struct vcpu_svm *svm = to_svm(vcpu); | 2779 | struct vcpu_svm *svm = to_svm(vcpu); |
2524 | struct vmcb *vmcb = svm->vmcb; | 2780 | struct vmcb *vmcb = svm->vmcb; |
2525 | return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2781 | int ret; |
2526 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2782 | ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
2783 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
2784 | ret = ret && gif_set(svm) && nested_svm_nmi(svm); | ||
2785 | |||
2786 | return ret; | ||
2527 | } | 2787 | } |
2528 | 2788 | ||
2529 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | 2789 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) |
@@ -2568,13 +2828,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
2568 | { | 2828 | { |
2569 | struct vcpu_svm *svm = to_svm(vcpu); | 2829 | struct vcpu_svm *svm = to_svm(vcpu); |
2570 | 2830 | ||
2571 | nested_svm_intr(svm); | 2831 | /* |
2572 | 2832 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | |
2573 | /* In case GIF=0 we can't rely on the CPU to tell us when | 2833 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
2574 | * GIF becomes 1, because that's a separate STGI/VMRUN intercept. | 2834 | * get that intercept, this function will be called again though and |
2575 | * The next time we get that intercept, this function will be | 2835 | * we'll get the vintr intercept. |
2576 | * called again though and we'll get the vintr intercept. */ | 2836 | */ |
2577 | if (gif_set(svm)) { | 2837 | if (gif_set(svm) && nested_svm_intr(svm)) { |
2578 | svm_set_vintr(svm); | 2838 | svm_set_vintr(svm); |
2579 | svm_inject_irq(svm, 0x0); | 2839 | svm_inject_irq(svm, 0x0); |
2580 | } | 2840 | } |
@@ -2588,9 +2848,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2588 | == HF_NMI_MASK) | 2848 | == HF_NMI_MASK) |
2589 | return; /* IRET will cause a vm exit */ | 2849 | return; /* IRET will cause a vm exit */ |
2590 | 2850 | ||
2591 | /* Something prevents NMI from being injected. Single step over | 2851 | /* 
2592 | possible problem (IRET or exception injection or interrupt | 2852 | * Something prevents NMI from being injected. Single step over possible 
2593 | shadow) */ | 2853 | * problem (IRET or exception injection or interrupt shadow) |
2854 | */ | ||
2594 | svm->nmi_singlestep = true; | 2855 | svm->nmi_singlestep = true; |
2595 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2856 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
2596 | update_db_intercept(vcpu); | 2857 | update_db_intercept(vcpu); |
@@ -2614,6 +2875,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
2614 | { | 2875 | { |
2615 | struct vcpu_svm *svm = to_svm(vcpu); | 2876 | struct vcpu_svm *svm = to_svm(vcpu); |
2616 | 2877 | ||
2878 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2879 | return; | ||
2880 | |||
2617 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 2881 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { |
2618 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 2882 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
2619 | kvm_set_cr8(vcpu, cr8); | 2883 | kvm_set_cr8(vcpu, cr8); |
@@ -2625,6 +2889,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
2625 | struct vcpu_svm *svm = to_svm(vcpu); | 2889 | struct vcpu_svm *svm = to_svm(vcpu); |
2626 | u64 cr8; | 2890 | u64 cr8; |
2627 | 2891 | ||
2892 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2893 | return; | ||
2894 | |||
2628 | cr8 = kvm_get_cr8(vcpu); | 2895 | cr8 = kvm_get_cr8(vcpu); |
2629 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 2896 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; |
2630 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 2897 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
@@ -2635,6 +2902,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2635 | u8 vector; | 2902 | u8 vector; |
2636 | int type; | 2903 | int type; |
2637 | u32 exitintinfo = svm->vmcb->control.exit_int_info; | 2904 | u32 exitintinfo = svm->vmcb->control.exit_int_info; |
2905 | unsigned int3_injected = svm->int3_injected; | ||
2906 | |||
2907 | svm->int3_injected = 0; | ||
2638 | 2908 | ||
2639 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) | 2909 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) |
2640 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 2910 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
@@ -2654,18 +2924,25 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2654 | svm->vcpu.arch.nmi_injected = true; | 2924 | svm->vcpu.arch.nmi_injected = true; |
2655 | break; | 2925 | break; |
2656 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2926 | case SVM_EXITINTINFO_TYPE_EXEPT: |
2657 | /* In case of software exception do not reinject an exception | 2927 | /* |
2658 | vector, but re-execute and instruction instead */ | 2928 | * In case of software exceptions, do not reinject the vector, |
2659 | if (is_nested(svm)) | 2929 | * but re-execute the instruction instead. Rewind RIP first |
2660 | break; | 2930 | * if we emulated INT3 before. |
2661 | if (kvm_exception_is_soft(vector)) | 2931 | */ |
2932 | if (kvm_exception_is_soft(vector)) { | ||
2933 | if (vector == BP_VECTOR && int3_injected && | ||
2934 | kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) | ||
2935 | kvm_rip_write(&svm->vcpu, | ||
2936 | kvm_rip_read(&svm->vcpu) - | ||
2937 | int3_injected); | ||
2662 | break; | 2938 | break; |
2939 | } | ||
2663 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { | 2940 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { |
2664 | u32 err = svm->vmcb->control.exit_int_info_err; | 2941 | u32 err = svm->vmcb->control.exit_int_info_err; |
2665 | kvm_queue_exception_e(&svm->vcpu, vector, err); | 2942 | kvm_requeue_exception_e(&svm->vcpu, vector, err); |
2666 | 2943 | ||
2667 | } else | 2944 | } else |
2668 | kvm_queue_exception(&svm->vcpu, vector); | 2945 | kvm_requeue_exception(&svm->vcpu, vector); |
2669 | break; | 2946 | break; |
2670 | case SVM_EXITINTINFO_TYPE_INTR: | 2947 | case SVM_EXITINTINFO_TYPE_INTR: |
2671 | kvm_queue_interrupt(&svm->vcpu, vector, false); | 2948 | kvm_queue_interrupt(&svm->vcpu, vector, false); |
@@ -2688,6 +2965,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
2688 | u16 gs_selector; | 2965 | u16 gs_selector; |
2689 | u16 ldt_selector; | 2966 | u16 ldt_selector; |
2690 | 2967 | ||
2968 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
2969 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
2970 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
2971 | |||
2691 | /* | 2972 | /* |
2692 | * A vmexit emulation is required before the vcpu can be executed | 2973 | * A vmexit emulation is required before the vcpu can be executed |
2693 | * again. | 2974 | * again. |
@@ -2695,10 +2976,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
2695 | if (unlikely(svm->nested.exit_required)) | 2976 | if (unlikely(svm->nested.exit_required)) |
2696 | return; | 2977 | return; |
2697 | 2978 | ||
2698 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
2699 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
2700 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
2701 | |||
2702 | pre_svm_run(svm); | 2979 | pre_svm_run(svm); |
2703 | 2980 | ||
2704 | sync_lapic_to_cr8(vcpu); | 2981 | sync_lapic_to_cr8(vcpu); |
@@ -2879,25 +3156,39 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | |||
2879 | { | 3156 | { |
2880 | } | 3157 | } |
2881 | 3158 | ||
3159 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
3160 | { | ||
3161 | switch (func) { | ||
3162 | case 0x8000000A: | ||
3163 | entry->eax = 1; /* SVM revision 1 */ | ||
3164 | entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper || 
3165 | ASID emulation to nested SVM */ | ||
3166 | entry->ecx = 0; /* Reserved */ | ||
3167 | entry->edx = 0; /* Do not support any additional features */ | ||
3168 | |||
3169 | break; | ||
3170 | } | ||
3171 | } | ||
3172 | |||
2882 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 3173 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
2883 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 3174 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
2884 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 3175 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
2885 | { SVM_EXIT_READ_CR4, "read_cr4" }, | 3176 | { SVM_EXIT_READ_CR4, "read_cr4" }, |
2886 | { SVM_EXIT_READ_CR8, "read_cr8" }, | 3177 | { SVM_EXIT_READ_CR8, "read_cr8" }, |
2887 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, | 3178 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, |
2888 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, | 3179 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, |
2889 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, | 3180 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, |
2890 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, | 3181 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, |
2891 | { SVM_EXIT_READ_DR0, "read_dr0" }, | 3182 | { SVM_EXIT_READ_DR0, "read_dr0" }, |
2892 | { SVM_EXIT_READ_DR1, "read_dr1" }, | 3183 | { SVM_EXIT_READ_DR1, "read_dr1" }, |
2893 | { SVM_EXIT_READ_DR2, "read_dr2" }, | 3184 | { SVM_EXIT_READ_DR2, "read_dr2" }, |
2894 | { SVM_EXIT_READ_DR3, "read_dr3" }, | 3185 | { SVM_EXIT_READ_DR3, "read_dr3" }, |
2895 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, | 3186 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, |
2896 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, | 3187 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, |
2897 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, | 3188 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, |
2898 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, | 3189 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, |
2899 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, | 3190 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, |
2900 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, | 3191 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, |
2901 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, | 3192 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, |
2902 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, | 3193 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, |
2903 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, | 3194 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, |
@@ -2946,8 +3237,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
2946 | { | 3237 | { |
2947 | struct vcpu_svm *svm = to_svm(vcpu); | 3238 | struct vcpu_svm *svm = to_svm(vcpu); |
2948 | 3239 | ||
2949 | update_cr0_intercept(svm); | ||
2950 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | 3240 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; |
3241 | if (is_nested(svm)) | ||
3242 | svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
3243 | update_cr0_intercept(svm); | ||
2951 | } | 3244 | } |
2952 | 3245 | ||
2953 | static struct kvm_x86_ops svm_x86_ops = { | 3246 | static struct kvm_x86_ops svm_x86_ops = { |
@@ -2986,8 +3279,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2986 | .set_idt = svm_set_idt, | 3279 | .set_idt = svm_set_idt, |
2987 | .get_gdt = svm_get_gdt, | 3280 | .get_gdt = svm_get_gdt, |
2988 | .set_gdt = svm_set_gdt, | 3281 | .set_gdt = svm_set_gdt, |
2989 | .get_dr = svm_get_dr, | 3282 | .set_dr7 = svm_set_dr7, |
2990 | .set_dr = svm_set_dr, | ||
2991 | .cache_reg = svm_cache_reg, | 3283 | .cache_reg = svm_cache_reg, |
2992 | .get_rflags = svm_get_rflags, | 3284 | .get_rflags = svm_get_rflags, |
2993 | .set_rflags = svm_set_rflags, | 3285 | .set_rflags = svm_set_rflags, |
@@ -3023,12 +3315,14 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3023 | .cpuid_update = svm_cpuid_update, | 3315 | .cpuid_update = svm_cpuid_update, |
3024 | 3316 | ||
3025 | .rdtscp_supported = svm_rdtscp_supported, | 3317 | .rdtscp_supported = svm_rdtscp_supported, |
3318 | |||
3319 | .set_supported_cpuid = svm_set_supported_cpuid, | ||
3026 | }; | 3320 | }; |
3027 | 3321 | ||
3028 | static int __init svm_init(void) | 3322 | static int __init svm_init(void) |
3029 | { | 3323 | { |
3030 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), | 3324 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), |
3031 | THIS_MODULE); | 3325 | __alignof__(struct vcpu_svm), THIS_MODULE); |
3032 | } | 3326 | } |
3033 | 3327 | ||
3034 | static void __exit svm_exit(void) | 3328 | static void __exit svm_exit(void) |
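One theme of the svm.c changes above is the switch of nested_svm_map() from kmap_atomic() to plain kmap(): the caller now receives the struct page through an out-parameter and must hand it back to nested_svm_unmap(), and sleeping is legal while the mapping is held (hence the might_sleep()). A rough sketch of the resulting caller pattern follows; example_peek_nested_vmcb() is invented for illustration and is not part of the patch.

/* Hypothetical caller, illustrating only the new map/unmap contract. */
static int example_peek_nested_vmcb(struct vcpu_svm *svm, u64 vmcb_gpa)
{
	struct vmcb *nested_vmcb;
	struct page *page;
	u64 nested_rip;

	nested_vmcb = nested_svm_map(svm, vmcb_gpa, &page);
	if (!nested_vmcb)
		return 1;	/* mapping failed; bail out like the handlers above */

	/* kmap(), not kmap_atomic(): sleeping while the map is held is allowed */
	nested_rip = nested_vmcb->save.rip;

	nested_svm_unmap(page);	/* kunmap() and release the page as dirty */

	return nested_rip != 0;
}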
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index eea40439066c..4ddadb1a5ffe 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -12,7 +12,8 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
12 | /* | 12 | /* |
13 | * There is a race window between reading and incrementing, but we do | 13 | * There is a race window between reading and incrementing, but we do |
14 | * not care about potentially losing timer events in the !reinject | 14 | * not care about potentially losing timer events in the !reinject 
15 | * case anyway. | 15 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked |
16 | * in vcpu_enter_guest. | ||
16 | */ | 17 | */ |
17 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
18 | atomic_inc(&ktimer->pending); | 19 | atomic_inc(&ktimer->pending); |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6ad30a29f044..a6544b8e7c0f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -5,8 +5,6 @@ | |||
5 | 5 | ||
6 | #undef TRACE_SYSTEM | 6 | #undef TRACE_SYSTEM |
7 | #define TRACE_SYSTEM kvm | 7 | #define TRACE_SYSTEM kvm |
8 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
9 | #define TRACE_INCLUDE_FILE trace | ||
10 | 8 | ||
11 | /* | 9 | /* |
12 | * Tracepoint for guest mode entry. | 10 | * Tracepoint for guest mode entry. |
@@ -184,8 +182,8 @@ TRACE_EVENT(kvm_apic, | |||
184 | * Tracepoint for kvm guest exit: | 182 | * Tracepoint for kvm guest exit: |
185 | */ | 183 | */ |
186 | TRACE_EVENT(kvm_exit, | 184 | TRACE_EVENT(kvm_exit, |
187 | TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), | 185 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), |
188 | TP_ARGS(exit_reason, guest_rip), | 186 | TP_ARGS(exit_reason, vcpu), |
189 | 187 | ||
190 | TP_STRUCT__entry( | 188 | TP_STRUCT__entry( |
191 | __field( unsigned int, exit_reason ) | 189 | __field( unsigned int, exit_reason ) |
@@ -194,7 +192,7 @@ TRACE_EVENT(kvm_exit, | |||
194 | 192 | ||
195 | TP_fast_assign( | 193 | TP_fast_assign( |
196 | __entry->exit_reason = exit_reason; | 194 | __entry->exit_reason = exit_reason; |
197 | __entry->guest_rip = guest_rip; | 195 | __entry->guest_rip = kvm_rip_read(vcpu); |
198 | ), | 196 | ), |
199 | 197 | ||
200 | TP_printk("reason %s rip 0x%lx", | 198 | TP_printk("reason %s rip 0x%lx", |
@@ -221,6 +219,38 @@ TRACE_EVENT(kvm_inj_virq, | |||
221 | TP_printk("irq %u", __entry->irq) | 219 | TP_printk("irq %u", __entry->irq) |
222 | ); | 220 | ); |
223 | 221 | ||
222 | #define EXS(x) { x##_VECTOR, "#" #x } | ||
223 | |||
224 | #define kvm_trace_sym_exc \ | ||
225 | EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ | ||
226 | EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ | ||
227 | EXS(MF), EXS(MC) | ||
228 | |||
229 | /* | ||
230 | * Tracepoint for kvm exception injection: || 
231 | */ | ||
232 | TRACE_EVENT(kvm_inj_exception, | ||
233 | TP_PROTO(unsigned exception, bool has_error, unsigned error_code), | ||
234 | TP_ARGS(exception, has_error, error_code), | ||
235 | |||
236 | TP_STRUCT__entry( | ||
237 | __field( u8, exception ) | ||
238 | __field( u8, has_error ) | ||
239 | __field( u32, error_code ) | ||
240 | ), | ||
241 | |||
242 | TP_fast_assign( | ||
243 | __entry->exception = exception; | ||
244 | __entry->has_error = has_error; | ||
245 | __entry->error_code = error_code; | ||
246 | ), | ||
247 | |||
248 | TP_printk("%s (0x%x)", | ||
249 | __print_symbolic(__entry->exception, kvm_trace_sym_exc), | ||
250 | /* FIXME: don't print error_code if not present */ | ||
251 | __entry->has_error ? __entry->error_code : 0) | ||
252 | ); | ||
253 | |||
224 | /* | 254 | /* |
225 | * Tracepoint for page fault. | 255 | * Tracepoint for page fault. |
226 | */ | 256 | */ |
@@ -413,12 +443,34 @@ TRACE_EVENT(kvm_nested_vmrun, | |||
413 | ), | 443 | ), |
414 | 444 | ||
415 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | 445 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " |
416 | "event_inj: 0x%08x npt: %s\n", | 446 | "event_inj: 0x%08x npt: %s", |
417 | __entry->rip, __entry->vmcb, __entry->nested_rip, | 447 | __entry->rip, __entry->vmcb, __entry->nested_rip, |
418 | __entry->int_ctl, __entry->event_inj, | 448 | __entry->int_ctl, __entry->event_inj, |
419 | __entry->npt ? "on" : "off") | 449 | __entry->npt ? "on" : "off") |
420 | ); | 450 | ); |
421 | 451 | ||
452 | TRACE_EVENT(kvm_nested_intercepts, | ||
453 | TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), | ||
454 | TP_ARGS(cr_read, cr_write, exceptions, intercept), | ||
455 | |||
456 | TP_STRUCT__entry( | ||
457 | __field( __u16, cr_read ) | ||
458 | __field( __u16, cr_write ) | ||
459 | __field( __u32, exceptions ) | ||
460 | __field( __u64, intercept ) | ||
461 | ), | ||
462 | |||
463 | TP_fast_assign( | ||
464 | __entry->cr_read = cr_read; | ||
465 | __entry->cr_write = cr_write; | ||
466 | __entry->exceptions = exceptions; | ||
467 | __entry->intercept = intercept; | ||
468 | ), | ||
469 | |||
470 | TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", | ||
471 | __entry->cr_read, __entry->cr_write, __entry->exceptions, | ||
472 | __entry->intercept) | ||
473 | ); | ||
422 | /* | 474 | /* |
423 | * Tracepoint for #VMEXIT while nested | 475 | * Tracepoint for #VMEXIT while nested |
424 | */ | 476 | */ |
@@ -447,7 +499,7 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
447 | __entry->exit_int_info_err = exit_int_info_err; | 499 | __entry->exit_int_info_err = exit_int_info_err; |
448 | ), | 500 | ), |
449 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | 501 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " |
450 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 502 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
451 | __entry->rip, | 503 | __entry->rip, |
452 | ftrace_print_symbols_seq(p, __entry->exit_code, | 504 | ftrace_print_symbols_seq(p, __entry->exit_code, |
453 | kvm_x86_ops->exit_reasons_str), | 505 | kvm_x86_ops->exit_reasons_str), |
@@ -482,7 +534,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
482 | ), | 534 | ), |
483 | 535 | ||
484 | TP_printk("reason: %s ext_inf1: 0x%016llx " | 536 | TP_printk("reason: %s ext_inf1: 0x%016llx " |
485 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 537 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
486 | ftrace_print_symbols_seq(p, __entry->exit_code, | 538 | ftrace_print_symbols_seq(p, __entry->exit_code, |
487 | kvm_x86_ops->exit_reasons_str), | 539 | kvm_x86_ops->exit_reasons_str), |
488 | __entry->exit_info1, __entry->exit_info2, | 540 | __entry->exit_info1, __entry->exit_info2, |
@@ -504,7 +556,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit, | |||
504 | __entry->rip = rip | 556 | __entry->rip = rip |
505 | ), | 557 | ), |
506 | 558 | ||
507 | TP_printk("rip: 0x%016llx\n", __entry->rip) | 559 | TP_printk("rip: 0x%016llx", __entry->rip) |
508 | ); | 560 | ); |
509 | 561 | ||
510 | /* | 562 | /* |
@@ -526,7 +578,7 @@ TRACE_EVENT(kvm_invlpga, | |||
526 | __entry->address = address; | 578 | __entry->address = address; |
527 | ), | 579 | ), |
528 | 580 | ||
529 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | 581 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", |
530 | __entry->rip, __entry->asid, __entry->address) | 582 | __entry->rip, __entry->asid, __entry->address) |
531 | ); | 583 | ); |
532 | 584 | ||
@@ -547,11 +599,102 @@ TRACE_EVENT(kvm_skinit, | |||
547 | __entry->slb = slb; | 599 | __entry->slb = slb; |
548 | ), | 600 | ), |
549 | 601 | ||
550 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | 602 | TP_printk("rip: 0x%016llx slb: 0x%08x", |
551 | __entry->rip, __entry->slb) | 603 | __entry->rip, __entry->slb) |
552 | ); | 604 | ); |
553 | 605 | ||
606 | #define __print_insn(insn, ilen) ({ \ | ||
607 | int i; \ | ||
608 | const char *ret = p->buffer + p->len; \ | ||
609 | \ | ||
610 | for (i = 0; i < ilen; ++i) \ | ||
611 | trace_seq_printf(p, " %02x", insn[i]); \ | ||
612 | trace_seq_printf(p, "%c", 0); \ | ||
613 | ret; \ | ||
614 | }) | ||
615 | |||
616 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) | ||
617 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) | ||
618 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) | ||
619 | #define KVM_EMUL_INSN_F_CS_L (1 << 3) | ||
620 | |||
621 | #define kvm_trace_symbol_emul_flags \ | ||
622 | { 0, "real" }, \ | ||
623 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
624 | | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ | ||
625 | { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ | ||
626 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
627 | | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ | ||
628 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
629 | | KVM_EMUL_INSN_F_CS_L, "prot64" } | ||
630 | |||
631 | #define kei_decode_mode(mode) ({ \ | ||
632 | u8 flags = 0xff; \ | ||
633 | switch (mode) { \ | ||
634 | case X86EMUL_MODE_REAL: \ | ||
635 | flags = 0; \ | ||
636 | break; \ | ||
637 | case X86EMUL_MODE_VM86: \ | ||
638 | flags = KVM_EMUL_INSN_F_EFL_VM; \ | ||
639 | break; \ | ||
640 | case X86EMUL_MODE_PROT16: \ | ||
641 | flags = KVM_EMUL_INSN_F_CR0_PE; \ | ||
642 | break; \ | ||
643 | case X86EMUL_MODE_PROT32: \ | ||
644 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
645 | | KVM_EMUL_INSN_F_CS_D; \ | ||
646 | break; \ | ||
647 | case X86EMUL_MODE_PROT64: \ | ||
648 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
649 | | KVM_EMUL_INSN_F_CS_L; \ | ||
650 | break; \ | ||
651 | } \ | ||
652 | flags; \ | ||
653 | }) | ||
654 | |||
655 | TRACE_EVENT(kvm_emulate_insn, | ||
656 | TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), | ||
657 | TP_ARGS(vcpu, failed), | ||
658 | |||
659 | TP_STRUCT__entry( | ||
660 | __field( __u64, rip ) | ||
661 | __field( __u32, csbase ) | ||
662 | __field( __u8, len ) | ||
663 | __array( __u8, insn, 15 ) | ||
664 | __field( __u8, flags ) | ||
665 | __field( __u8, failed ) | ||
666 | ), | ||
667 | |||
668 | TP_fast_assign( | ||
669 | __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
670 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); | ||
671 | __entry->len = vcpu->arch.emulate_ctxt.decode.eip | ||
672 | - vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
673 | memcpy(__entry->insn, | ||
674 | vcpu->arch.emulate_ctxt.decode.fetch.data, | ||
675 | 15); | ||
676 | __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); | ||
677 | __entry->failed = failed; | ||
678 | ), | ||
679 | |||
680 | TP_printk("%x:%llx:%s (%s)%s", | ||
681 | __entry->csbase, __entry->rip, | ||
682 | __print_insn(__entry->insn, __entry->len), | ||
683 | __print_symbolic(__entry->flags, | ||
684 | kvm_trace_symbol_emul_flags), | ||
685 | __entry->failed ? " failed" : "" | ||
686 | ) | ||
687 | ); | ||
688 | |||
689 | #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) | ||
690 | #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) | ||
691 | |||
554 | #endif /* _TRACE_KVM_H */ | 692 | #endif /* _TRACE_KVM_H */ |
555 | 693 | ||
694 | #undef TRACE_INCLUDE_PATH | ||
695 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
696 | #undef TRACE_INCLUDE_FILE | ||
697 | #define TRACE_INCLUDE_FILE trace | ||
698 | |||
556 | /* This part must be outside protection */ | 699 | /* This part must be outside protection */ |
557 | #include <trace/define_trace.h> | 700 | #include <trace/define_trace.h> |
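The trace.h hunk above adds the kvm_emulate_insn event, whose flags field is built by kei_decode_mode() and printed symbolically through kvm_trace_symbol_emul_flags. Below is a minimal standalone sketch of that encoding and lookup in plain user-space C; the MODE_* enum values are illustrative stand-ins for the kernel's X86EMUL_MODE_* constants, and the fallback-to-hex behaviour mirrors what __print_symbolic() does when no entry matches.

/* Standalone sketch of the kvm_emulate_insn flag encoding.
 * Flag bits and symbolic names mirror the hunk above; the enum is a
 * stand-in for the kernel's X86EMUL_MODE_* values. */
#include <stdio.h>

#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
#define KVM_EMUL_INSN_F_CS_D   (1 << 2)
#define KVM_EMUL_INSN_F_CS_L   (1 << 3)

enum emul_mode { MODE_REAL, MODE_VM86, MODE_PROT16, MODE_PROT32, MODE_PROT64 };

/* Same mapping as kei_decode_mode() in the hunk. */
static unsigned decode_mode(enum emul_mode mode)
{
	switch (mode) {
	case MODE_REAL:   return 0;
	case MODE_VM86:   return KVM_EMUL_INSN_F_EFL_VM;
	case MODE_PROT16: return KVM_EMUL_INSN_F_CR0_PE;
	case MODE_PROT32: return KVM_EMUL_INSN_F_CR0_PE | KVM_EMUL_INSN_F_CS_D;
	case MODE_PROT64: return KVM_EMUL_INSN_F_CR0_PE | KVM_EMUL_INSN_F_CS_L;
	}
	return 0xff;
}

/* Same pairs as kvm_trace_symbol_emul_flags; like __print_symbolic(),
 * fall back to the raw hex value when no entry matches. */
static void print_flags(unsigned flags)
{
	static const struct { unsigned mask; const char *name; } tbl[] = {
		{ 0, "real" },
		{ KVM_EMUL_INSN_F_CR0_PE | KVM_EMUL_INSN_F_EFL_VM, "vm16" },
		{ KVM_EMUL_INSN_F_CR0_PE, "prot16" },
		{ KVM_EMUL_INSN_F_CR0_PE | KVM_EMUL_INSN_F_CS_D, "prot32" },
		{ KVM_EMUL_INSN_F_CR0_PE | KVM_EMUL_INSN_F_CS_L, "prot64" },
	};
	unsigned i;

	for (i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++)
		if (tbl[i].mask == flags) {
			printf("%s\n", tbl[i].name);
			return;
		}
	printf("0x%x\n", flags);
}

int main(void)
{
	print_flags(decode_mode(MODE_REAL));	/* real   */
	print_flags(decode_mode(MODE_PROT32));	/* prot32 */
	print_flags(decode_mode(MODE_PROT64));	/* prot64 */
	return 0;
}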
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index edca080407a5..859a01a07dbf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/tboot.h> | ||
30 | #include "kvm_cache_regs.h" | 31 | #include "kvm_cache_regs.h" |
31 | #include "x86.h" | 32 | #include "x86.h" |
32 | 33 | ||
@@ -98,6 +99,8 @@ module_param(ple_gap, int, S_IRUGO); | |||
98 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 99 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
99 | module_param(ple_window, int, S_IRUGO); | 100 | module_param(ple_window, int, S_IRUGO); |
100 | 101 | ||
102 | #define NR_AUTOLOAD_MSRS 1 | ||
103 | |||
101 | struct vmcs { | 104 | struct vmcs { |
102 | u32 revision_id; | 105 | u32 revision_id; |
103 | u32 abort; | 106 | u32 abort; |
@@ -125,6 +128,11 @@ struct vcpu_vmx { | |||
125 | u64 msr_guest_kernel_gs_base; | 128 | u64 msr_guest_kernel_gs_base; |
126 | #endif | 129 | #endif |
127 | struct vmcs *vmcs; | 130 | struct vmcs *vmcs; |
131 | struct msr_autoload { | ||
132 | unsigned nr; | ||
133 | struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; | ||
134 | struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; | ||
135 | } msr_autoload; | ||
128 | struct { | 136 | struct { |
129 | int loaded; | 137 | int loaded; |
130 | u16 fs_sel, gs_sel, ldt_sel; | 138 | u16 fs_sel, gs_sel, ldt_sel; |
@@ -234,56 +242,56 @@ static const u32 vmx_msr_index[] = { | |||
234 | }; | 242 | }; |
235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 243 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
236 | 244 | ||
237 | static inline int is_page_fault(u32 intr_info) | 245 | static inline bool is_page_fault(u32 intr_info) |
238 | { | 246 | { |
239 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 247 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
240 | INTR_INFO_VALID_MASK)) == | 248 | INTR_INFO_VALID_MASK)) == |
241 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); | 249 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); |
242 | } | 250 | } |
243 | 251 | ||
244 | static inline int is_no_device(u32 intr_info) | 252 | static inline bool is_no_device(u32 intr_info) |
245 | { | 253 | { |
246 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 254 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
247 | INTR_INFO_VALID_MASK)) == | 255 | INTR_INFO_VALID_MASK)) == |
248 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); | 256 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); |
249 | } | 257 | } |
250 | 258 | ||
251 | static inline int is_invalid_opcode(u32 intr_info) | 259 | static inline bool is_invalid_opcode(u32 intr_info) |
252 | { | 260 | { |
253 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 261 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
254 | INTR_INFO_VALID_MASK)) == | 262 | INTR_INFO_VALID_MASK)) == |
255 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); | 263 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); |
256 | } | 264 | } |
257 | 265 | ||
258 | static inline int is_external_interrupt(u32 intr_info) | 266 | static inline bool is_external_interrupt(u32 intr_info) |
259 | { | 267 | { |
260 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 268 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
261 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 269 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
262 | } | 270 | } |
263 | 271 | ||
264 | static inline int is_machine_check(u32 intr_info) | 272 | static inline bool is_machine_check(u32 intr_info) |
265 | { | 273 | { |
266 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 274 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
267 | INTR_INFO_VALID_MASK)) == | 275 | INTR_INFO_VALID_MASK)) == |
268 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); | 276 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); |
269 | } | 277 | } |
270 | 278 | ||
271 | static inline int cpu_has_vmx_msr_bitmap(void) | 279 | static inline bool cpu_has_vmx_msr_bitmap(void) |
272 | { | 280 | { |
273 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; | 281 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; |
274 | } | 282 | } |
275 | 283 | ||
276 | static inline int cpu_has_vmx_tpr_shadow(void) | 284 | static inline bool cpu_has_vmx_tpr_shadow(void) |
277 | { | 285 | { |
278 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; | 286 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
279 | } | 287 | } |
280 | 288 | ||
281 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | 289 | static inline bool vm_need_tpr_shadow(struct kvm *kvm) |
282 | { | 290 | { |
283 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); | 291 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); |
284 | } | 292 | } |
285 | 293 | ||
286 | static inline int cpu_has_secondary_exec_ctrls(void) | 294 | static inline bool cpu_has_secondary_exec_ctrls(void) |
287 | { | 295 | { |
288 | return vmcs_config.cpu_based_exec_ctrl & | 296 | return vmcs_config.cpu_based_exec_ctrl & |
289 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 297 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -303,80 +311,80 @@ static inline bool cpu_has_vmx_flexpriority(void) | |||
303 | 311 | ||
304 | static inline bool cpu_has_vmx_ept_execute_only(void) | 312 | static inline bool cpu_has_vmx_ept_execute_only(void) |
305 | { | 313 | { |
306 | return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); | 314 | return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; |
307 | } | 315 | } |
308 | 316 | ||
309 | static inline bool cpu_has_vmx_eptp_uncacheable(void) | 317 | static inline bool cpu_has_vmx_eptp_uncacheable(void) |
310 | { | 318 | { |
311 | return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); | 319 | return vmx_capability.ept & VMX_EPTP_UC_BIT; |
312 | } | 320 | } |
313 | 321 | ||
314 | static inline bool cpu_has_vmx_eptp_writeback(void) | 322 | static inline bool cpu_has_vmx_eptp_writeback(void) |
315 | { | 323 | { |
316 | return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); | 324 | return vmx_capability.ept & VMX_EPTP_WB_BIT; |
317 | } | 325 | } |
318 | 326 | ||
319 | static inline bool cpu_has_vmx_ept_2m_page(void) | 327 | static inline bool cpu_has_vmx_ept_2m_page(void) |
320 | { | 328 | { |
321 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 329 | return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; |
322 | } | 330 | } |
323 | 331 | ||
324 | static inline bool cpu_has_vmx_ept_1g_page(void) | 332 | static inline bool cpu_has_vmx_ept_1g_page(void) |
325 | { | 333 | { |
326 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | 334 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
327 | } | 335 | } |
328 | 336 | ||
329 | static inline int cpu_has_vmx_invept_individual_addr(void) | 337 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
330 | { | 338 | { |
331 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 339 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
332 | } | 340 | } |
333 | 341 | ||
334 | static inline int cpu_has_vmx_invept_context(void) | 342 | static inline bool cpu_has_vmx_invept_context(void) |
335 | { | 343 | { |
336 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); | 344 | return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; |
337 | } | 345 | } |
338 | 346 | ||
339 | static inline int cpu_has_vmx_invept_global(void) | 347 | static inline bool cpu_has_vmx_invept_global(void) |
340 | { | 348 | { |
341 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); | 349 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
342 | } | 350 | } |
343 | 351 | ||
344 | static inline int cpu_has_vmx_ept(void) | 352 | static inline bool cpu_has_vmx_ept(void) |
345 | { | 353 | { |
346 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 354 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
347 | SECONDARY_EXEC_ENABLE_EPT; | 355 | SECONDARY_EXEC_ENABLE_EPT; |
348 | } | 356 | } |
349 | 357 | ||
350 | static inline int cpu_has_vmx_unrestricted_guest(void) | 358 | static inline bool cpu_has_vmx_unrestricted_guest(void) |
351 | { | 359 | { |
352 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 360 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
353 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 361 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
354 | } | 362 | } |
355 | 363 | ||
356 | static inline int cpu_has_vmx_ple(void) | 364 | static inline bool cpu_has_vmx_ple(void) |
357 | { | 365 | { |
358 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 366 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
359 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 367 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
360 | } | 368 | } |
361 | 369 | ||
362 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 370 | static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) |
363 | { | 371 | { |
364 | return flexpriority_enabled && irqchip_in_kernel(kvm); | 372 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
365 | } | 373 | } |
366 | 374 | ||
367 | static inline int cpu_has_vmx_vpid(void) | 375 | static inline bool cpu_has_vmx_vpid(void) |
368 | { | 376 | { |
369 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 377 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
370 | SECONDARY_EXEC_ENABLE_VPID; | 378 | SECONDARY_EXEC_ENABLE_VPID; |
371 | } | 379 | } |
372 | 380 | ||
373 | static inline int cpu_has_vmx_rdtscp(void) | 381 | static inline bool cpu_has_vmx_rdtscp(void) |
374 | { | 382 | { |
375 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 383 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
376 | SECONDARY_EXEC_RDTSCP; | 384 | SECONDARY_EXEC_RDTSCP; |
377 | } | 385 | } |
378 | 386 | ||
379 | static inline int cpu_has_virtual_nmis(void) | 387 | static inline bool cpu_has_virtual_nmis(void) |
380 | { | 388 | { |
381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 389 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
382 | } | 390 | } |
@@ -595,16 +603,56 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
595 | vmcs_write32(EXCEPTION_BITMAP, eb); | 603 | vmcs_write32(EXCEPTION_BITMAP, eb); |
596 | } | 604 | } |
597 | 605 | ||
606 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | ||
607 | { | ||
608 | unsigned i; | ||
609 | struct msr_autoload *m = &vmx->msr_autoload; | ||
610 | |||
611 | for (i = 0; i < m->nr; ++i) | ||
612 | if (m->guest[i].index == msr) | ||
613 | break; | ||
614 | |||
615 | if (i == m->nr) | ||
616 | return; | ||
617 | --m->nr; | ||
618 | m->guest[i] = m->guest[m->nr]; | ||
619 | m->host[i] = m->host[m->nr]; | ||
620 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
621 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
622 | } | ||
623 | |||
624 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | ||
625 | u64 guest_val, u64 host_val) | ||
626 | { | ||
627 | unsigned i; | ||
628 | struct msr_autoload *m = &vmx->msr_autoload; | ||
629 | |||
630 | for (i = 0; i < m->nr; ++i) | ||
631 | if (m->guest[i].index == msr) | ||
632 | break; | ||
633 | |||
634 | if (i == m->nr) { | ||
635 | ++m->nr; | ||
636 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
637 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
638 | } | ||
639 | |||
640 | m->guest[i].index = msr; | ||
641 | m->guest[i].value = guest_val; | ||
642 | m->host[i].index = msr; | ||
643 | m->host[i].value = host_val; | ||
644 | } | ||
645 | |||
598 | static void reload_tss(void) | 646 | static void reload_tss(void) |
599 | { | 647 | { |
600 | /* | 648 | /* |
601 | * VT restores TR but not its size. Useless. | 649 | * VT restores TR but not its size. Useless. |
602 | */ | 650 | */ |
603 | struct descriptor_table gdt; | 651 | struct desc_ptr gdt; |
604 | struct desc_struct *descs; | 652 | struct desc_struct *descs; |
605 | 653 | ||
606 | kvm_get_gdt(&gdt); | 654 | native_store_gdt(&gdt); |
607 | descs = (void *)gdt.base; | 655 | descs = (void *)gdt.address; |
608 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | 656 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ |
609 | load_TR_desc(); | 657 | load_TR_desc(); |
610 | } | 658 | } |
@@ -631,9 +679,57 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
631 | guest_efer |= host_efer & ignore_bits; | 679 | guest_efer |= host_efer & ignore_bits; |
632 | vmx->guest_msrs[efer_offset].data = guest_efer; | 680 | vmx->guest_msrs[efer_offset].data = guest_efer; |
633 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; | 681 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
682 | |||
683 | clear_atomic_switch_msr(vmx, MSR_EFER); | ||
684 | /* On ept, can't emulate nx, and must switch nx atomically */ | ||
685 | if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { | ||
686 | guest_efer = vmx->vcpu.arch.efer; | ||
687 | if (!(guest_efer & EFER_LMA)) | ||
688 | guest_efer &= ~EFER_LME; | ||
689 | add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); | ||
690 | return false; | ||
691 | } | ||
692 | |||
634 | return true; | 693 | return true; |
635 | } | 694 | } |
636 | 695 | ||
696 | static unsigned long segment_base(u16 selector) | ||
697 | { | ||
698 | struct desc_ptr gdt; | ||
699 | struct desc_struct *d; | ||
700 | unsigned long table_base; | ||
701 | unsigned long v; | ||
702 | |||
703 | if (!(selector & ~3)) | ||
704 | return 0; | ||
705 | |||
706 | native_store_gdt(&gdt); | ||
707 | table_base = gdt.address; | ||
708 | |||
709 | if (selector & 4) { /* from ldt */ | ||
710 | u16 ldt_selector = kvm_read_ldt(); | ||
711 | |||
712 | if (!(ldt_selector & ~3)) | ||
713 | return 0; | ||
714 | |||
715 | table_base = segment_base(ldt_selector); | ||
716 | } | ||
717 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
718 | v = get_desc_base(d); | ||
719 | #ifdef CONFIG_X86_64 | ||
720 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
721 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
722 | #endif | ||
723 | return v; | ||
724 | } | ||
725 | |||
726 | static inline unsigned long kvm_read_tr_base(void) | ||
727 | { | ||
728 | u16 tr; | ||
729 | asm("str %0" : "=g"(tr)); | ||
730 | return segment_base(tr); | ||
731 | } | ||
732 | |||
637 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 733 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
638 | { | 734 | { |
639 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 735 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -758,7 +854,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
758 | } | 854 | } |
759 | 855 | ||
760 | if (vcpu->cpu != cpu) { | 856 | if (vcpu->cpu != cpu) { |
761 | struct descriptor_table dt; | 857 | struct desc_ptr dt; |
762 | unsigned long sysenter_esp; | 858 | unsigned long sysenter_esp; |
763 | 859 | ||
764 | vcpu->cpu = cpu; | 860 | vcpu->cpu = cpu; |
@@ -767,8 +863,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
767 | * processors. | 863 | * processors. |
768 | */ | 864 | */ |
769 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ | 865 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ |
770 | kvm_get_gdt(&dt); | 866 | native_store_gdt(&dt); |
771 | vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ | 867 | vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ |
772 | 868 | ||
773 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 869 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
774 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 870 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
@@ -846,9 +942,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
846 | int ret = 0; | 942 | int ret = 0; |
847 | 943 | ||
848 | if (interruptibility & GUEST_INTR_STATE_STI) | 944 | if (interruptibility & GUEST_INTR_STATE_STI) |
849 | ret |= X86_SHADOW_INT_STI; | 945 | ret |= KVM_X86_SHADOW_INT_STI; |
850 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | 946 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) |
851 | ret |= X86_SHADOW_INT_MOV_SS; | 947 | ret |= KVM_X86_SHADOW_INT_MOV_SS; |
852 | 948 | ||
853 | return ret & mask; | 949 | return ret & mask; |
854 | } | 950 | } |
@@ -860,9 +956,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
860 | 956 | ||
861 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); | 957 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); |
862 | 958 | ||
863 | if (mask & X86_SHADOW_INT_MOV_SS) | 959 | if (mask & KVM_X86_SHADOW_INT_MOV_SS) |
864 | interruptibility |= GUEST_INTR_STATE_MOV_SS; | 960 | interruptibility |= GUEST_INTR_STATE_MOV_SS; |
865 | if (mask & X86_SHADOW_INT_STI) | 961 | else if (mask & KVM_X86_SHADOW_INT_STI) |
866 | interruptibility |= GUEST_INTR_STATE_STI; | 962 | interruptibility |= GUEST_INTR_STATE_STI; |
867 | 963 | ||
868 | if ((interruptibility != interruptibility_old)) | 964 | if ((interruptibility != interruptibility_old)) |
@@ -882,7 +978,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
882 | } | 978 | } |
883 | 979 | ||
884 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 980 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
885 | bool has_error_code, u32 error_code) | 981 | bool has_error_code, u32 error_code, |
982 | bool reinject) | ||
886 | { | 983 | { |
887 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 984 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
888 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 985 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
@@ -1176,9 +1273,16 @@ static __init int vmx_disabled_by_bios(void) | |||
1176 | u64 msr; | 1273 | u64 msr; |
1177 | 1274 | ||
1178 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1275 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
1179 | return (msr & (FEATURE_CONTROL_LOCKED | | 1276 | if (msr & FEATURE_CONTROL_LOCKED) { |
1180 | FEATURE_CONTROL_VMXON_ENABLED)) | 1277 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) |
1181 | == FEATURE_CONTROL_LOCKED; | 1278 | && tboot_enabled()) |
1279 | return 1; | ||
1280 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | ||
1281 | && !tboot_enabled()) | ||
1282 | return 1; | ||
1283 | } | ||
1284 | |||
1285 | return 0; | ||
1182 | /* locked but not enabled */ | 1286 | /* locked but not enabled */ |
1183 | } | 1287 | } |
1184 | 1288 | ||
@@ -1186,21 +1290,23 @@ static int hardware_enable(void *garbage) | |||
1186 | { | 1290 | { |
1187 | int cpu = raw_smp_processor_id(); | 1291 | int cpu = raw_smp_processor_id(); |
1188 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1292 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
1189 | u64 old; | 1293 | u64 old, test_bits; |
1190 | 1294 | ||
1191 | if (read_cr4() & X86_CR4_VMXE) | 1295 | if (read_cr4() & X86_CR4_VMXE) |
1192 | return -EBUSY; | 1296 | return -EBUSY; |
1193 | 1297 | ||
1194 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1298 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
1195 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1299 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1196 | if ((old & (FEATURE_CONTROL_LOCKED | | 1300 | |
1197 | FEATURE_CONTROL_VMXON_ENABLED)) | 1301 | test_bits = FEATURE_CONTROL_LOCKED; |
1198 | != (FEATURE_CONTROL_LOCKED | | 1302 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
1199 | FEATURE_CONTROL_VMXON_ENABLED)) | 1303 | if (tboot_enabled()) |
1304 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; | ||
1305 | |||
1306 | if ((old & test_bits) != test_bits) { | ||
1200 | /* enable and lock */ | 1307 | /* enable and lock */ |
1201 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | | 1308 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
1202 | FEATURE_CONTROL_LOCKED | | 1309 | } |
1203 | FEATURE_CONTROL_VMXON_ENABLED); | ||
1204 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1310 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
1205 | asm volatile (ASM_VMX_VMXON_RAX | 1311 | asm volatile (ASM_VMX_VMXON_RAX |
1206 | : : "a"(&phys_addr), "m"(phys_addr) | 1312 | : : "a"(&phys_addr), "m"(phys_addr) |
@@ -1521,7 +1627,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
1521 | struct kvm_memslots *slots; | 1627 | struct kvm_memslots *slots; |
1522 | gfn_t base_gfn; | 1628 | gfn_t base_gfn; |
1523 | 1629 | ||
1524 | slots = rcu_dereference(kvm->memslots); | 1630 | slots = kvm_memslots(kvm); |
1525 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1631 | base_gfn = kvm->memslots->memslots[0].base_gfn + |
1526 | kvm->memslots->memslots[0].npages - 3; | 1632 | kvm->memslots->memslots[0].npages - 3; |
1527 | return base_gfn << PAGE_SHIFT; | 1633 | return base_gfn << PAGE_SHIFT; |
@@ -1649,6 +1755,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
1649 | vmcs_write32(VM_ENTRY_CONTROLS, | 1755 | vmcs_write32(VM_ENTRY_CONTROLS, |
1650 | vmcs_read32(VM_ENTRY_CONTROLS) | 1756 | vmcs_read32(VM_ENTRY_CONTROLS) |
1651 | & ~VM_ENTRY_IA32E_MODE); | 1757 | & ~VM_ENTRY_IA32E_MODE); |
1758 | vmx_set_efer(vcpu, vcpu->arch.efer); | ||
1652 | } | 1759 | } |
1653 | 1760 | ||
1654 | #endif | 1761 | #endif |
@@ -1934,28 +2041,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | |||
1934 | *l = (ar >> 13) & 1; | 2041 | *l = (ar >> 13) & 1; |
1935 | } | 2042 | } |
1936 | 2043 | ||
1937 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2044 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1938 | { | 2045 | { |
1939 | dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); | 2046 | dt->size = vmcs_read32(GUEST_IDTR_LIMIT); |
1940 | dt->base = vmcs_readl(GUEST_IDTR_BASE); | 2047 | dt->address = vmcs_readl(GUEST_IDTR_BASE); |
1941 | } | 2048 | } |
1942 | 2049 | ||
1943 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2050 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1944 | { | 2051 | { |
1945 | vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); | 2052 | vmcs_write32(GUEST_IDTR_LIMIT, dt->size); |
1946 | vmcs_writel(GUEST_IDTR_BASE, dt->base); | 2053 | vmcs_writel(GUEST_IDTR_BASE, dt->address); |
1947 | } | 2054 | } |
1948 | 2055 | ||
1949 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2056 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1950 | { | 2057 | { |
1951 | dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); | 2058 | dt->size = vmcs_read32(GUEST_GDTR_LIMIT); |
1952 | dt->base = vmcs_readl(GUEST_GDTR_BASE); | 2059 | dt->address = vmcs_readl(GUEST_GDTR_BASE); |
1953 | } | 2060 | } |
1954 | 2061 | ||
1955 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2062 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1956 | { | 2063 | { |
1957 | vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); | 2064 | vmcs_write32(GUEST_GDTR_LIMIT, dt->size); |
1958 | vmcs_writel(GUEST_GDTR_BASE, dt->base); | 2065 | vmcs_writel(GUEST_GDTR_BASE, dt->address); |
1959 | } | 2066 | } |
1960 | 2067 | ||
1961 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | 2068 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) |
@@ -2296,6 +2403,16 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
2296 | spin_unlock(&vmx_vpid_lock); | 2403 | spin_unlock(&vmx_vpid_lock); |
2297 | } | 2404 | } |
2298 | 2405 | ||
2406 | static void free_vpid(struct vcpu_vmx *vmx) | ||
2407 | { | ||
2408 | if (!enable_vpid) | ||
2409 | return; | ||
2410 | spin_lock(&vmx_vpid_lock); | ||
2411 | if (vmx->vpid != 0) | ||
2412 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
2413 | spin_unlock(&vmx_vpid_lock); | ||
2414 | } | ||
2415 | |||
2299 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 2416 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) |
2300 | { | 2417 | { |
2301 | int f = sizeof(unsigned long); | 2418 | int f = sizeof(unsigned long); |
@@ -2334,7 +2451,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2334 | u32 junk; | 2451 | u32 junk; |
2335 | u64 host_pat, tsc_this, tsc_base; | 2452 | u64 host_pat, tsc_this, tsc_base; |
2336 | unsigned long a; | 2453 | unsigned long a; |
2337 | struct descriptor_table dt; | 2454 | struct desc_ptr dt; |
2338 | int i; | 2455 | int i; |
2339 | unsigned long kvm_vmx_return; | 2456 | unsigned long kvm_vmx_return; |
2340 | u32 exec_control; | 2457 | u32 exec_control; |
@@ -2415,14 +2532,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2415 | 2532 | ||
2416 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 2533 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
2417 | 2534 | ||
2418 | kvm_get_idt(&dt); | 2535 | native_store_idt(&dt); |
2419 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ | 2536 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
2420 | 2537 | ||
2421 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | 2538 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); |
2422 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ | 2539 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ |
2423 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | 2540 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); |
2424 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | 2541 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); |
2542 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); | ||
2425 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); | 2543 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); |
2544 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); | ||
2426 | 2545 | ||
2427 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); | 2546 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); |
2428 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); | 2547 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); |
@@ -2947,22 +3066,20 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
2947 | int size, in, string; | 3066 | int size, in, string; |
2948 | unsigned port; | 3067 | unsigned port; |
2949 | 3068 | ||
2950 | ++vcpu->stat.io_exits; | ||
2951 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3069 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
2952 | string = (exit_qualification & 16) != 0; | 3070 | string = (exit_qualification & 16) != 0; |
3071 | in = (exit_qualification & 8) != 0; | ||
2953 | 3072 | ||
2954 | if (string) { | 3073 | ++vcpu->stat.io_exits; |
2955 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) | ||
2956 | return 0; | ||
2957 | return 1; | ||
2958 | } | ||
2959 | 3074 | ||
2960 | size = (exit_qualification & 7) + 1; | 3075 | if (string || in) |
2961 | in = (exit_qualification & 8) != 0; | 3076 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); |
2962 | port = exit_qualification >> 16; | ||
2963 | 3077 | ||
3078 | port = exit_qualification >> 16; | ||
3079 | size = (exit_qualification & 7) + 1; | ||
2964 | skip_emulated_instruction(vcpu); | 3080 | skip_emulated_instruction(vcpu); |
2965 | return kvm_emulate_pio(vcpu, in, size, port); | 3081 | |
3082 | return kvm_fast_pio_out(vcpu, size, port); | ||
2966 | } | 3083 | } |
2967 | 3084 | ||
2968 | static void | 3085 | static void |
@@ -3053,19 +3170,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3053 | return 0; | 3170 | return 0; |
3054 | } | 3171 | } |
3055 | 3172 | ||
3056 | static int check_dr_alias(struct kvm_vcpu *vcpu) | ||
3057 | { | ||
3058 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
3059 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3060 | return -1; | ||
3061 | } | ||
3062 | return 0; | ||
3063 | } | ||
3064 | |||
3065 | static int handle_dr(struct kvm_vcpu *vcpu) | 3173 | static int handle_dr(struct kvm_vcpu *vcpu) |
3066 | { | 3174 | { |
3067 | unsigned long exit_qualification; | 3175 | unsigned long exit_qualification; |
3068 | unsigned long val; | ||
3069 | int dr, reg; | 3176 | int dr, reg; |
3070 | 3177 | ||
3071 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ | 3178 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ |
@@ -3100,67 +3207,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
3100 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; | 3207 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; |
3101 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 3208 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
3102 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 3209 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
3103 | switch (dr) { | 3210 | unsigned long val; |
3104 | case 0 ... 3: | 3211 | if (!kvm_get_dr(vcpu, dr, &val)) |
3105 | val = vcpu->arch.db[dr]; | 3212 | kvm_register_write(vcpu, reg, val); |
3106 | break; | 3213 | } else |
3107 | case 4: | 3214 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); |
3108 | if (check_dr_alias(vcpu) < 0) | ||
3109 | return 1; | ||
3110 | /* fall through */ | ||
3111 | case 6: | ||
3112 | val = vcpu->arch.dr6; | ||
3113 | break; | ||
3114 | case 5: | ||
3115 | if (check_dr_alias(vcpu) < 0) | ||
3116 | return 1; | ||
3117 | /* fall through */ | ||
3118 | default: /* 7 */ | ||
3119 | val = vcpu->arch.dr7; | ||
3120 | break; | ||
3121 | } | ||
3122 | kvm_register_write(vcpu, reg, val); | ||
3123 | } else { | ||
3124 | val = vcpu->arch.regs[reg]; | ||
3125 | switch (dr) { | ||
3126 | case 0 ... 3: | ||
3127 | vcpu->arch.db[dr] = val; | ||
3128 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
3129 | vcpu->arch.eff_db[dr] = val; | ||
3130 | break; | ||
3131 | case 4: | ||
3132 | if (check_dr_alias(vcpu) < 0) | ||
3133 | return 1; | ||
3134 | /* fall through */ | ||
3135 | case 6: | ||
3136 | if (val & 0xffffffff00000000ULL) { | ||
3137 | kvm_inject_gp(vcpu, 0); | ||
3138 | return 1; | ||
3139 | } | ||
3140 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
3141 | break; | ||
3142 | case 5: | ||
3143 | if (check_dr_alias(vcpu) < 0) | ||
3144 | return 1; | ||
3145 | /* fall through */ | ||
3146 | default: /* 7 */ | ||
3147 | if (val & 0xffffffff00000000ULL) { | ||
3148 | kvm_inject_gp(vcpu, 0); | ||
3149 | return 1; | ||
3150 | } | ||
3151 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
3152 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
3153 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | ||
3154 | vcpu->arch.switch_db_regs = | ||
3155 | (val & DR7_BP_EN_MASK); | ||
3156 | } | ||
3157 | break; | ||
3158 | } | ||
3159 | } | ||
3160 | skip_emulated_instruction(vcpu); | 3215 | skip_emulated_instruction(vcpu); |
3161 | return 1; | 3216 | return 1; |
3162 | } | 3217 | } |
3163 | 3218 | ||
3219 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | ||
3220 | { | ||
3221 | vmcs_writel(GUEST_DR7, val); | ||
3222 | } | ||
3223 | |||
3164 | static int handle_cpuid(struct kvm_vcpu *vcpu) | 3224 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
3165 | { | 3225 | { |
3166 | kvm_emulate_cpuid(vcpu); | 3226 | kvm_emulate_cpuid(vcpu); |
@@ -3292,6 +3352,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3292 | { | 3352 | { |
3293 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3353 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3294 | unsigned long exit_qualification; | 3354 | unsigned long exit_qualification; |
3355 | bool has_error_code = false; | ||
3356 | u32 error_code = 0; | ||
3295 | u16 tss_selector; | 3357 | u16 tss_selector; |
3296 | int reason, type, idt_v; | 3358 | int reason, type, idt_v; |
3297 | 3359 | ||
@@ -3314,6 +3376,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3314 | kvm_clear_interrupt_queue(vcpu); | 3376 | kvm_clear_interrupt_queue(vcpu); |
3315 | break; | 3377 | break; |
3316 | case INTR_TYPE_HARD_EXCEPTION: | 3378 | case INTR_TYPE_HARD_EXCEPTION: |
3379 | if (vmx->idt_vectoring_info & | ||
3380 | VECTORING_INFO_DELIVER_CODE_MASK) { | ||
3381 | has_error_code = true; | ||
3382 | error_code = | ||
3383 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
3384 | } | ||
3385 | /* fall through */ | ||
3317 | case INTR_TYPE_SOFT_EXCEPTION: | 3386 | case INTR_TYPE_SOFT_EXCEPTION: |
3318 | kvm_clear_exception_queue(vcpu); | 3387 | kvm_clear_exception_queue(vcpu); |
3319 | break; | 3388 | break; |
@@ -3328,8 +3397,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3328 | type != INTR_TYPE_NMI_INTR)) | 3397 | type != INTR_TYPE_NMI_INTR)) |
3329 | skip_emulated_instruction(vcpu); | 3398 | skip_emulated_instruction(vcpu); |
3330 | 3399 | ||
3331 | if (!kvm_task_switch(vcpu, tss_selector, reason)) | 3400 | if (kvm_task_switch(vcpu, tss_selector, reason, |
3401 | has_error_code, error_code) == EMULATE_FAIL) { | ||
3402 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
3403 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3404 | vcpu->run->internal.ndata = 0; | ||
3332 | return 0; | 3405 | return 0; |
3406 | } | ||
3333 | 3407 | ||
3334 | /* clear all local breakpoint enable flags */ | 3408 | /* clear all local breakpoint enable flags */ |
3335 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); | 3409 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); |
@@ -3574,7 +3648,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3574 | u32 exit_reason = vmx->exit_reason; | 3648 | u32 exit_reason = vmx->exit_reason; |
3575 | u32 vectoring_info = vmx->idt_vectoring_info; | 3649 | u32 vectoring_info = vmx->idt_vectoring_info; |
3576 | 3650 | ||
3577 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3651 | trace_kvm_exit(exit_reason, vcpu); |
3578 | 3652 | ||
3579 | /* If guest state is invalid, start emulating */ | 3653 | /* If guest state is invalid, start emulating */ |
3580 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3654 | if (vmx->emulation_required && emulate_invalid_guest_state) |
@@ -3923,10 +3997,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
3923 | { | 3997 | { |
3924 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3998 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3925 | 3999 | ||
3926 | spin_lock(&vmx_vpid_lock); | 4000 | free_vpid(vmx); |
3927 | if (vmx->vpid != 0) | ||
3928 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
3929 | spin_unlock(&vmx_vpid_lock); | ||
3930 | vmx_free_vmcs(vcpu); | 4001 | vmx_free_vmcs(vcpu); |
3931 | kfree(vmx->guest_msrs); | 4002 | kfree(vmx->guest_msrs); |
3932 | kvm_vcpu_uninit(vcpu); | 4003 | kvm_vcpu_uninit(vcpu); |
@@ -3988,6 +4059,7 @@ free_msrs: | |||
3988 | uninit_vcpu: | 4059 | uninit_vcpu: |
3989 | kvm_vcpu_uninit(&vmx->vcpu); | 4060 | kvm_vcpu_uninit(&vmx->vcpu); |
3990 | free_vcpu: | 4061 | free_vcpu: |
4062 | free_vpid(vmx); | ||
3991 | kmem_cache_free(kvm_vcpu_cache, vmx); | 4063 | kmem_cache_free(kvm_vcpu_cache, vmx); |
3992 | return ERR_PTR(err); | 4064 | return ERR_PTR(err); |
3993 | } | 4065 | } |
@@ -4118,6 +4190,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
4118 | } | 4190 | } |
4119 | } | 4191 | } |
4120 | 4192 | ||
4193 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
4194 | { | ||
4195 | } | ||
4196 | |||
4121 | static struct kvm_x86_ops vmx_x86_ops = { | 4197 | static struct kvm_x86_ops vmx_x86_ops = { |
4122 | .cpu_has_kvm_support = cpu_has_kvm_support, | 4198 | .cpu_has_kvm_support = cpu_has_kvm_support, |
4123 | .disabled_by_bios = vmx_disabled_by_bios, | 4199 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -4154,6 +4230,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4154 | .set_idt = vmx_set_idt, | 4230 | .set_idt = vmx_set_idt, |
4155 | .get_gdt = vmx_get_gdt, | 4231 | .get_gdt = vmx_get_gdt, |
4156 | .set_gdt = vmx_set_gdt, | 4232 | .set_gdt = vmx_set_gdt, |
4233 | .set_dr7 = vmx_set_dr7, | ||
4157 | .cache_reg = vmx_cache_reg, | 4234 | .cache_reg = vmx_cache_reg, |
4158 | .get_rflags = vmx_get_rflags, | 4235 | .get_rflags = vmx_get_rflags, |
4159 | .set_rflags = vmx_set_rflags, | 4236 | .set_rflags = vmx_set_rflags, |
@@ -4189,6 +4266,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4189 | .cpuid_update = vmx_cpuid_update, | 4266 | .cpuid_update = vmx_cpuid_update, |
4190 | 4267 | ||
4191 | .rdtscp_supported = vmx_rdtscp_supported, | 4268 | .rdtscp_supported = vmx_rdtscp_supported, |
4269 | |||
4270 | .set_supported_cpuid = vmx_set_supported_cpuid, | ||
4192 | }; | 4271 | }; |
4193 | 4272 | ||
4194 | static int __init vmx_init(void) | 4273 | static int __init vmx_init(void) |
@@ -4236,7 +4315,8 @@ static int __init vmx_init(void) | |||
4236 | 4315 | ||
4237 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | 4316 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
4238 | 4317 | ||
4239 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); | 4318 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), |
4319 | __alignof__(struct vcpu_vmx), THIS_MODULE); | ||
4240 | if (r) | 4320 | if (r) |
4241 | goto out3; | 4321 | goto out3; |
4242 | 4322 | ||
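Among the vmx.c changes above, the new add_atomic_switch_msr()/clear_atomic_switch_msr() pair keeps parallel guest/host arrays that back the VM-entry and VM-exit MSR-load lists (used here to switch EFER atomically when EPT is enabled and guest and host disagree on NX). The following is a minimal user-space model of that bookkeeping only; the vmcs_write32() count updates are replaced by the plain nr field and the example MSR values are illustrative.

/* Standalone model of the vmx.c MSR-autoload bookkeeping added above.
 * The VMCS writes are omitted; only the list handling is modelled. */
#include <assert.h>
#include <stdio.h>

#define NR_AUTOLOAD_MSRS 1	/* same limit as the hunk */

struct msr_entry { unsigned index; unsigned long long value; };

struct msr_autoload {
	unsigned nr;
	struct msr_entry guest[NR_AUTOLOAD_MSRS];
	struct msr_entry host[NR_AUTOLOAD_MSRS];
};

/* Mirrors add_atomic_switch_msr(): update in place if the MSR is already
 * listed, otherwise append.  The kernel hunk relies on callers staying
 * within NR_AUTOLOAD_MSRS; the assert makes that explicit here. */
static void add_switch_msr(struct msr_autoload *m, unsigned msr,
			   unsigned long long guest_val,
			   unsigned long long host_val)
{
	unsigned i;

	for (i = 0; i < m->nr; ++i)
		if (m->guest[i].index == msr)
			break;
	if (i == m->nr) {
		assert(m->nr < NR_AUTOLOAD_MSRS);
		++m->nr;	/* the kernel also updates the VMCS load counts here */
	}
	m->guest[i].index = msr;
	m->guest[i].value = guest_val;
	m->host[i].index  = msr;
	m->host[i].value  = host_val;
}

/* Mirrors clear_atomic_switch_msr(): remove by swapping in the tail entry. */
static void clear_switch_msr(struct msr_autoload *m, unsigned msr)
{
	unsigned i;

	for (i = 0; i < m->nr; ++i)
		if (m->guest[i].index == msr)
			break;
	if (i == m->nr)
		return;
	--m->nr;
	m->guest[i] = m->guest[m->nr];
	m->host[i]  = m->host[m->nr];
}

int main(void)
{
	struct msr_autoload m = { 0 };

	/* e.g. EFER (0xc0000080): guest without NXE, host with NXE */
	add_switch_msr(&m, 0xc0000080, 0x500, 0xd01);
	printf("after add:   nr=%u\n", m.nr);
	clear_switch_msr(&m, 0xc0000080);
	printf("after clear: nr=%u\n", m.nr);
	return 0;
}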
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dd9bc8fb81ab..05d571f6f196 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -42,7 +42,7 @@ | |||
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/perf_event.h> | 43 | #include <linux/perf_event.h> |
44 | #include <trace/events/kvm.h> | 44 | #include <trace/events/kvm.h> |
45 | #undef TRACE_INCLUDE_FILE | 45 | |
46 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
47 | #include "trace.h" | 47 | #include "trace.h" |
48 | 48 | ||
@@ -224,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore) | |||
224 | kvm_on_user_return(&smsr->urn); | 224 | kvm_on_user_return(&smsr->urn); |
225 | } | 225 | } |
226 | 226 | ||
227 | unsigned long segment_base(u16 selector) | ||
228 | { | ||
229 | struct descriptor_table gdt; | ||
230 | struct desc_struct *d; | ||
231 | unsigned long table_base; | ||
232 | unsigned long v; | ||
233 | |||
234 | if (selector == 0) | ||
235 | return 0; | ||
236 | |||
237 | kvm_get_gdt(&gdt); | ||
238 | table_base = gdt.base; | ||
239 | |||
240 | if (selector & 4) { /* from ldt */ | ||
241 | u16 ldt_selector = kvm_read_ldt(); | ||
242 | |||
243 | table_base = segment_base(ldt_selector); | ||
244 | } | ||
245 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
246 | v = get_desc_base(d); | ||
247 | #ifdef CONFIG_X86_64 | ||
248 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
249 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
250 | #endif | ||
251 | return v; | ||
252 | } | ||
253 | EXPORT_SYMBOL_GPL(segment_base); | ||
254 | |||
255 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | 227 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) |
256 | { | 228 | { |
257 | if (irqchip_in_kernel(vcpu->kvm)) | 229 | if (irqchip_in_kernel(vcpu->kvm)) |
@@ -293,7 +265,8 @@ static int exception_class(int vector) | |||
293 | } | 265 | } |
294 | 266 | ||
295 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | 267 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, |
296 | unsigned nr, bool has_error, u32 error_code) | 268 | unsigned nr, bool has_error, u32 error_code, |
269 | bool reinject) | ||
297 | { | 270 | { |
298 | u32 prev_nr; | 271 | u32 prev_nr; |
299 | int class1, class2; | 272 | int class1, class2; |
@@ -304,6 +277,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
304 | vcpu->arch.exception.has_error_code = has_error; | 277 | vcpu->arch.exception.has_error_code = has_error; |
305 | vcpu->arch.exception.nr = nr; | 278 | vcpu->arch.exception.nr = nr; |
306 | vcpu->arch.exception.error_code = error_code; | 279 | vcpu->arch.exception.error_code = error_code; |
280 | vcpu->arch.exception.reinject = reinject; | ||
307 | return; | 281 | return; |
308 | } | 282 | } |
309 | 283 | ||
@@ -332,10 +306,16 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
332 | 306 | ||
333 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 307 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
334 | { | 308 | { |
335 | kvm_multiple_exception(vcpu, nr, false, 0); | 309 | kvm_multiple_exception(vcpu, nr, false, 0, false); |
336 | } | 310 | } |
337 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 311 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
338 | 312 | ||
313 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) | ||
314 | { | ||
315 | kvm_multiple_exception(vcpu, nr, false, 0, true); | ||
316 | } | ||
317 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); | ||
318 | |||
339 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | 319 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, |
340 | u32 error_code) | 320 | u32 error_code) |
341 | { | 321 | { |
@@ -352,10 +332,16 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
352 | 332 | ||
353 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 333 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
354 | { | 334 | { |
355 | kvm_multiple_exception(vcpu, nr, true, error_code); | 335 | kvm_multiple_exception(vcpu, nr, true, error_code, false); |
356 | } | 336 | } |
357 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 337 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
358 | 338 | ||
339 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | ||
340 | { | ||
341 | kvm_multiple_exception(vcpu, nr, true, error_code, true); | ||
342 | } | ||
343 | EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); | ||
344 | |||
359 | /* | 345 | /* |
360 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue | 346 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue |
361 | * a #GP and return false. | 347 | * a #GP and return false. |
@@ -476,7 +462,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
476 | } | 462 | } |
477 | 463 | ||
478 | kvm_x86_ops->set_cr0(vcpu, cr0); | 464 | kvm_x86_ops->set_cr0(vcpu, cr0); |
479 | vcpu->arch.cr0 = cr0; | ||
480 | 465 | ||
481 | kvm_mmu_reset_context(vcpu); | 466 | kvm_mmu_reset_context(vcpu); |
482 | return; | 467 | return; |
@@ -485,7 +470,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
485 | 470 | ||
486 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 471 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
487 | { | 472 | { |
488 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); | 473 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); |
489 | } | 474 | } |
490 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 475 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
491 | 476 | ||
@@ -517,7 +502,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
517 | } | 502 | } |
518 | kvm_x86_ops->set_cr4(vcpu, cr4); | 503 | kvm_x86_ops->set_cr4(vcpu, cr4); |
519 | vcpu->arch.cr4 = cr4; | 504 | vcpu->arch.cr4 = cr4; |
520 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; | ||
521 | kvm_mmu_reset_context(vcpu); | 505 | kvm_mmu_reset_context(vcpu); |
522 | } | 506 | } |
523 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 507 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
@@ -592,6 +576,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
592 | } | 576 | } |
593 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 577 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
594 | 578 | ||
579 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
580 | { | ||
581 | switch (dr) { | ||
582 | case 0 ... 3: | ||
583 | vcpu->arch.db[dr] = val; | ||
584 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
585 | vcpu->arch.eff_db[dr] = val; | ||
586 | break; | ||
587 | case 4: | ||
588 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
589 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
590 | return 1; | ||
591 | } | ||
592 | /* fall through */ | ||
593 | case 6: | ||
594 | if (val & 0xffffffff00000000ULL) { | ||
595 | kvm_inject_gp(vcpu, 0); | ||
596 | return 1; | ||
597 | } | ||
598 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
599 | break; | ||
600 | case 5: | ||
601 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
602 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
603 | return 1; | ||
604 | } | ||
605 | /* fall through */ | ||
606 | default: /* 7 */ | ||
607 | if (val & 0xffffffff00000000ULL) { | ||
608 | kvm_inject_gp(vcpu, 0); | ||
609 | return 1; | ||
610 | } | ||
611 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
612 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
613 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | ||
614 | vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); | ||
615 | } | ||
616 | break; | ||
617 | } | ||
618 | |||
619 | return 0; | ||
620 | } | ||
621 | EXPORT_SYMBOL_GPL(kvm_set_dr); | ||
622 | |||
623 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | ||
624 | { | ||
625 | switch (dr) { | ||
626 | case 0 ... 3: | ||
627 | *val = vcpu->arch.db[dr]; | ||
628 | break; | ||
629 | case 4: | ||
630 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
631 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
632 | return 1; | ||
633 | } | ||
634 | /* fall through */ | ||
635 | case 6: | ||
636 | *val = vcpu->arch.dr6; | ||
637 | break; | ||
638 | case 5: | ||
639 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
640 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
641 | return 1; | ||
642 | } | ||
643 | /* fall through */ | ||
644 | default: /* 7 */ | ||
645 | *val = vcpu->arch.dr7; | ||
646 | break; | ||
647 | } | ||
648 | |||
649 | return 0; | ||
650 | } | ||
651 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
652 | |||
595 | static inline u32 bit(int bitno) | 653 | static inline u32 bit(int bitno) |
596 | { | 654 | { |
597 | return 1 << (bitno & 31); | 655 | return 1 << (bitno & 31); |
@@ -606,9 +664,10 @@ static inline u32 bit(int bitno) | |||
606 | * kvm-specific. Those are put in the beginning of the list. | 664 | * kvm-specific. Those are put in the beginning of the list. |
607 | */ | 665 | */ |
608 | 666 | ||
609 | #define KVM_SAVE_MSRS_BEGIN 5 | 667 | #define KVM_SAVE_MSRS_BEGIN 7 |
610 | static u32 msrs_to_save[] = { | 668 | static u32 msrs_to_save[] = { |
611 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 669 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
670 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | ||
612 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 671 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
613 | HV_X64_MSR_APIC_ASSIST_PAGE, | 672 | HV_X64_MSR_APIC_ASSIST_PAGE, |
614 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 673 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
@@ -625,48 +684,42 @@ static u32 emulated_msrs[] = { | |||
625 | MSR_IA32_MISC_ENABLE, | 684 | MSR_IA32_MISC_ENABLE, |
626 | }; | 685 | }; |
627 | 686 | ||
628 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 687 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) |
629 | { | 688 | { |
630 | if (efer & efer_reserved_bits) { | 689 | if (efer & efer_reserved_bits) |
631 | kvm_inject_gp(vcpu, 0); | 690 | return 1; |
632 | return; | ||
633 | } | ||
634 | 691 | ||
635 | if (is_paging(vcpu) | 692 | if (is_paging(vcpu) |
636 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { | 693 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) |
637 | kvm_inject_gp(vcpu, 0); | 694 | return 1; |
638 | return; | ||
639 | } | ||
640 | 695 | ||
641 | if (efer & EFER_FFXSR) { | 696 | if (efer & EFER_FFXSR) { |
642 | struct kvm_cpuid_entry2 *feat; | 697 | struct kvm_cpuid_entry2 *feat; |
643 | 698 | ||
644 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 699 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
645 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { | 700 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) |
646 | kvm_inject_gp(vcpu, 0); | 701 | return 1; |
647 | return; | ||
648 | } | ||
649 | } | 702 | } |
650 | 703 | ||
651 | if (efer & EFER_SVME) { | 704 | if (efer & EFER_SVME) { |
652 | struct kvm_cpuid_entry2 *feat; | 705 | struct kvm_cpuid_entry2 *feat; |
653 | 706 | ||
654 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 707 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
655 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { | 708 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) |
656 | kvm_inject_gp(vcpu, 0); | 709 | return 1; |
657 | return; | ||
658 | } | ||
659 | } | 710 | } |
660 | 711 | ||
661 | kvm_x86_ops->set_efer(vcpu, efer); | ||
662 | |||
663 | efer &= ~EFER_LMA; | 712 | efer &= ~EFER_LMA; |
664 | efer |= vcpu->arch.efer & EFER_LMA; | 713 | efer |= vcpu->arch.efer & EFER_LMA; |
665 | 714 | ||
715 | kvm_x86_ops->set_efer(vcpu, efer); | ||
716 | |||
666 | vcpu->arch.efer = efer; | 717 | vcpu->arch.efer = efer; |
667 | 718 | ||
668 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 719 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
669 | kvm_mmu_reset_context(vcpu); | 720 | kvm_mmu_reset_context(vcpu); |
721 | |||
722 | return 0; | ||
670 | } | 723 | } |
671 | 724 | ||
672 | void kvm_enable_efer_bits(u64 mask) | 725 | void kvm_enable_efer_bits(u64 mask) |
@@ -696,14 +749,22 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
696 | 749 | ||
697 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | 750 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) |
698 | { | 751 | { |
699 | static int version; | 752 | int version; |
753 | int r; | ||
700 | struct pvclock_wall_clock wc; | 754 | struct pvclock_wall_clock wc; |
701 | struct timespec boot; | 755 | struct timespec boot; |
702 | 756 | ||
703 | if (!wall_clock) | 757 | if (!wall_clock) |
704 | return; | 758 | return; |
705 | 759 | ||
706 | version++; | 760 | r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); |
761 | if (r) | ||
762 | return; | ||
763 | |||
764 | if (version & 1) | ||
765 | ++version; /* first time write, random junk */ | ||
766 | |||
767 | ++version; | ||
707 | 768 | ||
708 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 769 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
709 | 770 | ||
@@ -796,6 +857,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
796 | vcpu->hv_clock.system_time = ts.tv_nsec + | 857 | vcpu->hv_clock.system_time = ts.tv_nsec + |
797 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; | 858 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
798 | 859 | ||
860 | vcpu->hv_clock.flags = 0; | ||
861 | |||
799 | /* | 862 | /* |
800 | * The interface expects us to write an even number signaling that the | 863 | * The interface expects us to write an even number signaling that the |
801 | * update is finished. Since the guest won't see the intermediate | 864 | * update is finished. Since the guest won't see the intermediate |
@@ -1087,10 +1150,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1087 | { | 1150 | { |
1088 | switch (msr) { | 1151 | switch (msr) { |
1089 | case MSR_EFER: | 1152 | case MSR_EFER: |
1090 | set_efer(vcpu, data); | 1153 | return set_efer(vcpu, data); |
1091 | break; | ||
1092 | case MSR_K7_HWCR: | 1154 | case MSR_K7_HWCR: |
1093 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 1155 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
1156 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | ||
1094 | if (data != 0) { | 1157 | if (data != 0) { |
1095 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1158 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
1096 | data); | 1159 | data); |
@@ -1133,10 +1196,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1133 | case MSR_IA32_MISC_ENABLE: | 1196 | case MSR_IA32_MISC_ENABLE: |
1134 | vcpu->arch.ia32_misc_enable_msr = data; | 1197 | vcpu->arch.ia32_misc_enable_msr = data; |
1135 | break; | 1198 | break; |
1199 | case MSR_KVM_WALL_CLOCK_NEW: | ||
1136 | case MSR_KVM_WALL_CLOCK: | 1200 | case MSR_KVM_WALL_CLOCK: |
1137 | vcpu->kvm->arch.wall_clock = data; | 1201 | vcpu->kvm->arch.wall_clock = data; |
1138 | kvm_write_wall_clock(vcpu->kvm, data); | 1202 | kvm_write_wall_clock(vcpu->kvm, data); |
1139 | break; | 1203 | break; |
1204 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
1140 | case MSR_KVM_SYSTEM_TIME: { | 1205 | case MSR_KVM_SYSTEM_TIME: { |
1141 | if (vcpu->arch.time_page) { | 1206 | if (vcpu->arch.time_page) { |
1142 | kvm_release_page_dirty(vcpu->arch.time_page); | 1207 | kvm_release_page_dirty(vcpu->arch.time_page); |
@@ -1408,9 +1473,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1408 | data = vcpu->arch.efer; | 1473 | data = vcpu->arch.efer; |
1409 | break; | 1474 | break; |
1410 | case MSR_KVM_WALL_CLOCK: | 1475 | case MSR_KVM_WALL_CLOCK: |
1476 | case MSR_KVM_WALL_CLOCK_NEW: | ||
1411 | data = vcpu->kvm->arch.wall_clock; | 1477 | data = vcpu->kvm->arch.wall_clock; |
1412 | break; | 1478 | break; |
1413 | case MSR_KVM_SYSTEM_TIME: | 1479 | case MSR_KVM_SYSTEM_TIME: |
1480 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
1414 | data = vcpu->arch.time; | 1481 | data = vcpu->arch.time; |
1415 | break; | 1482 | break; |
1416 | case MSR_IA32_P5_MC_ADDR: | 1483 | case MSR_IA32_P5_MC_ADDR: |
@@ -1549,6 +1616,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1549 | case KVM_CAP_HYPERV_VAPIC: | 1616 | case KVM_CAP_HYPERV_VAPIC: |
1550 | case KVM_CAP_HYPERV_SPIN: | 1617 | case KVM_CAP_HYPERV_SPIN: |
1551 | case KVM_CAP_PCI_SEGMENT: | 1618 | case KVM_CAP_PCI_SEGMENT: |
1619 | case KVM_CAP_DEBUGREGS: | ||
1552 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1620 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1553 | r = 1; | 1621 | r = 1; |
1554 | break; | 1622 | break; |
@@ -1769,6 +1837,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1769 | { | 1837 | { |
1770 | int r; | 1838 | int r; |
1771 | 1839 | ||
1840 | vcpu_load(vcpu); | ||
1772 | r = -E2BIG; | 1841 | r = -E2BIG; |
1773 | if (cpuid->nent < vcpu->arch.cpuid_nent) | 1842 | if (cpuid->nent < vcpu->arch.cpuid_nent) |
1774 | goto out; | 1843 | goto out; |
@@ -1780,6 +1849,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1780 | 1849 | ||
1781 | out: | 1850 | out: |
1782 | cpuid->nent = vcpu->arch.cpuid_nent; | 1851 | cpuid->nent = vcpu->arch.cpuid_nent; |
1852 | vcpu_put(vcpu); | ||
1783 | return r; | 1853 | return r; |
1784 | } | 1854 | } |
1785 | 1855 | ||
@@ -1910,6 +1980,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1910 | } | 1980 | } |
1911 | break; | 1981 | break; |
1912 | } | 1982 | } |
1983 | case KVM_CPUID_SIGNATURE: { | ||
1984 | char signature[12] = "KVMKVMKVM\0\0"; | ||
1985 | u32 *sigptr = (u32 *)signature; | ||
1986 | entry->eax = 0; | ||
1987 | entry->ebx = sigptr[0]; | ||
1988 | entry->ecx = sigptr[1]; | ||
1989 | entry->edx = sigptr[2]; | ||
1990 | break; | ||
1991 | } | ||
1992 | case KVM_CPUID_FEATURES: | ||
1993 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
1994 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
1995 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
1996 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
1997 | entry->ebx = 0; | ||
1998 | entry->ecx = 0; | ||
1999 | entry->edx = 0; | ||
2000 | break; | ||
1913 | case 0x80000000: | 2001 | case 0x80000000: |
1914 | entry->eax = min(entry->eax, 0x8000001a); | 2002 | entry->eax = min(entry->eax, 0x8000001a); |
1915 | break; | 2003 | break; |
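
The new KVM_CPUID_SIGNATURE and KVM_CPUID_FEATURES entries let a guest discover the hypervisor and its paravirtual clock support instead of probing MSRs blindly. A minimal guest-side detection sketch follows; the leaf numbers 0x40000000/0x40000001 and the feature-bit positions are assumptions based on the KVM paravirt CPUID convention rather than values shown in this hunk.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	asm volatile("cpuid" : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) : "a"(leaf));
}

int main(void)
{
	uint32_t eax, ebx, ecx, edx;
	char sig[13];

	cpuid(0x40000000, &eax, &ebx, &ecx, &edx);	/* KVM_CPUID_SIGNATURE (assumed leaf) */
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';

	if (strcmp(sig, "KVMKVMKVM") != 0) {
		puts("not running on KVM");
		return 0;
	}

	cpuid(0x40000001, &eax, &ebx, &ecx, &edx);	/* KVM_CPUID_FEATURES (assumed leaf) */
	printf("old kvmclock: %s, new kvmclock: %s\n",
	       (eax & (1u << 0)) ? "yes" : "no",	/* KVM_FEATURE_CLOCKSOURCE (assumed bit 0) */
	       (eax & (1u << 3)) ? "yes" : "no");	/* KVM_FEATURE_CLOCKSOURCE2 (assumed bit 3) */
	return 0;
}
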
@@ -1918,6 +2006,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1918 | entry->ecx &= kvm_supported_word6_x86_features; | 2006 | entry->ecx &= kvm_supported_word6_x86_features; |
1919 | break; | 2007 | break; |
1920 | } | 2008 | } |
2009 | |||
2010 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
2011 | |||
1921 | put_cpu(); | 2012 | put_cpu(); |
1922 | } | 2013 | } |
1923 | 2014 | ||
@@ -1953,6 +2044,23 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
1953 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) | 2044 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) |
1954 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | 2045 | do_cpuid_ent(&cpuid_entries[nent], func, 0, |
1955 | &nent, cpuid->nent); | 2046 | &nent, cpuid->nent); |
2047 | |||
2048 | |||
2049 | |||
2050 | r = -E2BIG; | ||
2051 | if (nent >= cpuid->nent) | ||
2052 | goto out_free; | ||
2053 | |||
2054 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | ||
2055 | cpuid->nent); | ||
2056 | |||
2057 | r = -E2BIG; | ||
2058 | if (nent >= cpuid->nent) | ||
2059 | goto out_free; | ||
2060 | |||
2061 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, | ||
2062 | cpuid->nent); | ||
2063 | |||
1956 | r = -E2BIG; | 2064 | r = -E2BIG; |
1957 | if (nent >= cpuid->nent) | 2065 | if (nent >= cpuid->nent) |
1958 | goto out_free; | 2066 | goto out_free; |
@@ -2032,6 +2140,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2032 | int r; | 2140 | int r; |
2033 | unsigned bank_num = mcg_cap & 0xff, bank; | 2141 | unsigned bank_num = mcg_cap & 0xff, bank; |
2034 | 2142 | ||
2143 | vcpu_load(vcpu); | ||
2035 | r = -EINVAL; | 2144 | r = -EINVAL; |
2036 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) | 2145 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
2037 | goto out; | 2146 | goto out; |
@@ -2046,6 +2155,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2046 | for (bank = 0; bank < bank_num; bank++) | 2155 | for (bank = 0; bank < bank_num; bank++) |
2047 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | 2156 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; |
2048 | out: | 2157 | out: |
2158 | vcpu_put(vcpu); | ||
2049 | return r; | 2159 | return r; |
2050 | } | 2160 | } |
2051 | 2161 | ||
@@ -2105,14 +2215,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2105 | { | 2215 | { |
2106 | vcpu_load(vcpu); | 2216 | vcpu_load(vcpu); |
2107 | 2217 | ||
2108 | events->exception.injected = vcpu->arch.exception.pending; | 2218 | events->exception.injected = |
2219 | vcpu->arch.exception.pending && | ||
2220 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | ||
2109 | events->exception.nr = vcpu->arch.exception.nr; | 2221 | events->exception.nr = vcpu->arch.exception.nr; |
2110 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 2222 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
2111 | events->exception.error_code = vcpu->arch.exception.error_code; | 2223 | events->exception.error_code = vcpu->arch.exception.error_code; |
2112 | 2224 | ||
2113 | events->interrupt.injected = vcpu->arch.interrupt.pending; | 2225 | events->interrupt.injected = |
2226 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; | ||
2114 | events->interrupt.nr = vcpu->arch.interrupt.nr; | 2227 | events->interrupt.nr = vcpu->arch.interrupt.nr; |
2115 | events->interrupt.soft = vcpu->arch.interrupt.soft; | 2228 | events->interrupt.soft = 0; |
2229 | events->interrupt.shadow = | ||
2230 | kvm_x86_ops->get_interrupt_shadow(vcpu, | ||
2231 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | ||
2116 | 2232 | ||
2117 | events->nmi.injected = vcpu->arch.nmi_injected; | 2233 | events->nmi.injected = vcpu->arch.nmi_injected; |
2118 | events->nmi.pending = vcpu->arch.nmi_pending; | 2234 | events->nmi.pending = vcpu->arch.nmi_pending; |
@@ -2121,7 +2237,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2121 | events->sipi_vector = vcpu->arch.sipi_vector; | 2237 | events->sipi_vector = vcpu->arch.sipi_vector; |
2122 | 2238 | ||
2123 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2239 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2124 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | 2240 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2241 | | KVM_VCPUEVENT_VALID_SHADOW); | ||
2125 | 2242 | ||
2126 | vcpu_put(vcpu); | 2243 | vcpu_put(vcpu); |
2127 | } | 2244 | } |
@@ -2130,7 +2247,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2130 | struct kvm_vcpu_events *events) | 2247 | struct kvm_vcpu_events *events) |
2131 | { | 2248 | { |
2132 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | 2249 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING |
2133 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | 2250 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2251 | | KVM_VCPUEVENT_VALID_SHADOW)) | ||
2134 | return -EINVAL; | 2252 | return -EINVAL; |
2135 | 2253 | ||
2136 | vcpu_load(vcpu); | 2254 | vcpu_load(vcpu); |
@@ -2145,6 +2263,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2145 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2263 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
2146 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | 2264 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) |
2147 | kvm_pic_clear_isr_ack(vcpu->kvm); | 2265 | kvm_pic_clear_isr_ack(vcpu->kvm); |
2266 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | ||
2267 | kvm_x86_ops->set_interrupt_shadow(vcpu, | ||
2268 | events->interrupt.shadow); | ||
2148 | 2269 | ||
2149 | vcpu->arch.nmi_injected = events->nmi.injected; | 2270 | vcpu->arch.nmi_injected = events->nmi.injected; |
2150 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) | 2271 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) |
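
With these changes soft-injected events are no longer reported as injected, and the interrupt shadow travels through kvm_vcpu_events guarded by KVM_VCPUEVENT_VALID_SHADOW, so a VMM that round-trips the structure keeps MOV SS/STI blocking intact. A hedged sketch of that round trip (the source and destination vcpu fds are assumed to come from VMs built elsewhere):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int migrate_vcpu_events(int src_vcpu_fd, int dst_vcpu_fd)
{
	struct kvm_vcpu_events events;

	if (ioctl(src_vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0) {
		perror("KVM_GET_VCPU_EVENTS");
		return -1;
	}

	/* The kernel applies the shadow field only when KVM_VCPUEVENT_VALID_SHADOW
	 * is set; the GET side above already sets it, so keep it when copying. */
	events.flags &= KVM_VCPUEVENT_VALID_NMI_PENDING |
			KVM_VCPUEVENT_VALID_SIPI_VECTOR |
			KVM_VCPUEVENT_VALID_SHADOW;

	if (ioctl(dst_vcpu_fd, KVM_SET_VCPU_EVENTS, &events) < 0) {
		perror("KVM_SET_VCPU_EVENTS");
		return -1;
	}
	return 0;
}
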
@@ -2159,6 +2280,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2159 | return 0; | 2280 | return 0; |
2160 | } | 2281 | } |
2161 | 2282 | ||
2283 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | ||
2284 | struct kvm_debugregs *dbgregs) | ||
2285 | { | ||
2286 | vcpu_load(vcpu); | ||
2287 | |||
2288 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | ||
2289 | dbgregs->dr6 = vcpu->arch.dr6; | ||
2290 | dbgregs->dr7 = vcpu->arch.dr7; | ||
2291 | dbgregs->flags = 0; | ||
2292 | |||
2293 | vcpu_put(vcpu); | ||
2294 | } | ||
2295 | |||
2296 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | ||
2297 | struct kvm_debugregs *dbgregs) | ||
2298 | { | ||
2299 | if (dbgregs->flags) | ||
2300 | return -EINVAL; | ||
2301 | |||
2302 | vcpu_load(vcpu); | ||
2303 | |||
2304 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | ||
2305 | vcpu->arch.dr6 = dbgregs->dr6; | ||
2306 | vcpu->arch.dr7 = dbgregs->dr7; | ||
2307 | |||
2308 | vcpu_put(vcpu); | ||
2309 | |||
2310 | return 0; | ||
2311 | } | ||
2312 | |||
2162 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2313 | long kvm_arch_vcpu_ioctl(struct file *filp, |
2163 | unsigned int ioctl, unsigned long arg) | 2314 | unsigned int ioctl, unsigned long arg) |
2164 | { | 2315 | { |
@@ -2313,7 +2464,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2313 | r = -EFAULT; | 2464 | r = -EFAULT; |
2314 | if (copy_from_user(&mce, argp, sizeof mce)) | 2465 | if (copy_from_user(&mce, argp, sizeof mce)) |
2315 | goto out; | 2466 | goto out; |
2467 | vcpu_load(vcpu); | ||
2316 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2468 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
2469 | vcpu_put(vcpu); | ||
2317 | break; | 2470 | break; |
2318 | } | 2471 | } |
2319 | case KVM_GET_VCPU_EVENTS: { | 2472 | case KVM_GET_VCPU_EVENTS: { |
@@ -2337,6 +2490,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2337 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | 2490 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); |
2338 | break; | 2491 | break; |
2339 | } | 2492 | } |
2493 | case KVM_GET_DEBUGREGS: { | ||
2494 | struct kvm_debugregs dbgregs; | ||
2495 | |||
2496 | kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); | ||
2497 | |||
2498 | r = -EFAULT; | ||
2499 | if (copy_to_user(argp, &dbgregs, | ||
2500 | sizeof(struct kvm_debugregs))) | ||
2501 | break; | ||
2502 | r = 0; | ||
2503 | break; | ||
2504 | } | ||
2505 | case KVM_SET_DEBUGREGS: { | ||
2506 | struct kvm_debugregs dbgregs; | ||
2507 | |||
2508 | r = -EFAULT; | ||
2509 | if (copy_from_user(&dbgregs, argp, | ||
2510 | sizeof(struct kvm_debugregs))) | ||
2511 | break; | ||
2512 | |||
2513 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | ||
2514 | break; | ||
2515 | } | ||
2340 | default: | 2516 | default: |
2341 | r = -EINVAL; | 2517 | r = -EINVAL; |
2342 | } | 2518 | } |
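
Together with the KVM_CAP_DEBUGREGS advertisement earlier in the patch, these two ioctl cases give userspace a direct way to save and restore db0-db3, dr6 and dr7. A hedged usage sketch follows; the fds are assumed to come from an already-built VM, and linux/kvm.h must be new enough to define struct kvm_debugregs.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int dump_debugregs(int kvm_fd, int vcpu_fd)
{
	struct kvm_debugregs regs;
	int i;

	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_DEBUGREGS) <= 0) {
		fprintf(stderr, "kernel lacks KVM_CAP_DEBUGREGS\n");
		return -1;
	}

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &regs) < 0) {
		perror("KVM_GET_DEBUGREGS");
		return -1;
	}

	for (i = 0; i < 4; i++)
		printf("db%d = %#llx\n", i, (unsigned long long)regs.db[i]);
	printf("dr6 = %#llx, dr7 = %#llx\n",
	       (unsigned long long)regs.dr6, (unsigned long long)regs.dr7);

	/* flags must be zero when handing the structure back. */
	regs.flags = 0;
	return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &regs);
}
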
@@ -2390,7 +2566,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | |||
2390 | struct kvm_mem_alias *alias; | 2566 | struct kvm_mem_alias *alias; |
2391 | struct kvm_mem_aliases *aliases; | 2567 | struct kvm_mem_aliases *aliases; |
2392 | 2568 | ||
2393 | aliases = rcu_dereference(kvm->arch.aliases); | 2569 | aliases = kvm_aliases(kvm); |
2394 | 2570 | ||
2395 | for (i = 0; i < aliases->naliases; ++i) { | 2571 | for (i = 0; i < aliases->naliases; ++i) { |
2396 | alias = &aliases->aliases[i]; | 2572 | alias = &aliases->aliases[i]; |
@@ -2409,7 +2585,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
2409 | struct kvm_mem_alias *alias; | 2585 | struct kvm_mem_alias *alias; |
2410 | struct kvm_mem_aliases *aliases; | 2586 | struct kvm_mem_aliases *aliases; |
2411 | 2587 | ||
2412 | aliases = rcu_dereference(kvm->arch.aliases); | 2588 | aliases = kvm_aliases(kvm); |
2413 | 2589 | ||
2414 | for (i = 0; i < aliases->naliases; ++i) { | 2590 | for (i = 0; i < aliases->naliases; ++i) { |
2415 | alias = &aliases->aliases[i]; | 2591 | alias = &aliases->aliases[i]; |
@@ -2804,11 +2980,13 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2804 | r = -EFAULT; | 2980 | r = -EFAULT; |
2805 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 2981 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
2806 | goto out; | 2982 | goto out; |
2983 | r = -ENXIO; | ||
2807 | if (irqchip_in_kernel(kvm)) { | 2984 | if (irqchip_in_kernel(kvm)) { |
2808 | __s32 status; | 2985 | __s32 status; |
2809 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2986 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
2810 | irq_event.irq, irq_event.level); | 2987 | irq_event.irq, irq_event.level); |
2811 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2988 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
2989 | r = -EFAULT; | ||
2812 | irq_event.status = status; | 2990 | irq_event.status = status; |
2813 | if (copy_to_user(argp, &irq_event, | 2991 | if (copy_to_user(argp, &irq_event, |
2814 | sizeof irq_event)) | 2992 | sizeof irq_event)) |
@@ -3024,6 +3202,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
3024 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3202 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
3025 | } | 3203 | } |
3026 | 3204 | ||
3205 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | ||
3206 | struct kvm_segment *var, int seg) | ||
3207 | { | ||
3208 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
3209 | } | ||
3210 | |||
3211 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
3212 | struct kvm_segment *var, int seg) | ||
3213 | { | ||
3214 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
3215 | } | ||
3216 | |||
3027 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3217 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
3028 | { | 3218 | { |
3029 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3219 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
@@ -3104,14 +3294,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | |||
3104 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3294 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); |
3105 | } | 3295 | } |
3106 | 3296 | ||
3107 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3297 | static int kvm_write_guest_virt_system(gva_t addr, void *val, |
3108 | struct kvm_vcpu *vcpu, u32 *error) | 3298 | unsigned int bytes, |
3299 | struct kvm_vcpu *vcpu, | ||
3300 | u32 *error) | ||
3109 | { | 3301 | { |
3110 | void *data = val; | 3302 | void *data = val; |
3111 | int r = X86EMUL_CONTINUE; | 3303 | int r = X86EMUL_CONTINUE; |
3112 | 3304 | ||
3113 | while (bytes) { | 3305 | while (bytes) { |
3114 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); | 3306 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, |
3307 | PFERR_WRITE_MASK, error); | ||
3115 | unsigned offset = addr & (PAGE_SIZE-1); | 3308 | unsigned offset = addr & (PAGE_SIZE-1); |
3116 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3309 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
3117 | int ret; | 3310 | int ret; |
@@ -3134,7 +3327,6 @@ out: | |||
3134 | return r; | 3327 | return r; |
3135 | } | 3328 | } |
3136 | 3329 | ||
3137 | |||
3138 | static int emulator_read_emulated(unsigned long addr, | 3330 | static int emulator_read_emulated(unsigned long addr, |
3139 | void *val, | 3331 | void *val, |
3140 | unsigned int bytes, | 3332 | unsigned int bytes, |
@@ -3237,9 +3429,9 @@ mmio: | |||
3237 | } | 3429 | } |
3238 | 3430 | ||
3239 | int emulator_write_emulated(unsigned long addr, | 3431 | int emulator_write_emulated(unsigned long addr, |
3240 | const void *val, | 3432 | const void *val, |
3241 | unsigned int bytes, | 3433 | unsigned int bytes, |
3242 | struct kvm_vcpu *vcpu) | 3434 | struct kvm_vcpu *vcpu) |
3243 | { | 3435 | { |
3244 | /* Crossing a page boundary? */ | 3436 | /* Crossing a page boundary? */ |
3245 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3437 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
@@ -3257,45 +3449,150 @@ int emulator_write_emulated(unsigned long addr, | |||
3257 | } | 3449 | } |
3258 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | 3450 | EXPORT_SYMBOL_GPL(emulator_write_emulated); |
3259 | 3451 | ||
3452 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | ||
3453 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | ||
3454 | |||
3455 | #ifdef CONFIG_X86_64 | ||
3456 | # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) | ||
3457 | #else | ||
3458 | # define CMPXCHG64(ptr, old, new) \ | ||
3459 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | ||
3460 | #endif | ||
3461 | |||
3260 | static int emulator_cmpxchg_emulated(unsigned long addr, | 3462 | static int emulator_cmpxchg_emulated(unsigned long addr, |
3261 | const void *old, | 3463 | const void *old, |
3262 | const void *new, | 3464 | const void *new, |
3263 | unsigned int bytes, | 3465 | unsigned int bytes, |
3264 | struct kvm_vcpu *vcpu) | 3466 | struct kvm_vcpu *vcpu) |
3265 | { | 3467 | { |
3266 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3468 | gpa_t gpa; |
3267 | #ifndef CONFIG_X86_64 | 3469 | struct page *page; |
3268 | /* guests cmpxchg8b have to be emulated atomically */ | 3470 | char *kaddr; |
3269 | if (bytes == 8) { | 3471 | bool exchanged; |
3270 | gpa_t gpa; | ||
3271 | struct page *page; | ||
3272 | char *kaddr; | ||
3273 | u64 val; | ||
3274 | 3472 | ||
3275 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); | 3473 | /* guests cmpxchg8b have to be emulated atomically */ |
3474 | if (bytes > 8 || (bytes & (bytes - 1))) | ||
3475 | goto emul_write; | ||
3276 | 3476 | ||
3277 | if (gpa == UNMAPPED_GVA || | 3477 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
3278 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
3279 | goto emul_write; | ||
3280 | 3478 | ||
3281 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) | 3479 | if (gpa == UNMAPPED_GVA || |
3282 | goto emul_write; | 3480 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3481 | goto emul_write; | ||
3283 | 3482 | ||
3284 | val = *(u64 *)new; | 3483 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) |
3484 | goto emul_write; | ||
3285 | 3485 | ||
3286 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3486 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3287 | 3487 | ||
3288 | kaddr = kmap_atomic(page, KM_USER0); | 3488 | kaddr = kmap_atomic(page, KM_USER0); |
3289 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 3489 | kaddr += offset_in_page(gpa); |
3290 | kunmap_atomic(kaddr, KM_USER0); | 3490 | switch (bytes) { |
3291 | kvm_release_page_dirty(page); | 3491 | case 1: |
3492 | exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); | ||
3493 | break; | ||
3494 | case 2: | ||
3495 | exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); | ||
3496 | break; | ||
3497 | case 4: | ||
3498 | exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); | ||
3499 | break; | ||
3500 | case 8: | ||
3501 | exchanged = CMPXCHG64(kaddr, old, new); | ||
3502 | break; | ||
3503 | default: | ||
3504 | BUG(); | ||
3292 | } | 3505 | } |
3506 | kunmap_atomic(kaddr, KM_USER0); | ||
3507 | kvm_release_page_dirty(page); | ||
3508 | |||
3509 | if (!exchanged) | ||
3510 | return X86EMUL_CMPXCHG_FAILED; | ||
3511 | |||
3512 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | ||
3513 | |||
3514 | return X86EMUL_CONTINUE; | ||
3515 | |||
3293 | emul_write: | 3516 | emul_write: |
3294 | #endif | 3517 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3295 | 3518 | ||
3296 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3519 | return emulator_write_emulated(addr, new, bytes, vcpu); |
3297 | } | 3520 | } |
3298 | 3521 | ||
3522 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
3523 | { | ||
3524 | /* TODO: String I/O for in kernel device */ | ||
3525 | int r; | ||
3526 | |||
3527 | if (vcpu->arch.pio.in) | ||
3528 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
3529 | vcpu->arch.pio.size, pd); | ||
3530 | else | ||
3531 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3532 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
3533 | pd); | ||
3534 | return r; | ||
3535 | } | ||
3536 | |||
3537 | |||
3538 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | ||
3539 | unsigned int count, struct kvm_vcpu *vcpu) | ||
3540 | { | ||
3541 | if (vcpu->arch.pio.count) | ||
3542 | goto data_avail; | ||
3543 | |||
3544 | trace_kvm_pio(1, port, size, 1); | ||
3545 | |||
3546 | vcpu->arch.pio.port = port; | ||
3547 | vcpu->arch.pio.in = 1; | ||
3548 | vcpu->arch.pio.count = count; | ||
3549 | vcpu->arch.pio.size = size; | ||
3550 | |||
3551 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3552 | data_avail: | ||
3553 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
3554 | vcpu->arch.pio.count = 0; | ||
3555 | return 1; | ||
3556 | } | ||
3557 | |||
3558 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3559 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | ||
3560 | vcpu->run->io.size = size; | ||
3561 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3562 | vcpu->run->io.count = count; | ||
3563 | vcpu->run->io.port = port; | ||
3564 | |||
3565 | return 0; | ||
3566 | } | ||
3567 | |||
3568 | static int emulator_pio_out_emulated(int size, unsigned short port, | ||
3569 | const void *val, unsigned int count, | ||
3570 | struct kvm_vcpu *vcpu) | ||
3571 | { | ||
3572 | trace_kvm_pio(0, port, size, 1); | ||
3573 | |||
3574 | vcpu->arch.pio.port = port; | ||
3575 | vcpu->arch.pio.in = 0; | ||
3576 | vcpu->arch.pio.count = count; | ||
3577 | vcpu->arch.pio.size = size; | ||
3578 | |||
3579 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
3580 | |||
3581 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3582 | vcpu->arch.pio.count = 0; | ||
3583 | return 1; | ||
3584 | } | ||
3585 | |||
3586 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3587 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
3588 | vcpu->run->io.size = size; | ||
3589 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3590 | vcpu->run->io.count = count; | ||
3591 | vcpu->run->io.port = port; | ||
3592 | |||
3593 | return 0; | ||
3594 | } | ||
3595 | |||
3299 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3596 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
3300 | { | 3597 | { |
3301 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3598 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
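
The rewritten emulator_cmpxchg_emulated maps the guest page and performs a genuine compare-and-exchange of the operand size, falling back to a plain emulated write only for unsupported shapes, instead of always degrading the exchange to a write. The userspace analogue below illustrates the same size dispatch with GCC atomics; it is a sketch of the idea, not the kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Atomically swap *ptr from *old to *new_val if it still holds *old. */
static bool cmpxchg_emulated(void *ptr, const void *old, const void *new_val,
			     unsigned int bytes)
{
	switch (bytes) {
	case 1: {
		uint8_t o, n;
		memcpy(&o, old, 1); memcpy(&n, new_val, 1);
		return __sync_bool_compare_and_swap((uint8_t *)ptr, o, n);
	}
	case 2: {
		uint16_t o, n;
		memcpy(&o, old, 2); memcpy(&n, new_val, 2);
		return __sync_bool_compare_and_swap((uint16_t *)ptr, o, n);
	}
	case 4: {
		uint32_t o, n;
		memcpy(&o, old, 4); memcpy(&n, new_val, 4);
		return __sync_bool_compare_and_swap((uint32_t *)ptr, o, n);
	}
	case 8: {
		uint64_t o, n;
		memcpy(&o, old, 8); memcpy(&n, new_val, 8);
		return __sync_bool_compare_and_swap((uint64_t *)ptr, o, n);
	}
	default:
		return false;	/* non-power-of-two sizes would fall back to a write */
	}
}
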
@@ -3316,14 +3613,14 @@ int emulate_clts(struct kvm_vcpu *vcpu) | |||
3316 | 3613 | ||
3317 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3614 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
3318 | { | 3615 | { |
3319 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); | 3616 | return kvm_get_dr(ctxt->vcpu, dr, dest); |
3320 | } | 3617 | } |
3321 | 3618 | ||
3322 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3619 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
3323 | { | 3620 | { |
3324 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3621 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
3325 | 3622 | ||
3326 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); | 3623 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); |
3327 | } | 3624 | } |
3328 | 3625 | ||
3329 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3626 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
@@ -3344,12 +3641,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3344 | } | 3641 | } |
3345 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3642 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
3346 | 3643 | ||
3644 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
3645 | { | ||
3646 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
3647 | } | ||
3648 | |||
3649 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | ||
3650 | { | ||
3651 | unsigned long value; | ||
3652 | |||
3653 | switch (cr) { | ||
3654 | case 0: | ||
3655 | value = kvm_read_cr0(vcpu); | ||
3656 | break; | ||
3657 | case 2: | ||
3658 | value = vcpu->arch.cr2; | ||
3659 | break; | ||
3660 | case 3: | ||
3661 | value = vcpu->arch.cr3; | ||
3662 | break; | ||
3663 | case 4: | ||
3664 | value = kvm_read_cr4(vcpu); | ||
3665 | break; | ||
3666 | case 8: | ||
3667 | value = kvm_get_cr8(vcpu); | ||
3668 | break; | ||
3669 | default: | ||
3670 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3671 | return 0; | ||
3672 | } | ||
3673 | |||
3674 | return value; | ||
3675 | } | ||
3676 | |||
3677 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | ||
3678 | { | ||
3679 | switch (cr) { | ||
3680 | case 0: | ||
3681 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
3682 | break; | ||
3683 | case 2: | ||
3684 | vcpu->arch.cr2 = val; | ||
3685 | break; | ||
3686 | case 3: | ||
3687 | kvm_set_cr3(vcpu, val); | ||
3688 | break; | ||
3689 | case 4: | ||
3690 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
3691 | break; | ||
3692 | case 8: | ||
3693 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
3694 | break; | ||
3695 | default: | ||
3696 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3697 | } | ||
3698 | } | ||
3699 | |||
3700 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | ||
3701 | { | ||
3702 | return kvm_x86_ops->get_cpl(vcpu); | ||
3703 | } | ||
3704 | |||
3705 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | ||
3706 | { | ||
3707 | kvm_x86_ops->get_gdt(vcpu, dt); | ||
3708 | } | ||
3709 | |||
3710 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | ||
3711 | struct kvm_vcpu *vcpu) | ||
3712 | { | ||
3713 | struct kvm_segment var; | ||
3714 | |||
3715 | kvm_get_segment(vcpu, &var, seg); | ||
3716 | |||
3717 | if (var.unusable) | ||
3718 | return false; | ||
3719 | |||
3720 | if (var.g) | ||
3721 | var.limit >>= 12; | ||
3722 | set_desc_limit(desc, var.limit); | ||
3723 | set_desc_base(desc, (unsigned long)var.base); | ||
3724 | desc->type = var.type; | ||
3725 | desc->s = var.s; | ||
3726 | desc->dpl = var.dpl; | ||
3727 | desc->p = var.present; | ||
3728 | desc->avl = var.avl; | ||
3729 | desc->l = var.l; | ||
3730 | desc->d = var.db; | ||
3731 | desc->g = var.g; | ||
3732 | |||
3733 | return true; | ||
3734 | } | ||
3735 | |||
3736 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | ||
3737 | struct kvm_vcpu *vcpu) | ||
3738 | { | ||
3739 | struct kvm_segment var; | ||
3740 | |||
3741 | /* needed to preserve selector */ | ||
3742 | kvm_get_segment(vcpu, &var, seg); | ||
3743 | |||
3744 | var.base = get_desc_base(desc); | ||
3745 | var.limit = get_desc_limit(desc); | ||
3746 | if (desc->g) | ||
3747 | var.limit = (var.limit << 12) | 0xfff; | ||
3748 | var.type = desc->type; | ||
3749 | var.present = desc->p; | ||
3750 | var.dpl = desc->dpl; | ||
3751 | var.db = desc->d; | ||
3752 | var.s = desc->s; | ||
3753 | var.l = desc->l; | ||
3754 | var.g = desc->g; | ||
3755 | var.avl = desc->avl; | ||
3756 | var.present = desc->p; | ||
3757 | var.unusable = !var.present; | ||
3758 | var.padding = 0; | ||
3759 | |||
3760 | kvm_set_segment(vcpu, &var, seg); | ||
3761 | return; | ||
3762 | } | ||
3763 | |||
3764 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | ||
3765 | { | ||
3766 | struct kvm_segment kvm_seg; | ||
3767 | |||
3768 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
3769 | return kvm_seg.selector; | ||
3770 | } | ||
3771 | |||
3772 | static void emulator_set_segment_selector(u16 sel, int seg, | ||
3773 | struct kvm_vcpu *vcpu) | ||
3774 | { | ||
3775 | struct kvm_segment kvm_seg; | ||
3776 | |||
3777 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
3778 | kvm_seg.selector = sel; | ||
3779 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
3780 | } | ||
3781 | |||
3782 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
3783 | { | ||
3784 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
3785 | } | ||
3786 | |||
3347 | static struct x86_emulate_ops emulate_ops = { | 3787 | static struct x86_emulate_ops emulate_ops = { |
3348 | .read_std = kvm_read_guest_virt_system, | 3788 | .read_std = kvm_read_guest_virt_system, |
3789 | .write_std = kvm_write_guest_virt_system, | ||
3349 | .fetch = kvm_fetch_guest_virt, | 3790 | .fetch = kvm_fetch_guest_virt, |
3350 | .read_emulated = emulator_read_emulated, | 3791 | .read_emulated = emulator_read_emulated, |
3351 | .write_emulated = emulator_write_emulated, | 3792 | .write_emulated = emulator_write_emulated, |
3352 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3793 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
3794 | .pio_in_emulated = emulator_pio_in_emulated, | ||
3795 | .pio_out_emulated = emulator_pio_out_emulated, | ||
3796 | .get_cached_descriptor = emulator_get_cached_descriptor, | ||
3797 | .set_cached_descriptor = emulator_set_cached_descriptor, | ||
3798 | .get_segment_selector = emulator_get_segment_selector, | ||
3799 | .set_segment_selector = emulator_set_segment_selector, | ||
3800 | .get_gdt = emulator_get_gdt, | ||
3801 | .get_cr = emulator_get_cr, | ||
3802 | .set_cr = emulator_set_cr, | ||
3803 | .cpl = emulator_get_cpl, | ||
3804 | .set_rflags = emulator_set_rflags, | ||
3353 | }; | 3805 | }; |
3354 | 3806 | ||
3355 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3807 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
@@ -3380,14 +3832,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3380 | cache_all_regs(vcpu); | 3832 | cache_all_regs(vcpu); |
3381 | 3833 | ||
3382 | vcpu->mmio_is_write = 0; | 3834 | vcpu->mmio_is_write = 0; |
3383 | vcpu->arch.pio.string = 0; | ||
3384 | 3835 | ||
3385 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3836 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
3386 | int cs_db, cs_l; | 3837 | int cs_db, cs_l; |
3387 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3838 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
3388 | 3839 | ||
3389 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3840 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
3390 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3841 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
3842 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
3391 | vcpu->arch.emulate_ctxt.mode = | 3843 | vcpu->arch.emulate_ctxt.mode = |
3392 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 3844 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
3393 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3845 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
@@ -3396,6 +3848,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3396 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3848 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3397 | 3849 | ||
3398 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3850 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3851 | trace_kvm_emulate_insn_start(vcpu); | ||
3399 | 3852 | ||
3400 | /* Only allow emulation of specific instructions on #UD | 3853 | /* Only allow emulation of specific instructions on #UD |
3401 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 3854 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
@@ -3428,6 +3881,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3428 | ++vcpu->stat.insn_emulation; | 3881 | ++vcpu->stat.insn_emulation; |
3429 | if (r) { | 3882 | if (r) { |
3430 | ++vcpu->stat.insn_emulation_fail; | 3883 | ++vcpu->stat.insn_emulation_fail; |
3884 | trace_kvm_emulate_insn_failed(vcpu); | ||
3431 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3885 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
3432 | return EMULATE_DONE; | 3886 | return EMULATE_DONE; |
3433 | return EMULATE_FAIL; | 3887 | return EMULATE_FAIL; |
@@ -3439,16 +3893,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3439 | return EMULATE_DONE; | 3893 | return EMULATE_DONE; |
3440 | } | 3894 | } |
3441 | 3895 | ||
3896 | restart: | ||
3442 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3897 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3443 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | 3898 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; |
3444 | 3899 | ||
3445 | if (r == 0) | 3900 | if (r == 0) |
3446 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 3901 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); |
3447 | 3902 | ||
3448 | if (vcpu->arch.pio.string) | 3903 | if (vcpu->arch.pio.count) { |
3904 | if (!vcpu->arch.pio.in) | ||
3905 | vcpu->arch.pio.count = 0; | ||
3449 | return EMULATE_DO_MMIO; | 3906 | return EMULATE_DO_MMIO; |
3907 | } | ||
3450 | 3908 | ||
3451 | if ((r || vcpu->mmio_is_write) && run) { | 3909 | if (r || vcpu->mmio_is_write) { |
3452 | run->exit_reason = KVM_EXIT_MMIO; | 3910 | run->exit_reason = KVM_EXIT_MMIO; |
3453 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 3911 | run->mmio.phys_addr = vcpu->mmio_phys_addr; |
3454 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 3912 | memcpy(run->mmio.data, vcpu->mmio_data, 8); |
@@ -3458,222 +3916,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3458 | 3916 | ||
3459 | if (r) { | 3917 | if (r) { |
3460 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3918 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
3461 | return EMULATE_DONE; | 3919 | goto done; |
3462 | if (!vcpu->mmio_needed) { | 3920 | if (!vcpu->mmio_needed) { |
3921 | ++vcpu->stat.insn_emulation_fail; | ||
3922 | trace_kvm_emulate_insn_failed(vcpu); | ||
3463 | kvm_report_emulation_failure(vcpu, "mmio"); | 3923 | kvm_report_emulation_failure(vcpu, "mmio"); |
3464 | return EMULATE_FAIL; | 3924 | return EMULATE_FAIL; |
3465 | } | 3925 | } |
3466 | return EMULATE_DO_MMIO; | 3926 | return EMULATE_DO_MMIO; |
3467 | } | 3927 | } |
3468 | 3928 | ||
3469 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
3470 | |||
3471 | if (vcpu->mmio_is_write) { | 3929 | if (vcpu->mmio_is_write) { |
3472 | vcpu->mmio_needed = 0; | 3930 | vcpu->mmio_needed = 0; |
3473 | return EMULATE_DO_MMIO; | 3931 | return EMULATE_DO_MMIO; |
3474 | } | 3932 | } |
3475 | 3933 | ||
3476 | return EMULATE_DONE; | 3934 | done: |
3477 | } | 3935 | if (vcpu->arch.exception.pending) |
3478 | EXPORT_SYMBOL_GPL(emulate_instruction); | 3936 | vcpu->arch.emulate_ctxt.restart = false; |
3479 | |||
3480 | static int pio_copy_data(struct kvm_vcpu *vcpu) | ||
3481 | { | ||
3482 | void *p = vcpu->arch.pio_data; | ||
3483 | gva_t q = vcpu->arch.pio.guest_gva; | ||
3484 | unsigned bytes; | ||
3485 | int ret; | ||
3486 | u32 error_code; | ||
3487 | |||
3488 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | ||
3489 | if (vcpu->arch.pio.in) | ||
3490 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); | ||
3491 | else | ||
3492 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); | ||
3493 | |||
3494 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3495 | kvm_inject_page_fault(vcpu, q, error_code); | ||
3496 | |||
3497 | return ret; | ||
3498 | } | ||
3499 | |||
3500 | int complete_pio(struct kvm_vcpu *vcpu) | ||
3501 | { | ||
3502 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
3503 | long delta; | ||
3504 | int r; | ||
3505 | unsigned long val; | ||
3506 | |||
3507 | if (!io->string) { | ||
3508 | if (io->in) { | ||
3509 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
3510 | memcpy(&val, vcpu->arch.pio_data, io->size); | ||
3511 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | ||
3512 | } | ||
3513 | } else { | ||
3514 | if (io->in) { | ||
3515 | r = pio_copy_data(vcpu); | ||
3516 | if (r) | ||
3517 | goto out; | ||
3518 | } | ||
3519 | |||
3520 | delta = 1; | ||
3521 | if (io->rep) { | ||
3522 | delta *= io->cur_count; | ||
3523 | /* | ||
3524 | * The size of the register should really depend on | ||
3525 | * current address size. | ||
3526 | */ | ||
3527 | val = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
3528 | val -= delta; | ||
3529 | kvm_register_write(vcpu, VCPU_REGS_RCX, val); | ||
3530 | } | ||
3531 | if (io->down) | ||
3532 | delta = -delta; | ||
3533 | delta *= io->size; | ||
3534 | if (io->in) { | ||
3535 | val = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
3536 | val += delta; | ||
3537 | kvm_register_write(vcpu, VCPU_REGS_RDI, val); | ||
3538 | } else { | ||
3539 | val = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
3540 | val += delta; | ||
3541 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | ||
3542 | } | ||
3543 | } | ||
3544 | out: | ||
3545 | io->count -= io->cur_count; | ||
3546 | io->cur_count = 0; | ||
3547 | |||
3548 | return 0; | ||
3549 | } | ||
3550 | |||
3551 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
3552 | { | ||
3553 | /* TODO: String I/O for in kernel device */ | ||
3554 | int r; | ||
3555 | |||
3556 | if (vcpu->arch.pio.in) | ||
3557 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
3558 | vcpu->arch.pio.size, pd); | ||
3559 | else | ||
3560 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3561 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
3562 | pd); | ||
3563 | return r; | ||
3564 | } | ||
3565 | 3937 | ||
3566 | static int pio_string_write(struct kvm_vcpu *vcpu) | 3938 | if (vcpu->arch.emulate_ctxt.restart) |
3567 | { | 3939 | goto restart; |
3568 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
3569 | void *pd = vcpu->arch.pio_data; | ||
3570 | int i, r = 0; | ||
3571 | 3940 | ||
3572 | for (i = 0; i < io->cur_count; i++) { | 3941 | return EMULATE_DONE; |
3573 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3574 | io->port, io->size, pd)) { | ||
3575 | r = -EOPNOTSUPP; | ||
3576 | break; | ||
3577 | } | ||
3578 | pd += io->size; | ||
3579 | } | ||
3580 | return r; | ||
3581 | } | ||
3582 | |||
3583 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | ||
3584 | { | ||
3585 | unsigned long val; | ||
3586 | |||
3587 | trace_kvm_pio(!in, port, size, 1); | ||
3588 | |||
3589 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3590 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
3591 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
3592 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3593 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; | ||
3594 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
3595 | vcpu->arch.pio.in = in; | ||
3596 | vcpu->arch.pio.string = 0; | ||
3597 | vcpu->arch.pio.down = 0; | ||
3598 | vcpu->arch.pio.rep = 0; | ||
3599 | |||
3600 | if (!vcpu->arch.pio.in) { | ||
3601 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
3602 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
3603 | } | ||
3604 | |||
3605 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3606 | complete_pio(vcpu); | ||
3607 | return 1; | ||
3608 | } | ||
3609 | return 0; | ||
3610 | } | 3942 | } |
3611 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3943 | EXPORT_SYMBOL_GPL(emulate_instruction); |
3612 | 3944 | ||
3613 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | 3945 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
3614 | int size, unsigned long count, int down, | ||
3615 | gva_t address, int rep, unsigned port) | ||
3616 | { | 3946 | { |
3617 | unsigned now, in_page; | 3947 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3618 | int ret = 0; | 3948 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); |
3619 | 3949 | /* do not return to emulator after return from userspace */ | |
3620 | trace_kvm_pio(!in, port, size, count); | 3950 | vcpu->arch.pio.count = 0; |
3621 | |||
3622 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3623 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
3624 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
3625 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3626 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; | ||
3627 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
3628 | vcpu->arch.pio.in = in; | ||
3629 | vcpu->arch.pio.string = 1; | ||
3630 | vcpu->arch.pio.down = down; | ||
3631 | vcpu->arch.pio.rep = rep; | ||
3632 | |||
3633 | if (!count) { | ||
3634 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3635 | return 1; | ||
3636 | } | ||
3637 | |||
3638 | if (!down) | ||
3639 | in_page = PAGE_SIZE - offset_in_page(address); | ||
3640 | else | ||
3641 | in_page = offset_in_page(address) + size; | ||
3642 | now = min(count, (unsigned long)in_page / size); | ||
3643 | if (!now) | ||
3644 | now = 1; | ||
3645 | if (down) { | ||
3646 | /* | ||
3647 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
3648 | */ | ||
3649 | pr_unimpl(vcpu, "guest string pio down\n"); | ||
3650 | kvm_inject_gp(vcpu, 0); | ||
3651 | return 1; | ||
3652 | } | ||
3653 | vcpu->run->io.count = now; | ||
3654 | vcpu->arch.pio.cur_count = now; | ||
3655 | |||
3656 | if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) | ||
3657 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3658 | |||
3659 | vcpu->arch.pio.guest_gva = address; | ||
3660 | |||
3661 | if (!vcpu->arch.pio.in) { | ||
3662 | /* string PIO write */ | ||
3663 | ret = pio_copy_data(vcpu); | ||
3664 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3665 | return 1; | ||
3666 | if (ret == 0 && !pio_string_write(vcpu)) { | ||
3667 | complete_pio(vcpu); | ||
3668 | if (vcpu->arch.pio.count == 0) | ||
3669 | ret = 1; | ||
3670 | } | ||
3671 | } | ||
3672 | /* no string PIO read support yet */ | ||
3673 | |||
3674 | return ret; | 3951 | return ret; |
3675 | } | 3952 | } |
3676 | EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); | 3953 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); |
3677 | 3954 | ||
3678 | static void bounce_off(void *info) | 3955 | static void bounce_off(void *info) |
3679 | { | 3956 | { |
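
The string-PIO machinery removed in this hunk is replaced by the emulator-driven pio_in/pio_out helpers plus kvm_fast_pio_out; in every case userspace still completes the access from the KVM_EXIT_IO layout in the shared kvm_run page. A hedged sketch of that userspace side (the run mapping and the toy port handling are assumptions, not part of this patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <linux/kvm.h>

/* Called after KVM_RUN returns with run->exit_reason == KVM_EXIT_IO. */
static void handle_io_exit(struct kvm_run *run)
{
	uint8_t *data = (uint8_t *)run + run->io.data_offset;
	uint32_t i;

	for (i = 0; i < run->io.count; i++, data += run->io.size) {
		if (run->io.direction == KVM_EXIT_IO_OUT &&
		    run->io.port == 0x3f8 && run->io.size == 1)
			putchar(*data);			/* toy serial console */
		else if (run->io.direction == KVM_EXIT_IO_IN)
			memset(data, 0xff, run->io.size);	/* float the bus */
	}
}
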
@@ -3996,85 +4273,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
3996 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4273 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
3997 | } | 4274 | } |
3998 | 4275 | ||
3999 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
4000 | { | ||
4001 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
4002 | } | ||
4003 | |||
4004 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4276 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
4005 | { | 4277 | { |
4006 | struct descriptor_table dt = { limit, base }; | 4278 | struct desc_ptr dt = { limit, base }; |
4007 | 4279 | ||
4008 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4280 | kvm_x86_ops->set_gdt(vcpu, &dt); |
4009 | } | 4281 | } |
4010 | 4282 | ||
4011 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4283 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
4012 | { | 4284 | { |
4013 | struct descriptor_table dt = { limit, base }; | 4285 | struct desc_ptr dt = { limit, base }; |
4014 | 4286 | ||
4015 | kvm_x86_ops->set_idt(vcpu, &dt); | 4287 | kvm_x86_ops->set_idt(vcpu, &dt); |
4016 | } | 4288 | } |
4017 | 4289 | ||
4018 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
4019 | unsigned long *rflags) | ||
4020 | { | ||
4021 | kvm_lmsw(vcpu, msw); | ||
4022 | *rflags = kvm_get_rflags(vcpu); | ||
4023 | } | ||
4024 | |||
4025 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | ||
4026 | { | ||
4027 | unsigned long value; | ||
4028 | |||
4029 | switch (cr) { | ||
4030 | case 0: | ||
4031 | value = kvm_read_cr0(vcpu); | ||
4032 | break; | ||
4033 | case 2: | ||
4034 | value = vcpu->arch.cr2; | ||
4035 | break; | ||
4036 | case 3: | ||
4037 | value = vcpu->arch.cr3; | ||
4038 | break; | ||
4039 | case 4: | ||
4040 | value = kvm_read_cr4(vcpu); | ||
4041 | break; | ||
4042 | case 8: | ||
4043 | value = kvm_get_cr8(vcpu); | ||
4044 | break; | ||
4045 | default: | ||
4046 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
4047 | return 0; | ||
4048 | } | ||
4049 | |||
4050 | return value; | ||
4051 | } | ||
4052 | |||
4053 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | ||
4054 | unsigned long *rflags) | ||
4055 | { | ||
4056 | switch (cr) { | ||
4057 | case 0: | ||
4058 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
4059 | *rflags = kvm_get_rflags(vcpu); | ||
4060 | break; | ||
4061 | case 2: | ||
4062 | vcpu->arch.cr2 = val; | ||
4063 | break; | ||
4064 | case 3: | ||
4065 | kvm_set_cr3(vcpu, val); | ||
4066 | break; | ||
4067 | case 4: | ||
4068 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
4069 | break; | ||
4070 | case 8: | ||
4071 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
4072 | break; | ||
4073 | default: | ||
4074 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
4075 | } | ||
4076 | } | ||
4077 | |||
4078 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 4290 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
4079 | { | 4291 | { |
4080 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | 4292 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; |
@@ -4138,9 +4350,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | |||
4138 | { | 4350 | { |
4139 | struct kvm_cpuid_entry2 *best; | 4351 | struct kvm_cpuid_entry2 *best; |
4140 | 4352 | ||
4353 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
4354 | if (!best || best->eax < 0x80000008) | ||
4355 | goto not_found; | ||
4141 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | 4356 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
4142 | if (best) | 4357 | if (best) |
4143 | return best->eax & 0xff; | 4358 | return best->eax & 0xff; |
4359 | not_found: | ||
4144 | return 36; | 4360 | return 36; |
4145 | } | 4361 | } |
4146 | 4362 | ||
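
cpuid_maxphyaddr now verifies that the guest's CPUID table actually extends to leaf 0x80000008 before trusting it, and falls back to 36 bits otherwise. The same guard applies when probing a real CPU from userspace, as in this small sketch (leaf semantics assumed from the x86 CPUID convention):

#include <stdio.h>
#include <cpuid.h>	/* GCC/clang __get_cpuid helper */

static unsigned int max_phys_addr_bits(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Only trust leaf 0x80000008 if 0x80000000 says it exists. */
	if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) ||
	    eax < 0x80000008)
		return 36;	/* not reported: assume 36 bits, as the kernel does */

	__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
	return eax & 0xff;
}

int main(void)
{
	printf("MAXPHYADDR = %u bits\n", max_phys_addr_bits());
	return 0;
}
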
@@ -4254,9 +4470,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
4254 | { | 4470 | { |
4255 | /* try to reinject previous events if any */ | 4471 | /* try to reinject previous events if any */ |
4256 | if (vcpu->arch.exception.pending) { | 4472 | if (vcpu->arch.exception.pending) { |
4473 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | ||
4474 | vcpu->arch.exception.has_error_code, | ||
4475 | vcpu->arch.exception.error_code); | ||
4257 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, | 4476 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, |
4258 | vcpu->arch.exception.has_error_code, | 4477 | vcpu->arch.exception.has_error_code, |
4259 | vcpu->arch.exception.error_code); | 4478 | vcpu->arch.exception.error_code, |
4479 | vcpu->arch.exception.reinject); | ||
4260 | return; | 4480 | return; |
4261 | } | 4481 | } |
4262 | 4482 | ||
@@ -4486,7 +4706,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4486 | } | 4706 | } |
4487 | 4707 | ||
4488 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 4708 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4489 | post_kvm_run_save(vcpu); | ||
4490 | 4709 | ||
4491 | vapic_exit(vcpu); | 4710 | vapic_exit(vcpu); |
4492 | 4711 | ||
@@ -4514,26 +4733,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4514 | if (!irqchip_in_kernel(vcpu->kvm)) | 4733 | if (!irqchip_in_kernel(vcpu->kvm)) |
4515 | kvm_set_cr8(vcpu, kvm_run->cr8); | 4734 | kvm_set_cr8(vcpu, kvm_run->cr8); |
4516 | 4735 | ||
4517 | if (vcpu->arch.pio.cur_count) { | 4736 | if (vcpu->arch.pio.count || vcpu->mmio_needed || |
4518 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4737 | vcpu->arch.emulate_ctxt.restart) { |
4519 | r = complete_pio(vcpu); | 4738 | if (vcpu->mmio_needed) { |
4520 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4739 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
4521 | if (r) | 4740 | vcpu->mmio_read_completed = 1; |
4522 | goto out; | 4741 | vcpu->mmio_needed = 0; |
4523 | } | 4742 | } |
4524 | if (vcpu->mmio_needed) { | ||
4525 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | ||
4526 | vcpu->mmio_read_completed = 1; | ||
4527 | vcpu->mmio_needed = 0; | ||
4528 | |||
4529 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4743 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4530 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4744 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
4531 | EMULTYPE_NO_DECODE); | ||
4532 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4745 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4533 | if (r == EMULATE_DO_MMIO) { | 4746 | if (r == EMULATE_DO_MMIO) { |
4534 | /* | ||
4535 | * Read-modify-write. Back to userspace. | ||
4536 | */ | ||
4537 | r = 0; | 4747 | r = 0; |
4538 | goto out; | 4748 | goto out; |
4539 | } | 4749 | } |
@@ -4545,6 +4755,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4545 | r = __vcpu_run(vcpu); | 4755 | r = __vcpu_run(vcpu); |
4546 | 4756 | ||
4547 | out: | 4757 | out: |
4758 | post_kvm_run_save(vcpu); | ||
4548 | if (vcpu->sigset_active) | 4759 | if (vcpu->sigset_active) |
4549 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 4760 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
4550 | 4761 | ||
@@ -4616,12 +4827,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4616 | return 0; | 4827 | return 0; |
4617 | } | 4828 | } |
4618 | 4829 | ||
4619 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
4620 | struct kvm_segment *var, int seg) | ||
4621 | { | ||
4622 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
4623 | } | ||
4624 | |||
4625 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 4830 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
4626 | { | 4831 | { |
4627 | struct kvm_segment cs; | 4832 | struct kvm_segment cs; |
@@ -4635,7 +4840,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | |||
4635 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 4840 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
4636 | struct kvm_sregs *sregs) | 4841 | struct kvm_sregs *sregs) |
4637 | { | 4842 | { |
4638 | struct descriptor_table dt; | 4843 | struct desc_ptr dt; |
4639 | 4844 | ||
4640 | vcpu_load(vcpu); | 4845 | vcpu_load(vcpu); |
4641 | 4846 | ||
@@ -4650,11 +4855,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4650 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 4855 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
4651 | 4856 | ||
4652 | kvm_x86_ops->get_idt(vcpu, &dt); | 4857 | kvm_x86_ops->get_idt(vcpu, &dt); |
4653 | sregs->idt.limit = dt.limit; | 4858 | sregs->idt.limit = dt.size; |
4654 | sregs->idt.base = dt.base; | 4859 | sregs->idt.base = dt.address; |
4655 | kvm_x86_ops->get_gdt(vcpu, &dt); | 4860 | kvm_x86_ops->get_gdt(vcpu, &dt); |
4656 | sregs->gdt.limit = dt.limit; | 4861 | sregs->gdt.limit = dt.size; |
4657 | sregs->gdt.base = dt.base; | 4862 | sregs->gdt.base = dt.address; |
4658 | 4863 | ||
4659 | sregs->cr0 = kvm_read_cr0(vcpu); | 4864 | sregs->cr0 = kvm_read_cr0(vcpu); |
4660 | sregs->cr2 = vcpu->arch.cr2; | 4865 | sregs->cr2 = vcpu->arch.cr2; |
@@ -4693,563 +4898,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
4693 | return 0; | 4898 | return 0; |
4694 | } | 4899 | } |
4695 | 4900 | ||
4696 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 4901 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
4697 | struct kvm_segment *var, int seg) | 4902 | bool has_error_code, u32 error_code) |
4698 | { | ||
4699 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
4700 | } | ||
4701 | |||
4702 | static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | ||
4703 | struct kvm_segment *kvm_desct) | ||
4704 | { | ||
4705 | kvm_desct->base = get_desc_base(seg_desc); | ||
4706 | kvm_desct->limit = get_desc_limit(seg_desc); | ||
4707 | if (seg_desc->g) { | ||
4708 | kvm_desct->limit <<= 12; | ||
4709 | kvm_desct->limit |= 0xfff; | ||
4710 | } | ||
4711 | kvm_desct->selector = selector; | ||
4712 | kvm_desct->type = seg_desc->type; | ||
4713 | kvm_desct->present = seg_desc->p; | ||
4714 | kvm_desct->dpl = seg_desc->dpl; | ||
4715 | kvm_desct->db = seg_desc->d; | ||
4716 | kvm_desct->s = seg_desc->s; | ||
4717 | kvm_desct->l = seg_desc->l; | ||
4718 | kvm_desct->g = seg_desc->g; | ||
4719 | kvm_desct->avl = seg_desc->avl; | ||
4720 | if (!selector) | ||
4721 | kvm_desct->unusable = 1; | ||
4722 | else | ||
4723 | kvm_desct->unusable = 0; | ||
4724 | kvm_desct->padding = 0; | ||
4725 | } | ||
4726 | |||
4727 | static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, | ||
4728 | u16 selector, | ||
4729 | struct descriptor_table *dtable) | ||
4730 | { | ||
4731 | if (selector & 1 << 2) { | ||
4732 | struct kvm_segment kvm_seg; | ||
4733 | |||
4734 | kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); | ||
4735 | |||
4736 | if (kvm_seg.unusable) | ||
4737 | dtable->limit = 0; | ||
4738 | else | ||
4739 | dtable->limit = kvm_seg.limit; | ||
4740 | dtable->base = kvm_seg.base; | ||
4741 | } | ||
4742 | else | ||
4743 | kvm_x86_ops->get_gdt(vcpu, dtable); | ||
4744 | } | ||
4745 | |||
4746 | /* allowed just for 8 bytes segments */ | ||
4747 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
4748 | struct desc_struct *seg_desc) | ||
4749 | { | ||
4750 | struct descriptor_table dtable; | ||
4751 | u16 index = selector >> 3; | ||
4752 | int ret; | ||
4753 | u32 err; | ||
4754 | gva_t addr; | ||
4755 | |||
4756 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
4757 | |||
4758 | if (dtable.limit < index * 8 + 7) { | ||
4759 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | ||
4760 | return X86EMUL_PROPAGATE_FAULT; | ||
4761 | } | ||
4762 | addr = dtable.base + index * 8; | ||
4763 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
4764 | vcpu, &err); | ||
4765 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
4766 | kvm_inject_page_fault(vcpu, addr, err); | ||
4767 | |||
4768 | return ret; | ||
4769 | } | ||
4770 | |||
4771 | /* allowed just for 8 bytes segments */ | ||
4772 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
4773 | struct desc_struct *seg_desc) | ||
4774 | { | ||
4775 | struct descriptor_table dtable; | ||
4776 | u16 index = selector >> 3; | ||
4777 | |||
4778 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
4779 | |||
4780 | if (dtable.limit < index * 8 + 7) | ||
4781 | return 1; | ||
4782 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); | ||
4783 | } | ||
4784 | |||
4785 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
4786 | struct desc_struct *seg_desc) | ||
4787 | { | ||
4788 | u32 base_addr = get_desc_base(seg_desc); | ||
4789 | |||
4790 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
4791 | } | ||
4792 | |||
4793 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, | ||
4794 | struct desc_struct *seg_desc) | ||
4795 | { | ||
4796 | u32 base_addr = get_desc_base(seg_desc); | ||
4797 | |||
4798 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); | ||
4799 | } | ||
4800 | |||
4801 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | ||
4802 | { | ||
4803 | struct kvm_segment kvm_seg; | ||
4804 | |||
4805 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4806 | return kvm_seg.selector; | ||
4807 | } | ||
4808 | |||
4809 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
4810 | { | ||
4811 | struct kvm_segment segvar = { | ||
4812 | .base = selector << 4, | ||
4813 | .limit = 0xffff, | ||
4814 | .selector = selector, | ||
4815 | .type = 3, | ||
4816 | .present = 1, | ||
4817 | .dpl = 3, | ||
4818 | .db = 0, | ||
4819 | .s = 1, | ||
4820 | .l = 0, | ||
4821 | .g = 0, | ||
4822 | .avl = 0, | ||
4823 | .unusable = 0, | ||
4824 | }; | ||
4825 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | ||
4826 | return X86EMUL_CONTINUE; | ||
4827 | } | ||
4828 | |||
4829 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | ||
4830 | { | 4903 | { |
4831 | return (seg != VCPU_SREG_LDTR) && | 4904 | int cs_db, cs_l, ret; |
4832 | (seg != VCPU_SREG_TR) && | 4905 | cache_all_regs(vcpu); |
4833 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | ||
4834 | } | ||
4835 | |||
4836 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
4837 | { | ||
4838 | struct kvm_segment kvm_seg; | ||
4839 | struct desc_struct seg_desc; | ||
4840 | u8 dpl, rpl, cpl; | ||
4841 | unsigned err_vec = GP_VECTOR; | ||
4842 | u32 err_code = 0; | ||
4843 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
4844 | int ret; | ||
4845 | 4906 | ||
4846 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) | 4907 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
4847 | return kvm_load_realmode_segment(vcpu, selector, seg); | ||
4848 | 4908 | ||
4849 | /* NULL selector is not valid for TR, CS and SS */ | 4909 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
4850 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | 4910 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
4851 | && null_selector) | 4911 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); |
4852 | goto exception; | 4912 | vcpu->arch.emulate_ctxt.mode = |
4913 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
4914 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
4915 | ? X86EMUL_MODE_VM86 : cs_l | ||
4916 | ? X86EMUL_MODE_PROT64 : cs_db | ||
4917 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
4853 | 4918 | ||
4854 | /* TR should be in GDT only */ | 4919 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
4855 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | 4920 | tss_selector, reason, has_error_code, |
4856 | goto exception; | 4921 | error_code); |
4857 | 4922 | ||
4858 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4859 | if (ret) | 4923 | if (ret) |
4860 | return ret; | 4924 | return EMULATE_FAIL; |
4861 | |||
4862 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
4863 | |||
4864 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
4865 | kvm_seg.unusable = 1; | ||
4866 | goto load; | ||
4867 | } | ||
4868 | |||
4869 | err_code = selector & 0xfffc; | ||
4870 | err_vec = GP_VECTOR; | ||
4871 | |||
4872 | /* can't load system descriptor into segment selector */ | ||
4873 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
4874 | goto exception; | ||
4875 | |||
4876 | if (!kvm_seg.present) { | ||
4877 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
4878 | goto exception; | ||
4879 | } | ||
4880 | |||
4881 | rpl = selector & 3; | ||
4882 | dpl = kvm_seg.dpl; | ||
4883 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
4884 | |||
4885 | switch (seg) { | ||
4886 | case VCPU_SREG_SS: | ||
4887 | /* | ||
4888 | * segment is not a writable data segment or segment | ||
4889 | * selector's RPL != CPL or segment descriptor's DPL != CPL | ||
4890 | */ | ||
4891 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
4892 | goto exception; | ||
4893 | break; | ||
4894 | case VCPU_SREG_CS: | ||
4895 | if (!(kvm_seg.type & 8)) | ||
4896 | goto exception; | ||
4897 | |||
4898 | if (kvm_seg.type & 4) { | ||
4899 | /* conforming */ | ||
4900 | if (dpl > cpl) | ||
4901 | goto exception; | ||
4902 | } else { | ||
4903 | /* nonconforming */ | ||
4904 | if (rpl > cpl || dpl != cpl) | ||
4905 | goto exception; | ||
4906 | } | ||
4907 | /* CS(RPL) <- CPL */ | ||
4908 | selector = (selector & 0xfffc) | cpl; | ||
4909 | break; | ||
4910 | case VCPU_SREG_TR: | ||
4911 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
4912 | goto exception; | ||
4913 | break; | ||
4914 | case VCPU_SREG_LDTR: | ||
4915 | if (kvm_seg.s || kvm_seg.type != 2) | ||
4916 | goto exception; | ||
4917 | break; | ||
4918 | default: /* DS, ES, FS, or GS */ | ||
4919 | /* | ||
4920 | * segment is not a data or readable code segment or | ||
4921 | * ((segment is a data or nonconforming code segment) | ||
4922 | * and (both RPL and CPL > DPL)) | ||
4923 | */ | ||
4924 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
4925 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
4926 | goto exception; | ||
4927 | break; | ||
4928 | } | ||
4929 | |||
4930 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
4931 | /* mark segment as accessed */ | ||
4932 | kvm_seg.type |= 1; | ||
4933 | seg_desc.type |= 1; | ||
4934 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4935 | } | ||
4936 | load: | ||
4937 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4938 | return X86EMUL_CONTINUE; | ||
4939 | exception: | ||
4940 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
4941 | return X86EMUL_PROPAGATE_FAULT; | ||
4942 | } | ||
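The switch statement above encodes the SDM's selector checks: RPL is the low two bits of the selector, DPL comes from the descriptor, CPL from the current privilege level, and a failed check raises #GP (or #SS/#NP) with the selector masked to its index bits (selector & 0xfffc) as the error code. A standalone sketch of the SS case only (hypothetical helper, illustrating the check rather than the emulator's actual entry point):

	#include <stdbool.h>
	#include <stdint.h>

	/*
	 * SS loads require RPL == CPL == DPL and a writable data segment:
	 * type bit 3 clear (data) and type bit 1 set (writable), i.e.
	 * (type & 0xa) == 0x2, mirroring the VCPU_SREG_SS case above.
	 */
	static bool ss_load_allowed(uint16_t selector, uint8_t type,
				    uint8_t dpl, uint8_t cpl)
	{
		uint8_t rpl = selector & 3;

		return rpl == cpl && dpl == cpl && (type & 0xa) == 0x2;
	}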
4943 | |||
4944 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | ||
4945 | struct tss_segment_32 *tss) | ||
4946 | { | ||
4947 | tss->cr3 = vcpu->arch.cr3; | ||
4948 | tss->eip = kvm_rip_read(vcpu); | ||
4949 | tss->eflags = kvm_get_rflags(vcpu); | ||
4950 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
4951 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
4952 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
4953 | tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
4954 | tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
4955 | tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
4956 | tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
4957 | tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
4958 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
4959 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
4960 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
4961 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
4962 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | ||
4963 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | ||
4964 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
4965 | } | ||
4966 | |||
4967 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
4968 | { | ||
4969 | struct kvm_segment kvm_seg; | ||
4970 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4971 | kvm_seg.selector = sel; | ||
4972 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4973 | } | ||
4974 | |||
4975 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | ||
4976 | struct tss_segment_32 *tss) | ||
4977 | { | ||
4978 | kvm_set_cr3(vcpu, tss->cr3); | ||
4979 | |||
4980 | kvm_rip_write(vcpu, tss->eip); | ||
4981 | kvm_set_rflags(vcpu, tss->eflags | 2); | ||
4982 | |||
4983 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | ||
4984 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | ||
4985 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); | ||
4986 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); | ||
4987 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); | ||
4988 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); | ||
4989 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | ||
4990 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | ||
4991 | |||
4992 | /* | ||
4993 | * SDM says that segment selectors are loaded before segment | ||
4994 | * descriptors | ||
4995 | */ | ||
4996 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
4997 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
4998 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
4999 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5000 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5001 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
5002 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
5003 | |||
5004 | /* | ||
5005 | * Now load segment descriptors. If a fault happens at this stage | ||
5006 | * it is handled in the context of the new task | ||
5007 | */ | ||
5008 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
5009 | return 1; | ||
5010 | |||
5011 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
5012 | return 1; | ||
5013 | 4925 | ||
5014 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | 4926 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
5015 | return 1; | 4927 | return EMULATE_DONE; |
5016 | |||
5017 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
5018 | return 1; | ||
5019 | |||
5020 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
5021 | return 1; | ||
5022 | |||
5023 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) | ||
5024 | return 1; | ||
5025 | |||
5026 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) | ||
5027 | return 1; | ||
5028 | return 0; | ||
5029 | } | ||
5030 | |||
5031 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | ||
5032 | struct tss_segment_16 *tss) | ||
5033 | { | ||
5034 | tss->ip = kvm_rip_read(vcpu); | ||
5035 | tss->flag = kvm_get_rflags(vcpu); | ||
5036 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
5037 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
5038 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
5039 | tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
5040 | tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
5041 | tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
5042 | tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
5043 | tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
5044 | |||
5045 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
5046 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
5047 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
5048 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
5049 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
5050 | } | ||
5051 | |||
5052 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | ||
5053 | struct tss_segment_16 *tss) | ||
5054 | { | ||
5055 | kvm_rip_write(vcpu, tss->ip); | ||
5056 | kvm_set_rflags(vcpu, tss->flag | 2); | ||
5057 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | ||
5058 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | ||
5059 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | ||
5060 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); | ||
5061 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); | ||
5062 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); | ||
5063 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | ||
5064 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | ||
5065 | |||
5066 | /* | ||
5067 | * SDM says that segment selectors are loaded before segment | ||
5068 | * descriptors | ||
5069 | */ | ||
5070 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
5071 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5072 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5073 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5074 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5075 | |||
5076 | /* | ||
5077 | * Now load segment descriptors. If a fault happens at this stage | ||
5078 | * it is handled in the context of the new task | ||
5079 | */ | ||
5080 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
5081 | return 1; | ||
5082 | |||
5083 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
5084 | return 1; | ||
5085 | |||
5086 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
5087 | return 1; | ||
5088 | |||
5089 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
5090 | return 1; | ||
5091 | |||
5092 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
5093 | return 1; | ||
5094 | return 0; | ||
5095 | } | ||
5096 | |||
5097 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
5098 | u16 old_tss_sel, u32 old_tss_base, | ||
5099 | struct desc_struct *nseg_desc) | ||
5100 | { | ||
5101 | struct tss_segment_16 tss_segment_16; | ||
5102 | int ret = 0; | ||
5103 | |||
5104 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
5105 | sizeof tss_segment_16)) | ||
5106 | goto out; | ||
5107 | |||
5108 | save_state_to_tss16(vcpu, &tss_segment_16); | ||
5109 | |||
5110 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
5111 | sizeof tss_segment_16)) | ||
5112 | goto out; | ||
5113 | |||
5114 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
5115 | &tss_segment_16, sizeof tss_segment_16)) | ||
5116 | goto out; | ||
5117 | |||
5118 | if (old_tss_sel != 0xffff) { | ||
5119 | tss_segment_16.prev_task_link = old_tss_sel; | ||
5120 | |||
5121 | if (kvm_write_guest(vcpu->kvm, | ||
5122 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
5123 | &tss_segment_16.prev_task_link, | ||
5124 | sizeof tss_segment_16.prev_task_link)) | ||
5125 | goto out; | ||
5126 | } | ||
5127 | |||
5128 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | ||
5129 | goto out; | ||
5130 | |||
5131 | ret = 1; | ||
5132 | out: | ||
5133 | return ret; | ||
5134 | } | ||
5135 | |||
5136 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
5137 | u16 old_tss_sel, u32 old_tss_base, | ||
5138 | struct desc_struct *nseg_desc) | ||
5139 | { | ||
5140 | struct tss_segment_32 tss_segment_32; | ||
5141 | int ret = 0; | ||
5142 | |||
5143 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
5144 | sizeof tss_segment_32)) | ||
5145 | goto out; | ||
5146 | |||
5147 | save_state_to_tss32(vcpu, &tss_segment_32); | ||
5148 | |||
5149 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
5150 | sizeof tss_segment_32)) | ||
5151 | goto out; | ||
5152 | |||
5153 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
5154 | &tss_segment_32, sizeof tss_segment_32)) | ||
5155 | goto out; | ||
5156 | |||
5157 | if (old_tss_sel != 0xffff) { | ||
5158 | tss_segment_32.prev_task_link = old_tss_sel; | ||
5159 | |||
5160 | if (kvm_write_guest(vcpu->kvm, | ||
5161 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
5162 | &tss_segment_32.prev_task_link, | ||
5163 | sizeof tss_segment_32.prev_task_link)) | ||
5164 | goto out; | ||
5165 | } | ||
5166 | |||
5167 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | ||
5168 | goto out; | ||
5169 | |||
5170 | ret = 1; | ||
5171 | out: | ||
5172 | return ret; | ||
5173 | } | ||
5174 | |||
5175 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | ||
5176 | { | ||
5177 | struct kvm_segment tr_seg; | ||
5178 | struct desc_struct cseg_desc; | ||
5179 | struct desc_struct nseg_desc; | ||
5180 | int ret = 0; | ||
5181 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
5182 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
5183 | u32 desc_limit; | ||
5184 | |||
5185 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); | ||
5186 | |||
5187 | /* FIXME: Handle errors. Failure to read either TSS or its | ||
5188 | * descriptor should generate a page fault. | ||
5189 | */ | ||
5190 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | ||
5191 | goto out; | ||
5192 | |||
5193 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) | ||
5194 | goto out; | ||
5195 | |||
5196 | if (reason != TASK_SWITCH_IRET) { | ||
5197 | int cpl; | ||
5198 | |||
5199 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
5200 | if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { | ||
5201 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
5202 | return 1; | ||
5203 | } | ||
5204 | } | ||
5205 | |||
5206 | desc_limit = get_desc_limit(&nseg_desc); | ||
5207 | if (!nseg_desc.p || | ||
5208 | ((desc_limit < 0x67 && (nseg_desc.type & 8)) || | ||
5209 | desc_limit < 0x2b)) { | ||
5210 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | ||
5211 | return 1; | ||
5212 | } | ||
5213 | |||
5214 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
5215 | cseg_desc.type &= ~(1 << 1); /* clear the busy (B) flag */ | ||
5216 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); | ||
5217 | } | ||
5218 | |||
5219 | if (reason == TASK_SWITCH_IRET) { | ||
5220 | u32 eflags = kvm_get_rflags(vcpu); | ||
5221 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | ||
5222 | } | ||
5223 | |||
5224 | /* set back link to prev task only if the NT bit is set in eflags; | ||
5225 | note that old_tss_sel is not used after this point */ | ||
5226 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
5227 | old_tss_sel = 0xffff; | ||
5228 | |||
5229 | if (nseg_desc.type & 8) | ||
5230 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | ||
5231 | old_tss_base, &nseg_desc); | ||
5232 | else | ||
5233 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, | ||
5234 | old_tss_base, &nseg_desc); | ||
5235 | |||
5236 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | ||
5237 | u32 eflags = kvm_get_rflags(vcpu); | ||
5238 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); | ||
5239 | } | ||
5240 | |||
5241 | if (reason != TASK_SWITCH_IRET) { | ||
5242 | nseg_desc.type |= (1 << 1); | ||
5243 | save_guest_segment_descriptor(vcpu, tss_selector, | ||
5244 | &nseg_desc); | ||
5245 | } | ||
5246 | |||
5247 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); | ||
5248 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | ||
5249 | tr_seg.type = 11; | ||
5250 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | ||
5251 | out: | ||
5252 | return ret; | ||
5253 | } | 4928 | } |
5254 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 4929 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
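Throughout kvm_task_switch() the descriptor type field doubles as the TSS state: bit 1 is the busy flag, so an available 32-bit TSS (type 9) becomes busy (type 11) when switched to, IRET/JMP clears the bit on the outgoing task's descriptor, and TR is finally reloaded with tr_seg.type = 11. A tiny standalone sketch of that bit manipulation (illustrative only, not part of this commit):

	#include <stdint.h>

	#define TSS_BUSY	(1u << 1)	/* bit 1 of the descriptor type field */

	static inline uint8_t tss_set_busy(uint8_t type)
	{
		return type | TSS_BUSY;		/* 9 (available) -> 11 (busy) */
	}

	static inline uint8_t tss_clear_busy(uint8_t type)
	{
		return type & ~TSS_BUSY;	/* 11 (busy) -> 9 (available) */
	}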
5255 | 4930 | ||
@@ -5258,15 +4933,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5258 | { | 4933 | { |
5259 | int mmu_reset_needed = 0; | 4934 | int mmu_reset_needed = 0; |
5260 | int pending_vec, max_bits; | 4935 | int pending_vec, max_bits; |
5261 | struct descriptor_table dt; | 4936 | struct desc_ptr dt; |
5262 | 4937 | ||
5263 | vcpu_load(vcpu); | 4938 | vcpu_load(vcpu); |
5264 | 4939 | ||
5265 | dt.limit = sregs->idt.limit; | 4940 | dt.size = sregs->idt.limit; |
5266 | dt.base = sregs->idt.base; | 4941 | dt.address = sregs->idt.base; |
5267 | kvm_x86_ops->set_idt(vcpu, &dt); | 4942 | kvm_x86_ops->set_idt(vcpu, &dt); |
5268 | dt.limit = sregs->gdt.limit; | 4943 | dt.size = sregs->gdt.limit; |
5269 | dt.base = sregs->gdt.base; | 4944 | dt.address = sregs->gdt.base; |
5270 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4945 | kvm_x86_ops->set_gdt(vcpu, &dt); |
5271 | 4946 | ||
5272 | vcpu->arch.cr2 = sregs->cr2; | 4947 | vcpu->arch.cr2 = sregs->cr2; |
@@ -5365,11 +5040,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5365 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 5040 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
5366 | } | 5041 | } |
5367 | 5042 | ||
5368 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | 5043 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5369 | vcpu->arch.singlestep_cs = | 5044 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + |
5370 | get_segment_selector(vcpu, VCPU_SREG_CS); | 5045 | get_segment_base(vcpu, VCPU_SREG_CS); |
5371 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
5372 | } | ||
5373 | 5046 | ||
5374 | /* | 5047 | /* |
5375 | * Trigger an rflags update that will inject or remove the trace | 5048 | * Trigger an rflags update that will inject or remove the trace |
@@ -5860,13 +5533,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
5860 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5533 | return kvm_x86_ops->interrupt_allowed(vcpu); |
5861 | } | 5534 | } |
5862 | 5535 | ||
5536 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) | ||
5537 | { | ||
5538 | unsigned long current_rip = kvm_rip_read(vcpu) + | ||
5539 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
5540 | |||
5541 | return current_rip == linear_rip; | ||
5542 | } | ||
5543 | EXPORT_SYMBOL_GPL(kvm_is_linear_rip); | ||
5544 | |||
5863 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | 5545 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) |
5864 | { | 5546 | { |
5865 | unsigned long rflags; | 5547 | unsigned long rflags; |
5866 | 5548 | ||
5867 | rflags = kvm_x86_ops->get_rflags(vcpu); | 5549 | rflags = kvm_x86_ops->get_rflags(vcpu); |
5868 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 5550 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5869 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | 5551 | rflags &= ~X86_EFLAGS_TF; |
5870 | return rflags; | 5552 | return rflags; |
5871 | } | 5553 | } |
5872 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | 5554 | EXPORT_SYMBOL_GPL(kvm_get_rflags); |
@@ -5874,10 +5556,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags); | |||
5874 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 5556 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
5875 | { | 5557 | { |
5876 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | 5558 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && |
5877 | vcpu->arch.singlestep_cs == | 5559 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) |
5878 | get_segment_selector(vcpu, VCPU_SREG_CS) && | 5560 | rflags |= X86_EFLAGS_TF; |
5879 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
5880 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
5881 | kvm_x86_ops->set_rflags(vcpu, rflags); | 5561 | kvm_x86_ops->set_rflags(vcpu, rflags); |
5882 | } | 5562 | } |
5883 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 5563 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
@@ -5893,3 +5573,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | |||
5893 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | 5573 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); |
5894 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | 5574 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); |
5895 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | 5575 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); |
5576 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b7a404722d2b..f4b54458285b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -65,6 +65,13 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
66 | } | 66 | } |
67 | 67 | ||
68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
69 | { | ||
70 | return rcu_dereference_check(kvm->arch.aliases, | ||
71 | srcu_read_lock_held(&kvm->srcu) | ||
72 | || lockdep_is_held(&kvm->slots_lock)); | ||
73 | } | ||
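kvm_aliases() states its locking contract in code: the aliases pointer may be dereferenced either inside an SRCU read-side section on kvm->srcu or with kvm->slots_lock held, and rcu_dereference_check() lets lockdep verify that at every call site. A hedged sketch of a reader on the SRCU side (hypothetical kernel-context fragment; the real lookup paths live in arch/x86/kvm/x86.c):

	int idx = srcu_read_lock(&kvm->srcu);
	struct kvm_mem_aliases *aliases = kvm_aliases(kvm);
	/* ... walk the alias slots while the SRCU read lock pins them ... */
	srcu_read_unlock(&kvm->srcu, idx);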
74 | |||
68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
70 | 77 | ||