Diffstat (limited to 'drivers/kvm/vmx.c')
-rw-r--r--   drivers/kvm/vmx.c   652
1 files changed, 412 insertions, 240 deletions
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index c1ac106ace8c..80628f69916d 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -17,28 +17,35 @@ | |||
17 | 17 | ||
18 | #include "kvm.h" | 18 | #include "kvm.h" |
19 | #include "vmx.h" | 19 | #include "vmx.h" |
20 | #include "segment_descriptor.h" | ||
21 | |||
20 | #include <linux/module.h> | 22 | #include <linux/module.h> |
21 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
22 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
23 | #include <linux/highmem.h> | 25 | #include <linux/highmem.h> |
24 | #include <linux/profile.h> | 26 | #include <linux/profile.h> |
25 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | |||
26 | #include <asm/io.h> | 29 | #include <asm/io.h> |
27 | #include <asm/desc.h> | 30 | #include <asm/desc.h> |
28 | 31 | ||
29 | #include "segment_descriptor.h" | ||
30 | |||
31 | MODULE_AUTHOR("Qumranet"); | 32 | MODULE_AUTHOR("Qumranet"); |
32 | MODULE_LICENSE("GPL"); | 33 | MODULE_LICENSE("GPL"); |
33 | 34 | ||
35 | static int init_rmode_tss(struct kvm *kvm); | ||
36 | |||
34 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 37 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
35 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 38 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
36 | 39 | ||
40 | static struct page *vmx_io_bitmap_a; | ||
41 | static struct page *vmx_io_bitmap_b; | ||
42 | |||
37 | #ifdef CONFIG_X86_64 | 43 | #ifdef CONFIG_X86_64 |
38 | #define HOST_IS_64 1 | 44 | #define HOST_IS_64 1 |
39 | #else | 45 | #else |
40 | #define HOST_IS_64 0 | 46 | #define HOST_IS_64 0 |
41 | #endif | 47 | #endif |
48 | #define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) | ||
42 | 49 | ||
43 | static struct vmcs_descriptor { | 50 | static struct vmcs_descriptor { |
44 | int size; | 51 | int size; |
@@ -82,18 +89,17 @@ static const u32 vmx_msr_index[] = { | |||
82 | }; | 89 | }; |
83 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 90 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
84 | 91 | ||
85 | #ifdef CONFIG_X86_64 | 92 | static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) |
86 | static unsigned msr_offset_kernel_gs_base; | 93 | { |
87 | #define NR_64BIT_MSRS 4 | 94 | return (u64)msr.data & EFER_SAVE_RESTORE_BITS; |
88 | /* | 95 | } |
89 | * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt | 96 | |
90 | * mechanism (cpu bug AA24) | 97 | static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) |
91 | */ | 98 | { |
92 | #define NR_BAD_MSRS 2 | 99 | int efer_offset = vcpu->msr_offset_efer; |
93 | #else | 100 | return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != |
94 | #define NR_64BIT_MSRS 0 | 101 | msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); |
95 | #define NR_BAD_MSRS 0 | 102 | } |
96 | #endif | ||
97 | 103 | ||
98 | static inline int is_page_fault(u32 intr_info) | 104 | static inline int is_page_fault(u32 intr_info) |
99 | { | 105 | { |
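Annotation: the two helpers added in this hunk replace the old NR_64BIT_MSRS/NR_BAD_MSRS bookkeeping. Instead of always switching a fixed set of MSRs, the exit path can now ask whether guest and host EFER actually differ in the bits named by EFER_SAVE_RESTORE_BITS (only EFER.SCE here). A minimal, self-contained sketch of that comparison, using plain integers instead of the kernel's vmx_msr_entry type:

    #include <stdint.h>

    /* Illustrative only -- not part of the patch. */
    #define EFER_SCE               (1ULL << 0)   /* syscall-enable bit      */
    #define EFER_SAVE_RESTORE_BITS EFER_SCE      /* mirrors the patch       */

    /* Non-zero when an explicit EFER switch is needed, i.e. when guest
     * and host disagree on the save/restore bits. */
    static inline int efer_needs_switch(uint64_t guest_efer, uint64_t host_efer)
    {
            return (guest_efer & EFER_SAVE_RESTORE_BITS) !=
                   (host_efer  & EFER_SAVE_RESTORE_BITS);
    }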
@@ -115,13 +121,23 @@ static inline int is_external_interrupt(u32 intr_info) | |||
115 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 121 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
116 | } | 122 | } |
117 | 123 | ||
118 | static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) | 124 | static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) |
119 | { | 125 | { |
120 | int i; | 126 | int i; |
121 | 127 | ||
122 | for (i = 0; i < vcpu->nmsrs; ++i) | 128 | for (i = 0; i < vcpu->nmsrs; ++i) |
123 | if (vcpu->guest_msrs[i].index == msr) | 129 | if (vcpu->guest_msrs[i].index == msr) |
124 | return &vcpu->guest_msrs[i]; | 130 | return i; |
131 | return -1; | ||
132 | } | ||
133 | |||
134 | static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) | ||
135 | { | ||
136 | int i; | ||
137 | |||
138 | i = __find_msr_index(vcpu, msr); | ||
139 | if (i >= 0) | ||
140 | return &vcpu->guest_msrs[i]; | ||
125 | return NULL; | 141 | return NULL; |
126 | } | 142 | } |
127 | 143 | ||
@@ -147,6 +163,7 @@ static void __vcpu_clear(void *arg) | |||
147 | vmcs_clear(vcpu->vmcs); | 163 | vmcs_clear(vcpu->vmcs); |
148 | if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) | 164 | if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) |
149 | per_cpu(current_vmcs, cpu) = NULL; | 165 | per_cpu(current_vmcs, cpu) = NULL; |
166 | rdtscll(vcpu->host_tsc); | ||
150 | } | 167 | } |
151 | 168 | ||
152 | static void vcpu_clear(struct kvm_vcpu *vcpu) | 169 | static void vcpu_clear(struct kvm_vcpu *vcpu) |
@@ -234,6 +251,127 @@ static void vmcs_set_bits(unsigned long field, u32 mask) | |||
234 | vmcs_writel(field, vmcs_readl(field) | mask); | 251 | vmcs_writel(field, vmcs_readl(field) | mask); |
235 | } | 252 | } |
236 | 253 | ||
254 | static void update_exception_bitmap(struct kvm_vcpu *vcpu) | ||
255 | { | ||
256 | u32 eb; | ||
257 | |||
258 | eb = 1u << PF_VECTOR; | ||
259 | if (!vcpu->fpu_active) | ||
260 | eb |= 1u << NM_VECTOR; | ||
261 | if (vcpu->guest_debug.enabled) | ||
262 | eb |= 1u << 1; | ||
263 | if (vcpu->rmode.active) | ||
264 | eb = ~0; | ||
265 | vmcs_write32(EXCEPTION_BITMAP, eb); | ||
266 | } | ||
267 | |||
268 | static void reload_tss(void) | ||
269 | { | ||
270 | #ifndef CONFIG_X86_64 | ||
271 | |||
272 | /* | ||
273 | * VT restores TR but not its size. Useless. | ||
274 | */ | ||
275 | struct descriptor_table gdt; | ||
276 | struct segment_descriptor *descs; | ||
277 | |||
278 | get_gdt(&gdt); | ||
279 | descs = (void *)gdt.base; | ||
280 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | ||
281 | load_TR_desc(); | ||
282 | #endif | ||
283 | } | ||
284 | |||
285 | static void load_transition_efer(struct kvm_vcpu *vcpu) | ||
286 | { | ||
287 | u64 trans_efer; | ||
288 | int efer_offset = vcpu->msr_offset_efer; | ||
289 | |||
290 | trans_efer = vcpu->host_msrs[efer_offset].data; | ||
291 | trans_efer &= ~EFER_SAVE_RESTORE_BITS; | ||
292 | trans_efer |= msr_efer_save_restore_bits( | ||
293 | vcpu->guest_msrs[efer_offset]); | ||
294 | wrmsrl(MSR_EFER, trans_efer); | ||
295 | vcpu->stat.efer_reload++; | ||
296 | } | ||
297 | |||
298 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | ||
299 | { | ||
300 | struct vmx_host_state *hs = &vcpu->vmx_host_state; | ||
301 | |||
302 | if (hs->loaded) | ||
303 | return; | ||
304 | |||
305 | hs->loaded = 1; | ||
306 | /* | ||
307 | * Set host fs and gs selectors. Unfortunately, 22.2.3 does not | ||
308 | * allow segment selectors with cpl > 0 or ti == 1. | ||
309 | */ | ||
310 | hs->ldt_sel = read_ldt(); | ||
311 | hs->fs_gs_ldt_reload_needed = hs->ldt_sel; | ||
312 | hs->fs_sel = read_fs(); | ||
313 | if (!(hs->fs_sel & 7)) | ||
314 | vmcs_write16(HOST_FS_SELECTOR, hs->fs_sel); | ||
315 | else { | ||
316 | vmcs_write16(HOST_FS_SELECTOR, 0); | ||
317 | hs->fs_gs_ldt_reload_needed = 1; | ||
318 | } | ||
319 | hs->gs_sel = read_gs(); | ||
320 | if (!(hs->gs_sel & 7)) | ||
321 | vmcs_write16(HOST_GS_SELECTOR, hs->gs_sel); | ||
322 | else { | ||
323 | vmcs_write16(HOST_GS_SELECTOR, 0); | ||
324 | hs->fs_gs_ldt_reload_needed = 1; | ||
325 | } | ||
326 | |||
327 | #ifdef CONFIG_X86_64 | ||
328 | vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); | ||
329 | vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); | ||
330 | #else | ||
331 | vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); | ||
332 | vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); | ||
333 | #endif | ||
334 | |||
335 | #ifdef CONFIG_X86_64 | ||
336 | if (is_long_mode(vcpu)) { | ||
337 | save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); | ||
338 | } | ||
339 | #endif | ||
340 | load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); | ||
341 | if (msr_efer_need_save_restore(vcpu)) | ||
342 | load_transition_efer(vcpu); | ||
343 | } | ||
344 | |||
345 | static void vmx_load_host_state(struct kvm_vcpu *vcpu) | ||
346 | { | ||
347 | struct vmx_host_state *hs = &vcpu->vmx_host_state; | ||
348 | |||
349 | if (!hs->loaded) | ||
350 | return; | ||
351 | |||
352 | hs->loaded = 0; | ||
353 | if (hs->fs_gs_ldt_reload_needed) { | ||
354 | load_ldt(hs->ldt_sel); | ||
355 | load_fs(hs->fs_sel); | ||
356 | /* | ||
357 | * If we have to reload gs, we must take care to | ||
358 | * preserve our gs base. | ||
359 | */ | ||
360 | local_irq_disable(); | ||
361 | load_gs(hs->gs_sel); | ||
362 | #ifdef CONFIG_X86_64 | ||
363 | wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); | ||
364 | #endif | ||
365 | local_irq_enable(); | ||
366 | |||
367 | reload_tss(); | ||
368 | } | ||
369 | save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); | ||
370 | load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); | ||
371 | if (msr_efer_need_save_restore(vcpu)) | ||
372 | load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); | ||
373 | } | ||
374 | |||
237 | /* | 375 | /* |
238 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | 376 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes |
239 | * vcpu mutex is already taken. | 377 | * vcpu mutex is already taken. |
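Annotation: vmx_save_host_state() and vmx_load_host_state() introduce lazy host-state switching. The host's fs/gs/ldt selectors and the guest MSRs are loaded once per vcpu_load()..vcpu_put() window, guarded by the loaded flag, and undone only when the vcpu is actually put, rather than around every single guest entry. A toy, runnable model of the loaded-flag guard (the puts() calls stand in for the expensive selector and MSR work; everything here is illustrative, not kernel code):

    #include <stdio.h>

    struct host_state { int loaded; };

    static void save_host_state(struct host_state *hs)
    {
            if (hs->loaded)
                    return;        /* already captured in this load/put window */
            hs->loaded = 1;
            puts("save fs/gs/ldt selectors, load guest MSRs");
    }

    static void load_host_state(struct host_state *hs)
    {
            if (!hs->loaded)
                    return;
            hs->loaded = 0;
            puts("restore selectors, reload host MSRs, reload TSS");
    }

    int main(void)
    {
            struct host_state hs = { 0 };
            for (int i = 0; i < 3; i++)   /* three guest entries, one save */
                    save_host_state(&hs);
            load_host_state(&hs);         /* once, at vcpu_put() time      */
            return 0;
    }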
@@ -242,6 +380,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) | |||
242 | { | 380 | { |
243 | u64 phys_addr = __pa(vcpu->vmcs); | 381 | u64 phys_addr = __pa(vcpu->vmcs); |
244 | int cpu; | 382 | int cpu; |
383 | u64 tsc_this, delta; | ||
245 | 384 | ||
246 | cpu = get_cpu(); | 385 | cpu = get_cpu(); |
247 | 386 | ||
@@ -275,15 +414,43 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) | |||
275 | 414 | ||
276 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 415 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
277 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 416 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
417 | |||
418 | /* | ||
419 | * Make sure the time stamp counter is monotonous. | ||
420 | */ | ||
421 | rdtscll(tsc_this); | ||
422 | delta = vcpu->host_tsc - tsc_this; | ||
423 | vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); | ||
278 | } | 424 | } |
279 | } | 425 | } |
280 | 426 | ||
281 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 427 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
282 | { | 428 | { |
429 | vmx_load_host_state(vcpu); | ||
283 | kvm_put_guest_fpu(vcpu); | 430 | kvm_put_guest_fpu(vcpu); |
284 | put_cpu(); | 431 | put_cpu(); |
285 | } | 432 | } |
286 | 433 | ||
434 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | ||
435 | { | ||
436 | if (vcpu->fpu_active) | ||
437 | return; | ||
438 | vcpu->fpu_active = 1; | ||
439 | vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); | ||
440 | if (vcpu->cr0 & CR0_TS_MASK) | ||
441 | vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); | ||
442 | update_exception_bitmap(vcpu); | ||
443 | } | ||
444 | |||
445 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | ||
446 | { | ||
447 | if (!vcpu->fpu_active) | ||
448 | return; | ||
449 | vcpu->fpu_active = 0; | ||
450 | vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); | ||
451 | update_exception_bitmap(vcpu); | ||
452 | } | ||
453 | |||
287 | static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) | 454 | static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) |
288 | { | 455 | { |
289 | vcpu_clear(vcpu); | 456 | vcpu_clear(vcpu); |
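Annotation: the new code in vmx_vcpu_load() keeps the guest's view of the TSC monotonic across physical-CPU migration. __vcpu_clear() now records the old CPU's TSC in vcpu->host_tsc, and when the VMCS is loaded on a different CPU the difference is folded into TSC_OFFSET. A worked example with made-up numbers:

    #include <stdint.h>

    /* Assumed values, for illustration only -- not part of the patch. */
    static uint64_t adjust_tsc_offset(uint64_t old_offset)
    {
            uint64_t host_tsc = 1000;  /* old CPU's TSC, saved in __vcpu_clear() */
            uint64_t tsc_this = 400;   /* rdtscll() on the CPU we migrated to    */
            uint64_t delta    = host_tsc - tsc_this;   /* = 600                  */

            /* guest TSC = hardware TSC + TSC_OFFSET, so growing the offset
             * by delta keeps the guest clock from appearing to run backwards. */
            return old_offset + delta;
    }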
@@ -332,41 +499,61 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) | |||
332 | } | 499 | } |
333 | 500 | ||
334 | /* | 501 | /* |
502 | * Swap MSR entry in host/guest MSR entry array. | ||
503 | */ | ||
504 | void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) | ||
505 | { | ||
506 | struct vmx_msr_entry tmp; | ||
507 | tmp = vcpu->guest_msrs[to]; | ||
508 | vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; | ||
509 | vcpu->guest_msrs[from] = tmp; | ||
510 | tmp = vcpu->host_msrs[to]; | ||
511 | vcpu->host_msrs[to] = vcpu->host_msrs[from]; | ||
512 | vcpu->host_msrs[from] = tmp; | ||
513 | } | ||
514 | |||
515 | /* | ||
335 | * Set up the vmcs to automatically save and restore system | 516 | * Set up the vmcs to automatically save and restore system |
336 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy | 517 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy |
337 | * mode, as fiddling with msrs is very expensive. | 518 | * mode, as fiddling with msrs is very expensive. |
338 | */ | 519 | */ |
339 | static void setup_msrs(struct kvm_vcpu *vcpu) | 520 | static void setup_msrs(struct kvm_vcpu *vcpu) |
340 | { | 521 | { |
341 | int nr_skip, nr_good_msrs; | 522 | int save_nmsrs; |
342 | |||
343 | if (is_long_mode(vcpu)) | ||
344 | nr_skip = NR_BAD_MSRS; | ||
345 | else | ||
346 | nr_skip = NR_64BIT_MSRS; | ||
347 | nr_good_msrs = vcpu->nmsrs - nr_skip; | ||
348 | 523 | ||
349 | /* | 524 | save_nmsrs = 0; |
350 | * MSR_K6_STAR is only needed on long mode guests, and only | ||
351 | * if efer.sce is enabled. | ||
352 | */ | ||
353 | if (find_msr_entry(vcpu, MSR_K6_STAR)) { | ||
354 | --nr_good_msrs; | ||
355 | #ifdef CONFIG_X86_64 | 525 | #ifdef CONFIG_X86_64 |
356 | if (is_long_mode(vcpu) && (vcpu->shadow_efer & EFER_SCE)) | 526 | if (is_long_mode(vcpu)) { |
357 | ++nr_good_msrs; | 527 | int index; |
358 | #endif | 528 | |
529 | index = __find_msr_index(vcpu, MSR_SYSCALL_MASK); | ||
530 | if (index >= 0) | ||
531 | move_msr_up(vcpu, index, save_nmsrs++); | ||
532 | index = __find_msr_index(vcpu, MSR_LSTAR); | ||
533 | if (index >= 0) | ||
534 | move_msr_up(vcpu, index, save_nmsrs++); | ||
535 | index = __find_msr_index(vcpu, MSR_CSTAR); | ||
536 | if (index >= 0) | ||
537 | move_msr_up(vcpu, index, save_nmsrs++); | ||
538 | index = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); | ||
539 | if (index >= 0) | ||
540 | move_msr_up(vcpu, index, save_nmsrs++); | ||
541 | /* | ||
542 | * MSR_K6_STAR is only needed on long mode guests, and only | ||
543 | * if efer.sce is enabled. | ||
544 | */ | ||
545 | index = __find_msr_index(vcpu, MSR_K6_STAR); | ||
546 | if ((index >= 0) && (vcpu->shadow_efer & EFER_SCE)) | ||
547 | move_msr_up(vcpu, index, save_nmsrs++); | ||
359 | } | 548 | } |
549 | #endif | ||
550 | vcpu->save_nmsrs = save_nmsrs; | ||
360 | 551 | ||
361 | vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, | 552 | #ifdef CONFIG_X86_64 |
362 | virt_to_phys(vcpu->guest_msrs + nr_skip)); | 553 | vcpu->msr_offset_kernel_gs_base = |
363 | vmcs_writel(VM_EXIT_MSR_STORE_ADDR, | 554 | __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); |
364 | virt_to_phys(vcpu->guest_msrs + nr_skip)); | 555 | #endif |
365 | vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, | 556 | vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); |
366 | virt_to_phys(vcpu->host_msrs + nr_skip)); | ||
367 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
368 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
369 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
370 | } | 557 | } |
371 | 558 | ||
372 | /* | 559 | /* |
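Annotation: setup_msrs() no longer programs the VMCS MSR auto-switch areas at all. It uses move_msr_up() to compact the MSRs that really need switching (SYSCALL_MASK, LSTAR, CSTAR, KERNEL_GS_BASE, and K6_STAR when EFER.SCE is set) into the first save_nmsrs slots of guest_msrs[]/host_msrs[], so the software save/load paths only touch an array prefix. A simplified, hypothetical model of the swap:

    #include <stdint.h>

    /* Hypothetical stand-in for move_msr_up(); illustration only. */
    struct msr_entry { uint32_t index; uint64_t data; };

    static void swap_entries(struct msr_entry *msrs, int from, int to)
    {
            struct msr_entry tmp = msrs[to];
            msrs[to]   = msrs[from];
            msrs[from] = tmp;
    }

    /* With the arrays compacted, the exit path in the patch only does:
     *     save_msrs(guest_msrs, save_nmsrs);
     *     load_msrs(host_msrs,  save_nmsrs);                            */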
@@ -394,23 +581,6 @@ static void guest_write_tsc(u64 guest_tsc) | |||
394 | vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); | 581 | vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); |
395 | } | 582 | } |
396 | 583 | ||
397 | static void reload_tss(void) | ||
398 | { | ||
399 | #ifndef CONFIG_X86_64 | ||
400 | |||
401 | /* | ||
402 | * VT restores TR but not its size. Useless. | ||
403 | */ | ||
404 | struct descriptor_table gdt; | ||
405 | struct segment_descriptor *descs; | ||
406 | |||
407 | get_gdt(&gdt); | ||
408 | descs = (void *)gdt.base; | ||
409 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | ||
410 | load_TR_desc(); | ||
411 | #endif | ||
412 | } | ||
413 | |||
414 | /* | 584 | /* |
415 | * Reads an msr value (of 'msr_index') into 'pdata'. | 585 | * Reads an msr value (of 'msr_index') into 'pdata'. |
416 | * Returns 0 on success, non-0 otherwise. | 586 | * Returns 0 on success, non-0 otherwise. |
@@ -470,10 +640,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
470 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 640 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
471 | { | 641 | { |
472 | struct vmx_msr_entry *msr; | 642 | struct vmx_msr_entry *msr; |
643 | int ret = 0; | ||
644 | |||
473 | switch (msr_index) { | 645 | switch (msr_index) { |
474 | #ifdef CONFIG_X86_64 | 646 | #ifdef CONFIG_X86_64 |
475 | case MSR_EFER: | 647 | case MSR_EFER: |
476 | return kvm_set_msr_common(vcpu, msr_index, data); | 648 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
649 | if (vcpu->vmx_host_state.loaded) | ||
650 | load_transition_efer(vcpu); | ||
651 | break; | ||
477 | case MSR_FS_BASE: | 652 | case MSR_FS_BASE: |
478 | vmcs_writel(GUEST_FS_BASE, data); | 653 | vmcs_writel(GUEST_FS_BASE, data); |
479 | break; | 654 | break; |
@@ -497,14 +672,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
497 | msr = find_msr_entry(vcpu, msr_index); | 672 | msr = find_msr_entry(vcpu, msr_index); |
498 | if (msr) { | 673 | if (msr) { |
499 | msr->data = data; | 674 | msr->data = data; |
675 | if (vcpu->vmx_host_state.loaded) | ||
676 | load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); | ||
500 | break; | 677 | break; |
501 | } | 678 | } |
502 | return kvm_set_msr_common(vcpu, msr_index, data); | 679 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
503 | msr->data = data; | ||
504 | break; | ||
505 | } | 680 | } |
506 | 681 | ||
507 | return 0; | 682 | return ret; |
508 | } | 683 | } |
509 | 684 | ||
510 | /* | 685 | /* |
@@ -530,10 +705,8 @@ static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu) | |||
530 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | 705 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) |
531 | { | 706 | { |
532 | unsigned long dr7 = 0x400; | 707 | unsigned long dr7 = 0x400; |
533 | u32 exception_bitmap; | ||
534 | int old_singlestep; | 708 | int old_singlestep; |
535 | 709 | ||
536 | exception_bitmap = vmcs_read32(EXCEPTION_BITMAP); | ||
537 | old_singlestep = vcpu->guest_debug.singlestep; | 710 | old_singlestep = vcpu->guest_debug.singlestep; |
538 | 711 | ||
539 | vcpu->guest_debug.enabled = dbg->enabled; | 712 | vcpu->guest_debug.enabled = dbg->enabled; |
@@ -549,13 +722,9 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | |||
549 | dr7 |= 0 << (i*4+16); /* execution breakpoint */ | 722 | dr7 |= 0 << (i*4+16); /* execution breakpoint */ |
550 | } | 723 | } |
551 | 724 | ||
552 | exception_bitmap |= (1u << 1); /* Trap debug exceptions */ | ||
553 | |||
554 | vcpu->guest_debug.singlestep = dbg->singlestep; | 725 | vcpu->guest_debug.singlestep = dbg->singlestep; |
555 | } else { | 726 | } else |
556 | exception_bitmap &= ~(1u << 1); /* Ignore debug exceptions */ | ||
557 | vcpu->guest_debug.singlestep = 0; | 727 | vcpu->guest_debug.singlestep = 0; |
558 | } | ||
559 | 728 | ||
560 | if (old_singlestep && !vcpu->guest_debug.singlestep) { | 729 | if (old_singlestep && !vcpu->guest_debug.singlestep) { |
561 | unsigned long flags; | 730 | unsigned long flags; |
@@ -565,7 +734,7 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | |||
565 | vmcs_writel(GUEST_RFLAGS, flags); | 734 | vmcs_writel(GUEST_RFLAGS, flags); |
566 | } | 735 | } |
567 | 736 | ||
568 | vmcs_write32(EXCEPTION_BITMAP, exception_bitmap); | 737 | update_exception_bitmap(vcpu); |
569 | vmcs_writel(GUEST_DR7, dr7); | 738 | vmcs_writel(GUEST_DR7, dr7); |
570 | 739 | ||
571 | return 0; | 740 | return 0; |
@@ -679,14 +848,6 @@ static __exit void hardware_unsetup(void) | |||
679 | free_kvm_area(); | 848 | free_kvm_area(); |
680 | } | 849 | } |
681 | 850 | ||
682 | static void update_exception_bitmap(struct kvm_vcpu *vcpu) | ||
683 | { | ||
684 | if (vcpu->rmode.active) | ||
685 | vmcs_write32(EXCEPTION_BITMAP, ~0); | ||
686 | else | ||
687 | vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR); | ||
688 | } | ||
689 | |||
690 | static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) | 851 | static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) |
691 | { | 852 | { |
692 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 853 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
@@ -793,6 +954,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
793 | fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds); | 954 | fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds); |
794 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); | 955 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); |
795 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); | 956 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); |
957 | |||
958 | init_rmode_tss(vcpu->kvm); | ||
796 | } | 959 | } |
797 | 960 | ||
798 | #ifdef CONFIG_X86_64 | 961 | #ifdef CONFIG_X86_64 |
@@ -837,6 +1000,8 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
837 | 1000 | ||
838 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1001 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
839 | { | 1002 | { |
1003 | vmx_fpu_deactivate(vcpu); | ||
1004 | |||
840 | if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) | 1005 | if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) |
841 | enter_pmode(vcpu); | 1006 | enter_pmode(vcpu); |
842 | 1007 | ||
@@ -852,26 +1017,20 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
852 | } | 1017 | } |
853 | #endif | 1018 | #endif |
854 | 1019 | ||
855 | if (!(cr0 & CR0_TS_MASK)) { | ||
856 | vcpu->fpu_active = 1; | ||
857 | vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK); | ||
858 | } | ||
859 | |||
860 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1020 | vmcs_writel(CR0_READ_SHADOW, cr0); |
861 | vmcs_writel(GUEST_CR0, | 1021 | vmcs_writel(GUEST_CR0, |
862 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); | 1022 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); |
863 | vcpu->cr0 = cr0; | 1023 | vcpu->cr0 = cr0; |
1024 | |||
1025 | if (!(cr0 & CR0_TS_MASK) || !(cr0 & CR0_PE_MASK)) | ||
1026 | vmx_fpu_activate(vcpu); | ||
864 | } | 1027 | } |
865 | 1028 | ||
866 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 1029 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
867 | { | 1030 | { |
868 | vmcs_writel(GUEST_CR3, cr3); | 1031 | vmcs_writel(GUEST_CR3, cr3); |
869 | 1032 | if (vcpu->cr0 & CR0_PE_MASK) | |
870 | if (!(vcpu->cr0 & CR0_TS_MASK)) { | 1033 | vmx_fpu_deactivate(vcpu); |
871 | vcpu->fpu_active = 0; | ||
872 | vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); | ||
873 | vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); | ||
874 | } | ||
875 | } | 1034 | } |
876 | 1035 | ||
877 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1036 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
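Annotation: together with vmx_fpu_activate()/vmx_fpu_deactivate() added earlier in the patch, these hunks make FPU switching lazy. CR3 loads (while CR0.PE is set) and CR0 writes deactivate the guest FPU by forcing TS in GUEST_CR0 and trapping #NM via update_exception_bitmap(); the FPU is reactivated on the first #NM exit, on CLTS emulation, or when the new CR0 has TS or PE clear. A condensed sketch of that last rule, with the standard CR0 bit positions assumed:

    /* Assumed standard x86 CR0 bit positions; sketch only. */
    #define CR0_PE_MASK (1UL << 0)
    #define CR0_TS_MASK (1UL << 3)

    /* Condensed from the end of vmx_set_cr0() in the patch: the guest FPU
     * is re-activated whenever the new CR0 has TS clear or PE clear;
     * otherwise it stays deactivated with #NM trapped until first use.   */
    static int fpu_should_activate(unsigned long cr0)
    {
            return !(cr0 & CR0_TS_MASK) || !(cr0 & CR0_PE_MASK);
    }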
@@ -937,23 +1096,11 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
937 | var->unusable = (ar >> 16) & 1; | 1096 | var->unusable = (ar >> 16) & 1; |
938 | } | 1097 | } |
939 | 1098 | ||
940 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 1099 | static u32 vmx_segment_access_rights(struct kvm_segment *var) |
941 | struct kvm_segment *var, int seg) | ||
942 | { | 1100 | { |
943 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
944 | u32 ar; | 1101 | u32 ar; |
945 | 1102 | ||
946 | vmcs_writel(sf->base, var->base); | 1103 | if (var->unusable) |
947 | vmcs_write32(sf->limit, var->limit); | ||
948 | vmcs_write16(sf->selector, var->selector); | ||
949 | if (vcpu->rmode.active && var->s) { | ||
950 | /* | ||
951 | * Hack real-mode segments into vm86 compatibility. | ||
952 | */ | ||
953 | if (var->base == 0xffff0000 && var->selector == 0xf000) | ||
954 | vmcs_writel(sf->base, 0xf0000); | ||
955 | ar = 0xf3; | ||
956 | } else if (var->unusable) | ||
957 | ar = 1 << 16; | 1104 | ar = 1 << 16; |
958 | else { | 1105 | else { |
959 | ar = var->type & 15; | 1106 | ar = var->type & 15; |
@@ -967,6 +1114,35 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
967 | } | 1114 | } |
968 | if (ar == 0) /* a 0 value means unusable */ | 1115 | if (ar == 0) /* a 0 value means unusable */ |
969 | ar = AR_UNUSABLE_MASK; | 1116 | ar = AR_UNUSABLE_MASK; |
1117 | |||
1118 | return ar; | ||
1119 | } | ||
1120 | |||
1121 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | ||
1122 | struct kvm_segment *var, int seg) | ||
1123 | { | ||
1124 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
1125 | u32 ar; | ||
1126 | |||
1127 | if (vcpu->rmode.active && seg == VCPU_SREG_TR) { | ||
1128 | vcpu->rmode.tr.selector = var->selector; | ||
1129 | vcpu->rmode.tr.base = var->base; | ||
1130 | vcpu->rmode.tr.limit = var->limit; | ||
1131 | vcpu->rmode.tr.ar = vmx_segment_access_rights(var); | ||
1132 | return; | ||
1133 | } | ||
1134 | vmcs_writel(sf->base, var->base); | ||
1135 | vmcs_write32(sf->limit, var->limit); | ||
1136 | vmcs_write16(sf->selector, var->selector); | ||
1137 | if (vcpu->rmode.active && var->s) { | ||
1138 | /* | ||
1139 | * Hack real-mode segments into vm86 compatibility. | ||
1140 | */ | ||
1141 | if (var->base == 0xffff0000 && var->selector == 0xf000) | ||
1142 | vmcs_writel(sf->base, 0xf0000); | ||
1143 | ar = 0xf3; | ||
1144 | } else | ||
1145 | ar = vmx_segment_access_rights(var); | ||
970 | vmcs_write32(sf->ar_bytes, ar); | 1146 | vmcs_write32(sf->ar_bytes, ar); |
971 | } | 1147 | } |
972 | 1148 | ||
@@ -1018,16 +1194,16 @@ static int init_rmode_tss(struct kvm* kvm) | |||
1018 | } | 1194 | } |
1019 | 1195 | ||
1020 | page = kmap_atomic(p1, KM_USER0); | 1196 | page = kmap_atomic(p1, KM_USER0); |
1021 | memset(page, 0, PAGE_SIZE); | 1197 | clear_page(page); |
1022 | *(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; | 1198 | *(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; |
1023 | kunmap_atomic(page, KM_USER0); | 1199 | kunmap_atomic(page, KM_USER0); |
1024 | 1200 | ||
1025 | page = kmap_atomic(p2, KM_USER0); | 1201 | page = kmap_atomic(p2, KM_USER0); |
1026 | memset(page, 0, PAGE_SIZE); | 1202 | clear_page(page); |
1027 | kunmap_atomic(page, KM_USER0); | 1203 | kunmap_atomic(page, KM_USER0); |
1028 | 1204 | ||
1029 | page = kmap_atomic(p3, KM_USER0); | 1205 | page = kmap_atomic(p3, KM_USER0); |
1030 | memset(page, 0, PAGE_SIZE); | 1206 | clear_page(page); |
1031 | *(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0; | 1207 | *(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0; |
1032 | kunmap_atomic(page, KM_USER0); | 1208 | kunmap_atomic(page, KM_USER0); |
1033 | 1209 | ||
@@ -1066,7 +1242,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1066 | struct descriptor_table dt; | 1242 | struct descriptor_table dt; |
1067 | int i; | 1243 | int i; |
1068 | int ret = 0; | 1244 | int ret = 0; |
1069 | extern asmlinkage void kvm_vmx_return(void); | 1245 | unsigned long kvm_vmx_return; |
1070 | 1246 | ||
1071 | if (!init_rmode_tss(vcpu->kvm)) { | 1247 | if (!init_rmode_tss(vcpu->kvm)) { |
1072 | ret = -ENOMEM; | 1248 | ret = -ENOMEM; |
@@ -1076,9 +1252,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1076 | memset(vcpu->regs, 0, sizeof(vcpu->regs)); | 1252 | memset(vcpu->regs, 0, sizeof(vcpu->regs)); |
1077 | vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 1253 | vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
1078 | vcpu->cr8 = 0; | 1254 | vcpu->cr8 = 0; |
1079 | vcpu->apic_base = 0xfee00000 | | 1255 | vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
1080 | /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | | 1256 | if (vcpu == &vcpu->kvm->vcpus[0]) |
1081 | MSR_IA32_APICBASE_ENABLE; | 1257 | vcpu->apic_base |= MSR_IA32_APICBASE_BSP; |
1082 | 1258 | ||
1083 | fx_init(vcpu); | 1259 | fx_init(vcpu); |
1084 | 1260 | ||
@@ -1129,8 +1305,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1129 | vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); | 1305 | vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); |
1130 | 1306 | ||
1131 | /* I/O */ | 1307 | /* I/O */ |
1132 | vmcs_write64(IO_BITMAP_A, 0); | 1308 | vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); |
1133 | vmcs_write64(IO_BITMAP_B, 0); | 1309 | vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); |
1134 | 1310 | ||
1135 | guest_write_tsc(0); | 1311 | guest_write_tsc(0); |
1136 | 1312 | ||
@@ -1150,12 +1326,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1150 | CPU_BASED_HLT_EXITING /* 20.6.2 */ | 1326 | CPU_BASED_HLT_EXITING /* 20.6.2 */ |
1151 | | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ | 1327 | | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ |
1152 | | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ | 1328 | | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ |
1153 | | CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */ | 1329 | | CPU_BASED_ACTIVATE_IO_BITMAP /* 20.6.2 */ |
1154 | | CPU_BASED_MOV_DR_EXITING | 1330 | | CPU_BASED_MOV_DR_EXITING |
1155 | | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ | 1331 | | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ |
1156 | ); | 1332 | ); |
1157 | 1333 | ||
1158 | vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR); | ||
1159 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); | 1334 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); |
1160 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); | 1335 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); |
1161 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 1336 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
@@ -1185,8 +1360,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1185 | get_idt(&dt); | 1360 | get_idt(&dt); |
1186 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ | 1361 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ |
1187 | 1362 | ||
1188 | 1363 | asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | |
1189 | vmcs_writel(HOST_RIP, (unsigned long)kvm_vmx_return); /* 22.2.5 */ | 1364 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ |
1365 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | ||
1366 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | ||
1367 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); | ||
1190 | 1368 | ||
1191 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); | 1369 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); |
1192 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); | 1370 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); |
@@ -1210,10 +1388,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1210 | vcpu->host_msrs[j].reserved = 0; | 1388 | vcpu->host_msrs[j].reserved = 0; |
1211 | vcpu->host_msrs[j].data = data; | 1389 | vcpu->host_msrs[j].data = data; |
1212 | vcpu->guest_msrs[j] = vcpu->host_msrs[j]; | 1390 | vcpu->guest_msrs[j] = vcpu->host_msrs[j]; |
1213 | #ifdef CONFIG_X86_64 | ||
1214 | if (index == MSR_KERNEL_GS_BASE) | ||
1215 | msr_offset_kernel_gs_base = j; | ||
1216 | #endif | ||
1217 | ++vcpu->nmsrs; | 1391 | ++vcpu->nmsrs; |
1218 | } | 1392 | } |
1219 | 1393 | ||
@@ -1241,6 +1415,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
1241 | #ifdef CONFIG_X86_64 | 1415 | #ifdef CONFIG_X86_64 |
1242 | vmx_set_efer(vcpu, 0); | 1416 | vmx_set_efer(vcpu, 0); |
1243 | #endif | 1417 | #endif |
1418 | vmx_fpu_activate(vcpu); | ||
1419 | update_exception_bitmap(vcpu); | ||
1244 | 1420 | ||
1245 | return 0; | 1421 | return 0; |
1246 | 1422 | ||
@@ -1365,7 +1541,11 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
1365 | if (!vcpu->rmode.active) | 1541 | if (!vcpu->rmode.active) |
1366 | return 0; | 1542 | return 0; |
1367 | 1543 | ||
1368 | if (vec == GP_VECTOR && err_code == 0) | 1544 | /* |
1545 | * Instruction with address size override prefix opcode 0x67 | ||
1546 | * Cause the #SS fault with 0 error code in VM86 mode. | ||
1547 | */ | ||
1548 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | ||
1369 | if (emulate_instruction(vcpu, NULL, 0, 0) == EMULATE_DONE) | 1549 | if (emulate_instruction(vcpu, NULL, 0, 0) == EMULATE_DONE) |
1370 | return 1; | 1550 | return 1; |
1371 | return 0; | 1551 | return 0; |
@@ -1400,10 +1580,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1400 | } | 1580 | } |
1401 | 1581 | ||
1402 | if (is_no_device(intr_info)) { | 1582 | if (is_no_device(intr_info)) { |
1403 | vcpu->fpu_active = 1; | 1583 | vmx_fpu_activate(vcpu); |
1404 | vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); | ||
1405 | if (!(vcpu->cr0 & CR0_TS_MASK)) | ||
1406 | vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); | ||
1407 | return 1; | 1584 | return 1; |
1408 | } | 1585 | } |
1409 | 1586 | ||
@@ -1445,8 +1622,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1445 | 1622 | ||
1446 | if (vcpu->rmode.active && | 1623 | if (vcpu->rmode.active && |
1447 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, | 1624 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, |
1448 | error_code)) | 1625 | error_code)) { |
1626 | if (vcpu->halt_request) { | ||
1627 | vcpu->halt_request = 0; | ||
1628 | return kvm_emulate_halt(vcpu); | ||
1629 | } | ||
1449 | return 1; | 1630 | return 1; |
1631 | } | ||
1450 | 1632 | ||
1451 | if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) { | 1633 | if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) { |
1452 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1634 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
@@ -1595,11 +1777,10 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1595 | break; | 1777 | break; |
1596 | case 2: /* clts */ | 1778 | case 2: /* clts */ |
1597 | vcpu_load_rsp_rip(vcpu); | 1779 | vcpu_load_rsp_rip(vcpu); |
1598 | vcpu->fpu_active = 1; | 1780 | vmx_fpu_deactivate(vcpu); |
1599 | vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); | ||
1600 | vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); | ||
1601 | vcpu->cr0 &= ~CR0_TS_MASK; | 1781 | vcpu->cr0 &= ~CR0_TS_MASK; |
1602 | vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); | 1782 | vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); |
1783 | vmx_fpu_activate(vcpu); | ||
1603 | skip_emulated_instruction(vcpu); | 1784 | skip_emulated_instruction(vcpu); |
1604 | return 1; | 1785 | return 1; |
1605 | case 1: /*mov from cr*/ | 1786 | case 1: /*mov from cr*/ |
@@ -1734,12 +1915,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
1734 | static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1915 | static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1735 | { | 1916 | { |
1736 | skip_emulated_instruction(vcpu); | 1917 | skip_emulated_instruction(vcpu); |
1737 | if (vcpu->irq_summary) | 1918 | return kvm_emulate_halt(vcpu); |
1738 | return 1; | ||
1739 | |||
1740 | kvm_run->exit_reason = KVM_EXIT_HLT; | ||
1741 | ++vcpu->stat.halt_exits; | ||
1742 | return 0; | ||
1743 | } | 1919 | } |
1744 | 1920 | ||
1745 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1921 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
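Annotation: handle_halt() now delegates to kvm_emulate_halt(), and handle_exception() routes real-mode hlt emulation (vcpu->halt_request) to the same helper. A hedged sketch of what that helper is assumed to do, reconstructed purely from the code it replaces above; the name and signature here are hypothetical:

    /* Assumed behaviour, based on the removed handle_halt() body. */
    static int kvm_emulate_halt_sketch(struct kvm_vcpu *vcpu,
                                       struct kvm_run *kvm_run)
    {
            if (vcpu->irq_summary)                /* interrupt pending: keep running */
                    return 1;
            kvm_run->exit_reason = KVM_EXIT_HLT;  /* otherwise exit to user space    */
            ++vcpu->stat.halt_exits;
            return 0;
    }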
@@ -1770,7 +1946,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
1770 | }; | 1946 | }; |
1771 | 1947 | ||
1772 | static const int kvm_vmx_max_exit_handlers = | 1948 | static const int kvm_vmx_max_exit_handlers = |
1773 | sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers); | 1949 | ARRAY_SIZE(kvm_vmx_exit_handlers); |
1774 | 1950 | ||
1775 | /* | 1951 | /* |
1776 | * The guest has exited. See if we can fix it or if we need userspace | 1952 | * The guest has exited. See if we can fix it or if we need userspace |
@@ -1810,61 +1986,44 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | |||
1810 | (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); | 1986 | (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); |
1811 | } | 1987 | } |
1812 | 1988 | ||
1989 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | ||
1990 | { | ||
1991 | } | ||
1992 | |||
1813 | static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1993 | static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1814 | { | 1994 | { |
1815 | u8 fail; | 1995 | u8 fail; |
1816 | u16 fs_sel, gs_sel, ldt_sel; | ||
1817 | int fs_gs_ldt_reload_needed; | ||
1818 | int r; | 1996 | int r; |
1819 | 1997 | ||
1820 | again: | 1998 | preempted: |
1821 | /* | 1999 | if (vcpu->guest_debug.enabled) |
1822 | * Set host fs and gs selectors. Unfortunately, 22.2.3 does not | 2000 | kvm_guest_debug_pre(vcpu); |
1823 | * allow segment selectors with cpl > 0 or ti == 1. | ||
1824 | */ | ||
1825 | fs_sel = read_fs(); | ||
1826 | gs_sel = read_gs(); | ||
1827 | ldt_sel = read_ldt(); | ||
1828 | fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel; | ||
1829 | if (!fs_gs_ldt_reload_needed) { | ||
1830 | vmcs_write16(HOST_FS_SELECTOR, fs_sel); | ||
1831 | vmcs_write16(HOST_GS_SELECTOR, gs_sel); | ||
1832 | } else { | ||
1833 | vmcs_write16(HOST_FS_SELECTOR, 0); | ||
1834 | vmcs_write16(HOST_GS_SELECTOR, 0); | ||
1835 | } | ||
1836 | |||
1837 | #ifdef CONFIG_X86_64 | ||
1838 | vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); | ||
1839 | vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); | ||
1840 | #else | ||
1841 | vmcs_writel(HOST_FS_BASE, segment_base(fs_sel)); | ||
1842 | vmcs_writel(HOST_GS_BASE, segment_base(gs_sel)); | ||
1843 | #endif | ||
1844 | 2001 | ||
2002 | again: | ||
1845 | if (!vcpu->mmio_read_completed) | 2003 | if (!vcpu->mmio_read_completed) |
1846 | do_interrupt_requests(vcpu, kvm_run); | 2004 | do_interrupt_requests(vcpu, kvm_run); |
1847 | 2005 | ||
1848 | if (vcpu->guest_debug.enabled) | 2006 | vmx_save_host_state(vcpu); |
1849 | kvm_guest_debug_pre(vcpu); | ||
1850 | |||
1851 | kvm_load_guest_fpu(vcpu); | 2007 | kvm_load_guest_fpu(vcpu); |
1852 | 2008 | ||
2009 | r = kvm_mmu_reload(vcpu); | ||
2010 | if (unlikely(r)) | ||
2011 | goto out; | ||
2012 | |||
1853 | /* | 2013 | /* |
1854 | * Loading guest fpu may have cleared host cr0.ts | 2014 | * Loading guest fpu may have cleared host cr0.ts |
1855 | */ | 2015 | */ |
1856 | vmcs_writel(HOST_CR0, read_cr0()); | 2016 | vmcs_writel(HOST_CR0, read_cr0()); |
1857 | 2017 | ||
1858 | #ifdef CONFIG_X86_64 | 2018 | local_irq_disable(); |
1859 | if (is_long_mode(vcpu)) { | 2019 | |
1860 | save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); | 2020 | vcpu->guest_mode = 1; |
1861 | load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); | 2021 | if (vcpu->requests) |
1862 | } | 2022 | if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) |
1863 | #endif | 2023 | vmx_flush_tlb(vcpu); |
1864 | 2024 | ||
1865 | asm ( | 2025 | asm ( |
1866 | /* Store host registers */ | 2026 | /* Store host registers */ |
1867 | "pushf \n\t" | ||
1868 | #ifdef CONFIG_X86_64 | 2027 | #ifdef CONFIG_X86_64 |
1869 | "push %%rax; push %%rbx; push %%rdx;" | 2028 | "push %%rax; push %%rbx; push %%rdx;" |
1870 | "push %%rsi; push %%rdi; push %%rbp;" | 2029 | "push %%rsi; push %%rdi; push %%rbp;" |
@@ -1909,12 +2068,11 @@ again: | |||
1909 | "mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */ | 2068 | "mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */ |
1910 | #endif | 2069 | #endif |
1911 | /* Enter guest mode */ | 2070 | /* Enter guest mode */ |
1912 | "jne launched \n\t" | 2071 | "jne .Llaunched \n\t" |
1913 | ASM_VMX_VMLAUNCH "\n\t" | 2072 | ASM_VMX_VMLAUNCH "\n\t" |
1914 | "jmp kvm_vmx_return \n\t" | 2073 | "jmp .Lkvm_vmx_return \n\t" |
1915 | "launched: " ASM_VMX_VMRESUME "\n\t" | 2074 | ".Llaunched: " ASM_VMX_VMRESUME "\n\t" |
1916 | ".globl kvm_vmx_return \n\t" | 2075 | ".Lkvm_vmx_return: " |
1917 | "kvm_vmx_return: " | ||
1918 | /* Save guest registers, load host registers, keep flags */ | 2076 | /* Save guest registers, load host registers, keep flags */ |
1919 | #ifdef CONFIG_X86_64 | 2077 | #ifdef CONFIG_X86_64 |
1920 | "xchg %3, (%%rsp) \n\t" | 2078 | "xchg %3, (%%rsp) \n\t" |
@@ -1957,7 +2115,6 @@ again: | |||
1957 | "pop %%ecx; popa \n\t" | 2115 | "pop %%ecx; popa \n\t" |
1958 | #endif | 2116 | #endif |
1959 | "setbe %0 \n\t" | 2117 | "setbe %0 \n\t" |
1960 | "popf \n\t" | ||
1961 | : "=q" (fail) | 2118 | : "=q" (fail) |
1962 | : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), | 2119 | : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), |
1963 | "c"(vcpu), | 2120 | "c"(vcpu), |
@@ -1981,84 +2138,61 @@ again: | |||
1981 | [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) | 2138 | [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) |
1982 | : "cc", "memory" ); | 2139 | : "cc", "memory" ); |
1983 | 2140 | ||
1984 | /* | 2141 | vcpu->guest_mode = 0; |
1985 | * Reload segment selectors ASAP. (it's needed for a functional | 2142 | local_irq_enable(); |
1986 | * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 | ||
1987 | * relies on having 0 in %gs for the CPU PDA to work.) | ||
1988 | */ | ||
1989 | if (fs_gs_ldt_reload_needed) { | ||
1990 | load_ldt(ldt_sel); | ||
1991 | load_fs(fs_sel); | ||
1992 | /* | ||
1993 | * If we have to reload gs, we must take care to | ||
1994 | * preserve our gs base. | ||
1995 | */ | ||
1996 | local_irq_disable(); | ||
1997 | load_gs(gs_sel); | ||
1998 | #ifdef CONFIG_X86_64 | ||
1999 | wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); | ||
2000 | #endif | ||
2001 | local_irq_enable(); | ||
2002 | 2143 | ||
2003 | reload_tss(); | ||
2004 | } | ||
2005 | ++vcpu->stat.exits; | 2144 | ++vcpu->stat.exits; |
2006 | 2145 | ||
2007 | #ifdef CONFIG_X86_64 | ||
2008 | if (is_long_mode(vcpu)) { | ||
2009 | save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); | ||
2010 | load_msrs(vcpu->host_msrs, NR_BAD_MSRS); | ||
2011 | } | ||
2012 | #endif | ||
2013 | |||
2014 | vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; | 2146 | vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; |
2015 | 2147 | ||
2016 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 2148 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
2017 | 2149 | ||
2018 | if (fail) { | 2150 | if (unlikely(fail)) { |
2019 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2151 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2020 | kvm_run->fail_entry.hardware_entry_failure_reason | 2152 | kvm_run->fail_entry.hardware_entry_failure_reason |
2021 | = vmcs_read32(VM_INSTRUCTION_ERROR); | 2153 | = vmcs_read32(VM_INSTRUCTION_ERROR); |
2022 | r = 0; | 2154 | r = 0; |
2023 | } else { | 2155 | goto out; |
2024 | /* | 2156 | } |
2025 | * Profile KVM exit RIPs: | 2157 | /* |
2026 | */ | 2158 | * Profile KVM exit RIPs: |
2027 | if (unlikely(prof_on == KVM_PROFILING)) | 2159 | */ |
2028 | profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); | 2160 | if (unlikely(prof_on == KVM_PROFILING)) |
2029 | 2161 | profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); | |
2030 | vcpu->launched = 1; | 2162 | |
2031 | r = kvm_handle_exit(kvm_run, vcpu); | 2163 | vcpu->launched = 1; |
2032 | if (r > 0) { | 2164 | r = kvm_handle_exit(kvm_run, vcpu); |
2033 | /* Give scheduler a change to reschedule. */ | 2165 | if (r > 0) { |
2034 | if (signal_pending(current)) { | 2166 | /* Give scheduler a change to reschedule. */ |
2035 | ++vcpu->stat.signal_exits; | 2167 | if (signal_pending(current)) { |
2036 | post_kvm_run_save(vcpu, kvm_run); | 2168 | r = -EINTR; |
2037 | kvm_run->exit_reason = KVM_EXIT_INTR; | 2169 | kvm_run->exit_reason = KVM_EXIT_INTR; |
2038 | return -EINTR; | 2170 | ++vcpu->stat.signal_exits; |
2039 | } | 2171 | goto out; |
2040 | 2172 | } | |
2041 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 2173 | |
2042 | ++vcpu->stat.request_irq_exits; | 2174 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { |
2043 | post_kvm_run_save(vcpu, kvm_run); | 2175 | r = -EINTR; |
2044 | kvm_run->exit_reason = KVM_EXIT_INTR; | 2176 | kvm_run->exit_reason = KVM_EXIT_INTR; |
2045 | return -EINTR; | 2177 | ++vcpu->stat.request_irq_exits; |
2046 | } | 2178 | goto out; |
2047 | 2179 | } | |
2048 | kvm_resched(vcpu); | 2180 | if (!need_resched()) { |
2181 | ++vcpu->stat.light_exits; | ||
2049 | goto again; | 2182 | goto again; |
2050 | } | 2183 | } |
2051 | } | 2184 | } |
2052 | 2185 | ||
2186 | out: | ||
2187 | if (r > 0) { | ||
2188 | kvm_resched(vcpu); | ||
2189 | goto preempted; | ||
2190 | } | ||
2191 | |||
2053 | post_kvm_run_save(vcpu, kvm_run); | 2192 | post_kvm_run_save(vcpu, kvm_run); |
2054 | return r; | 2193 | return r; |
2055 | } | 2194 | } |
2056 | 2195 | ||
2057 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | ||
2058 | { | ||
2059 | vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3)); | ||
2060 | } | ||
2061 | |||
2062 | static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, | 2196 | static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, |
2063 | unsigned long addr, | 2197 | unsigned long addr, |
2064 | u32 err_code) | 2198 | u32 err_code) |
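Annotation: the run loop is reorganized around three labels. preempted runs the guest-debug pre-hook after a reschedule, again is the hot path, and out is the error/heavyweight-exit path; light exits loop straight back to again without returning to the caller. A compressed view of the control flow as it appears in the two vmx_vcpu_run() hunks above, as C comments:

    /* Control-flow sketch of the restructured vmx_vcpu_run():              */
    /* preempted: run kvm_guest_debug_pre() if debugging is enabled          */
    /* again:     do_interrupt_requests(), vmx_save_host_state(),            */
    /*            kvm_load_guest_fpu(), kvm_mmu_reload(), irqs off,          */
    /*            VMLAUNCH/VMRESUME, irqs on, kvm_handle_exit()              */
    /*            - signal or irq-injection request: r = -EINTR, goto out    */
    /*            - light exit and !need_resched(): ++light_exits, goto again*/
    /* out:       if r > 0, kvm_resched() and goto preempted;                */
    /*            otherwise post_kvm_run_save() and return r                 */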
@@ -2122,7 +2256,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) | |||
2122 | vmcs_clear(vmcs); | 2256 | vmcs_clear(vmcs); |
2123 | vcpu->vmcs = vmcs; | 2257 | vcpu->vmcs = vmcs; |
2124 | vcpu->launched = 0; | 2258 | vcpu->launched = 0; |
2125 | vcpu->fpu_active = 1; | ||
2126 | 2259 | ||
2127 | return 0; | 2260 | return 0; |
2128 | 2261 | ||
@@ -2188,11 +2321,50 @@ static struct kvm_arch_ops vmx_arch_ops = { | |||
2188 | 2321 | ||
2189 | static int __init vmx_init(void) | 2322 | static int __init vmx_init(void) |
2190 | { | 2323 | { |
2191 | return kvm_init_arch(&vmx_arch_ops, THIS_MODULE); | 2324 | void *iova; |
2325 | int r; | ||
2326 | |||
2327 | vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | ||
2328 | if (!vmx_io_bitmap_a) | ||
2329 | return -ENOMEM; | ||
2330 | |||
2331 | vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | ||
2332 | if (!vmx_io_bitmap_b) { | ||
2333 | r = -ENOMEM; | ||
2334 | goto out; | ||
2335 | } | ||
2336 | |||
2337 | /* | ||
2338 | * Allow direct access to the PC debug port (it is often used for I/O | ||
2339 | * delays, but the vmexits simply slow things down). | ||
2340 | */ | ||
2341 | iova = kmap(vmx_io_bitmap_a); | ||
2342 | memset(iova, 0xff, PAGE_SIZE); | ||
2343 | clear_bit(0x80, iova); | ||
2344 | kunmap(vmx_io_bitmap_a); | ||
2345 | |||
2346 | iova = kmap(vmx_io_bitmap_b); | ||
2347 | memset(iova, 0xff, PAGE_SIZE); | ||
2348 | kunmap(vmx_io_bitmap_b); | ||
2349 | |||
2350 | r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE); | ||
2351 | if (r) | ||
2352 | goto out1; | ||
2353 | |||
2354 | return 0; | ||
2355 | |||
2356 | out1: | ||
2357 | __free_page(vmx_io_bitmap_b); | ||
2358 | out: | ||
2359 | __free_page(vmx_io_bitmap_a); | ||
2360 | return r; | ||
2192 | } | 2361 | } |
2193 | 2362 | ||
2194 | static void __exit vmx_exit(void) | 2363 | static void __exit vmx_exit(void) |
2195 | { | 2364 | { |
2365 | __free_page(vmx_io_bitmap_b); | ||
2366 | __free_page(vmx_io_bitmap_a); | ||
2367 | |||
2196 | kvm_exit_arch(); | 2368 | kvm_exit_arch(); |
2197 | } | 2369 | } |
2198 | 2370 | ||
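Annotation: vmx_init() now allocates two I/O bitmap pages, fills them with ones (every port access causes a VM exit, matching the CPU_BASED_ACTIVATE_IO_BITMAP control set in vmx_vcpu_setup()), and clears only bit 0x80 so guest accesses to the PC debug port go straight to hardware. A small user-space style sketch of the same bitmap manipulation, with plain memory standing in for struct page + kmap():

    #include <stdint.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* Illustrative only; one bit per I/O port, set = cause a VM exit. */
    static void init_io_bitmap(uint8_t *bitmap)
    {
            memset(bitmap, 0xff, PAGE_SIZE);         /* exit on every port  */
            bitmap[0x80 / 8] &= ~(1u << (0x80 % 8)); /* except port 0x80    */
    }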