diff options
| -rw-r--r-- | drivers/kvm/kvm.h | 100 | ||||
| -rw-r--r-- | drivers/kvm/kvm_main.c | 792 | ||||
| -rw-r--r-- | drivers/kvm/kvm_svm.h | 13 | ||||
| -rw-r--r-- | drivers/kvm/kvm_vmx.h | 14 | ||||
| -rw-r--r-- | drivers/kvm/mmu.c | 154 | ||||
| -rw-r--r-- | drivers/kvm/paging_tmpl.h | 12 | ||||
| -rw-r--r-- | drivers/kvm/svm.c | 197 | ||||
| -rw-r--r-- | drivers/kvm/svm.h | 6 | ||||
| -rw-r--r-- | drivers/kvm/vmx.c | 273 | ||||
| -rw-r--r-- | drivers/kvm/x86_emulate.c | 51 | ||||
| -rw-r--r-- | drivers/kvm/x86_emulate.h | 32 | ||||
| -rw-r--r-- | include/linux/Kbuild | 1 | ||||
| -rw-r--r-- | include/linux/kvm.h | 133 | ||||
| -rw-r--r-- | include/linux/miscdevice.h | 1 |
14 files changed, 1301 insertions, 478 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 0d122bf889db..41634fde8e13 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h | |||
| @@ -51,16 +51,19 @@ | |||
| 51 | #define UNMAPPED_GVA (~(gpa_t)0) | 51 | #define UNMAPPED_GVA (~(gpa_t)0) |
| 52 | 52 | ||
| 53 | #define KVM_MAX_VCPUS 1 | 53 | #define KVM_MAX_VCPUS 1 |
| 54 | #define KVM_ALIAS_SLOTS 4 | ||
| 54 | #define KVM_MEMORY_SLOTS 4 | 55 | #define KVM_MEMORY_SLOTS 4 |
| 55 | #define KVM_NUM_MMU_PAGES 256 | 56 | #define KVM_NUM_MMU_PAGES 256 |
| 56 | #define KVM_MIN_FREE_MMU_PAGES 5 | 57 | #define KVM_MIN_FREE_MMU_PAGES 5 |
| 57 | #define KVM_REFILL_PAGES 25 | 58 | #define KVM_REFILL_PAGES 25 |
| 59 | #define KVM_MAX_CPUID_ENTRIES 40 | ||
| 58 | 60 | ||
| 59 | #define FX_IMAGE_SIZE 512 | 61 | #define FX_IMAGE_SIZE 512 |
| 60 | #define FX_IMAGE_ALIGN 16 | 62 | #define FX_IMAGE_ALIGN 16 |
| 61 | #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) | 63 | #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) |
| 62 | 64 | ||
| 63 | #define DE_VECTOR 0 | 65 | #define DE_VECTOR 0 |
| 66 | #define NM_VECTOR 7 | ||
| 64 | #define DF_VECTOR 8 | 67 | #define DF_VECTOR 8 |
| 65 | #define TS_VECTOR 10 | 68 | #define TS_VECTOR 10 |
| 66 | #define NP_VECTOR 11 | 69 | #define NP_VECTOR 11 |
| @@ -73,6 +76,8 @@ | |||
| 73 | 76 | ||
| 74 | #define IOPL_SHIFT 12 | 77 | #define IOPL_SHIFT 12 |
| 75 | 78 | ||
| 79 | #define KVM_PIO_PAGE_OFFSET 1 | ||
| 80 | |||
| 76 | /* | 81 | /* |
| 77 | * Address types: | 82 | * Address types: |
| 78 | * | 83 | * |
| @@ -106,6 +111,7 @@ struct kvm_pte_chain { | |||
| 106 | * bits 4:7 - page table level for this shadow (1-4) | 111 | * bits 4:7 - page table level for this shadow (1-4) |
| 107 | * bits 8:9 - page table quadrant for 2-level guests | 112 | * bits 8:9 - page table quadrant for 2-level guests |
| 108 | * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) | 113 | * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) |
| 114 | * bits 17:18 - "access" - the user and writable bits of a huge page pde | ||
| 109 | */ | 115 | */ |
| 110 | union kvm_mmu_page_role { | 116 | union kvm_mmu_page_role { |
| 111 | unsigned word; | 117 | unsigned word; |
| @@ -115,6 +121,7 @@ union kvm_mmu_page_role { | |||
| 115 | unsigned quadrant : 2; | 121 | unsigned quadrant : 2; |
| 116 | unsigned pad_for_nice_hex_output : 6; | 122 | unsigned pad_for_nice_hex_output : 6; |
| 117 | unsigned metaphysical : 1; | 123 | unsigned metaphysical : 1; |
| 124 | unsigned hugepage_access : 2; | ||
| 118 | }; | 125 | }; |
| 119 | }; | 126 | }; |
| 120 | 127 | ||
| @@ -133,7 +140,6 @@ struct kvm_mmu_page { | |||
| 133 | unsigned long slot_bitmap; /* One bit set per slot which has memory | 140 | unsigned long slot_bitmap; /* One bit set per slot which has memory |
| 134 | * in this shadow page. | 141 | * in this shadow page. |
| 135 | */ | 142 | */ |
| 136 | int global; /* Set if all ptes in this page are global */ | ||
| 137 | int multimapped; /* More than one parent_pte? */ | 143 | int multimapped; /* More than one parent_pte? */ |
| 138 | int root_count; /* Currently serving as active root */ | 144 | int root_count; /* Currently serving as active root */ |
| 139 | union { | 145 | union { |
| @@ -219,6 +225,34 @@ enum { | |||
| 219 | VCPU_SREG_LDTR, | 225 | VCPU_SREG_LDTR, |
| 220 | }; | 226 | }; |
| 221 | 227 | ||
| 228 | struct kvm_pio_request { | ||
| 229 | unsigned long count; | ||
| 230 | int cur_count; | ||
| 231 | struct page *guest_pages[2]; | ||
| 232 | unsigned guest_page_offset; | ||
| 233 | int in; | ||
| 234 | int size; | ||
| 235 | int string; | ||
| 236 | int down; | ||
| 237 | int rep; | ||
| 238 | }; | ||
| 239 | |||
| 240 | struct kvm_stat { | ||
| 241 | u32 pf_fixed; | ||
| 242 | u32 pf_guest; | ||
| 243 | u32 tlb_flush; | ||
| 244 | u32 invlpg; | ||
| 245 | |||
| 246 | u32 exits; | ||
| 247 | u32 io_exits; | ||
| 248 | u32 mmio_exits; | ||
| 249 | u32 signal_exits; | ||
| 250 | u32 irq_window_exits; | ||
| 251 | u32 halt_exits; | ||
| 252 | u32 request_irq_exits; | ||
| 253 | u32 irq_exits; | ||
| 254 | }; | ||
| 255 | |||
| 222 | struct kvm_vcpu { | 256 | struct kvm_vcpu { |
| 223 | struct kvm *kvm; | 257 | struct kvm *kvm; |
| 224 | union { | 258 | union { |
| @@ -228,6 +262,8 @@ struct kvm_vcpu { | |||
| 228 | struct mutex mutex; | 262 | struct mutex mutex; |
| 229 | int cpu; | 263 | int cpu; |
| 230 | int launched; | 264 | int launched; |
| 265 | u64 host_tsc; | ||
| 266 | struct kvm_run *run; | ||
| 231 | int interrupt_window_open; | 267 | int interrupt_window_open; |
| 232 | unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ | 268 | unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ |
| 233 | #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) | 269 | #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) |
| @@ -266,6 +302,7 @@ struct kvm_vcpu { | |||
| 266 | char fx_buf[FX_BUF_SIZE]; | 302 | char fx_buf[FX_BUF_SIZE]; |
| 267 | char *host_fx_image; | 303 | char *host_fx_image; |
| 268 | char *guest_fx_image; | 304 | char *guest_fx_image; |
| 305 | int fpu_active; | ||
| 269 | 306 | ||
| 270 | int mmio_needed; | 307 | int mmio_needed; |
| 271 | int mmio_read_completed; | 308 | int mmio_read_completed; |
| @@ -273,6 +310,14 @@ struct kvm_vcpu { | |||
| 273 | int mmio_size; | 310 | int mmio_size; |
| 274 | unsigned char mmio_data[8]; | 311 | unsigned char mmio_data[8]; |
| 275 | gpa_t mmio_phys_addr; | 312 | gpa_t mmio_phys_addr; |
| 313 | gva_t mmio_fault_cr2; | ||
| 314 | struct kvm_pio_request pio; | ||
| 315 | void *pio_data; | ||
| 316 | |||
| 317 | int sigset_active; | ||
| 318 | sigset_t sigset; | ||
| 319 | |||
| 320 | struct kvm_stat stat; | ||
| 276 | 321 | ||
| 277 | struct { | 322 | struct { |
| 278 | int active; | 323 | int active; |
| @@ -284,6 +329,15 @@ struct kvm_vcpu { | |||
| 284 | u32 ar; | 329 | u32 ar; |
| 285 | } tr, es, ds, fs, gs; | 330 | } tr, es, ds, fs, gs; |
| 286 | } rmode; | 331 | } rmode; |
| 332 | |||
| 333 | int cpuid_nent; | ||
| 334 | struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; | ||
| 335 | }; | ||
| 336 | |||
| 337 | struct kvm_mem_alias { | ||
| 338 | gfn_t base_gfn; | ||
| 339 | unsigned long npages; | ||
| 340 | gfn_t target_gfn; | ||
| 287 | }; | 341 | }; |
| 288 | 342 | ||
| 289 | struct kvm_memory_slot { | 343 | struct kvm_memory_slot { |
| @@ -296,6 +350,8 @@ struct kvm_memory_slot { | |||
| 296 | 350 | ||
| 297 | struct kvm { | 351 | struct kvm { |
| 298 | spinlock_t lock; /* protects everything except vcpus */ | 352 | spinlock_t lock; /* protects everything except vcpus */ |
| 353 | int naliases; | ||
| 354 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; | ||
| 299 | int nmemslots; | 355 | int nmemslots; |
| 300 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; | 356 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; |
| 301 | /* | 357 | /* |
| @@ -312,22 +368,6 @@ struct kvm { | |||
| 312 | struct file *filp; | 368 | struct file *filp; |
| 313 | }; | 369 | }; |
| 314 | 370 | ||
| 315 | struct kvm_stat { | ||
| 316 | u32 pf_fixed; | ||
| 317 | u32 pf_guest; | ||
| 318 | u32 tlb_flush; | ||
| 319 | u32 invlpg; | ||
| 320 | |||
| 321 | u32 exits; | ||
| 322 | u32 io_exits; | ||
| 323 | u32 mmio_exits; | ||
| 324 | u32 signal_exits; | ||
| 325 | u32 irq_window_exits; | ||
| 326 | u32 halt_exits; | ||
| 327 | u32 request_irq_exits; | ||
| 328 | u32 irq_exits; | ||
| 329 | }; | ||
| 330 | |||
| 331 | struct descriptor_table { | 371 | struct descriptor_table { |
| 332 | u16 limit; | 372 | u16 limit; |
| 333 | unsigned long base; | 373 | unsigned long base; |
| @@ -358,10 +398,8 @@ struct kvm_arch_ops { | |||
| 358 | void (*set_segment)(struct kvm_vcpu *vcpu, | 398 | void (*set_segment)(struct kvm_vcpu *vcpu, |
| 359 | struct kvm_segment *var, int seg); | 399 | struct kvm_segment *var, int seg); |
| 360 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | 400 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); |
| 361 | void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu); | 401 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); |
| 362 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); | 402 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
| 363 | void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu, | ||
| 364 | unsigned long cr0); | ||
| 365 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 403 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
| 366 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); | 404 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); |
| 367 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); | 405 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
| @@ -391,7 +429,6 @@ struct kvm_arch_ops { | |||
| 391 | unsigned char *hypercall_addr); | 429 | unsigned char *hypercall_addr); |
| 392 | }; | 430 | }; |
| 393 | 431 | ||
| 394 | extern struct kvm_stat kvm_stat; | ||
| 395 | extern struct kvm_arch_ops *kvm_arch_ops; | 432 | extern struct kvm_arch_ops *kvm_arch_ops; |
| 396 | 433 | ||
| 397 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) | 434 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) |
| @@ -400,28 +437,29 @@ extern struct kvm_arch_ops *kvm_arch_ops; | |||
| 400 | int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); | 437 | int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); |
| 401 | void kvm_exit_arch(void); | 438 | void kvm_exit_arch(void); |
| 402 | 439 | ||
| 440 | int kvm_mmu_module_init(void); | ||
| 441 | void kvm_mmu_module_exit(void); | ||
| 442 | |||
| 403 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); | 443 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); |
| 404 | int kvm_mmu_create(struct kvm_vcpu *vcpu); | 444 | int kvm_mmu_create(struct kvm_vcpu *vcpu); |
| 405 | int kvm_mmu_setup(struct kvm_vcpu *vcpu); | 445 | int kvm_mmu_setup(struct kvm_vcpu *vcpu); |
| 406 | 446 | ||
| 407 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 447 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
| 408 | void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot); | 448 | void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot); |
| 449 | void kvm_mmu_zap_all(struct kvm_vcpu *vcpu); | ||
| 409 | 450 | ||
| 410 | hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); | 451 | hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); |
| 411 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) | 452 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) |
| 412 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) | 453 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) |
| 413 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } | 454 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } |
| 414 | hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); | 455 | hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); |
| 456 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); | ||
| 415 | 457 | ||
| 416 | void kvm_emulator_want_group7_invlpg(void); | 458 | void kvm_emulator_want_group7_invlpg(void); |
| 417 | 459 | ||
| 418 | extern hpa_t bad_page_address; | 460 | extern hpa_t bad_page_address; |
| 419 | 461 | ||
| 420 | static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn) | 462 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); |
| 421 | { | ||
| 422 | return slot->phys_mem[gfn - slot->base_gfn]; | ||
| 423 | } | ||
| 424 | |||
| 425 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); | 463 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); |
| 426 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); | 464 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); |
| 427 | 465 | ||
| @@ -444,6 +482,10 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, | |||
| 444 | 482 | ||
| 445 | struct x86_emulate_ctxt; | 483 | struct x86_emulate_ctxt; |
| 446 | 484 | ||
| 485 | int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||
| 486 | int size, unsigned long count, int string, int down, | ||
| 487 | gva_t address, int rep, unsigned port); | ||
| 488 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | ||
| 447 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 489 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
| 448 | int emulate_clts(struct kvm_vcpu *vcpu); | 490 | int emulate_clts(struct kvm_vcpu *vcpu); |
| 449 | int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, | 491 | int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, |
| @@ -493,12 +535,6 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
| 493 | return vcpu->mmu.page_fault(vcpu, gva, error_code); | 535 | return vcpu->mmu.page_fault(vcpu, gva, error_code); |
| 494 | } | 536 | } |
| 495 | 537 | ||
| 496 | static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn) | ||
| 497 | { | ||
| 498 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
| 499 | return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL; | ||
| 500 | } | ||
| 501 | |||
| 502 | static inline int is_long_mode(struct kvm_vcpu *vcpu) | 538 | static inline int is_long_mode(struct kvm_vcpu *vcpu) |
| 503 | { | 539 | { |
| 504 | #ifdef CONFIG_X86_64 | 540 | #ifdef CONFIG_X86_64 |
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index dc7a8c78cbf9..c8b8cfa332bb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c | |||
| @@ -51,27 +51,27 @@ static DEFINE_SPINLOCK(kvm_lock); | |||
| 51 | static LIST_HEAD(vm_list); | 51 | static LIST_HEAD(vm_list); |
| 52 | 52 | ||
| 53 | struct kvm_arch_ops *kvm_arch_ops; | 53 | struct kvm_arch_ops *kvm_arch_ops; |
| 54 | struct kvm_stat kvm_stat; | 54 | |
| 55 | EXPORT_SYMBOL_GPL(kvm_stat); | 55 | #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) |
| 56 | 56 | ||
| 57 | static struct kvm_stats_debugfs_item { | 57 | static struct kvm_stats_debugfs_item { |
| 58 | const char *name; | 58 | const char *name; |
| 59 | u32 *data; | 59 | int offset; |
| 60 | struct dentry *dentry; | 60 | struct dentry *dentry; |
| 61 | } debugfs_entries[] = { | 61 | } debugfs_entries[] = { |
| 62 | { "pf_fixed", &kvm_stat.pf_fixed }, | 62 | { "pf_fixed", STAT_OFFSET(pf_fixed) }, |
| 63 | { "pf_guest", &kvm_stat.pf_guest }, | 63 | { "pf_guest", STAT_OFFSET(pf_guest) }, |
| 64 | { "tlb_flush", &kvm_stat.tlb_flush }, | 64 | { "tlb_flush", STAT_OFFSET(tlb_flush) }, |
| 65 | { "invlpg", &kvm_stat.invlpg }, | 65 | { "invlpg", STAT_OFFSET(invlpg) }, |
| 66 | { "exits", &kvm_stat.exits }, | 66 | { "exits", STAT_OFFSET(exits) }, |
| 67 | { "io_exits", &kvm_stat.io_exits }, | 67 | { "io_exits", STAT_OFFSET(io_exits) }, |
| 68 | { "mmio_exits", &kvm_stat.mmio_exits }, | 68 | { "mmio_exits", STAT_OFFSET(mmio_exits) }, |
| 69 | { "signal_exits", &kvm_stat.signal_exits }, | 69 | { "signal_exits", STAT_OFFSET(signal_exits) }, |
| 70 | { "irq_window", &kvm_stat.irq_window_exits }, | 70 | { "irq_window", STAT_OFFSET(irq_window_exits) }, |
| 71 | { "halt_exits", &kvm_stat.halt_exits }, | 71 | { "halt_exits", STAT_OFFSET(halt_exits) }, |
| 72 | { "request_irq", &kvm_stat.request_irq_exits }, | 72 | { "request_irq", STAT_OFFSET(request_irq_exits) }, |
| 73 | { "irq_exits", &kvm_stat.irq_exits }, | 73 | { "irq_exits", STAT_OFFSET(irq_exits) }, |
| 74 | { NULL, NULL } | 74 | { NULL } |
| 75 | }; | 75 | }; |
| 76 | 76 | ||
| 77 | static struct dentry *debugfs_dir; | 77 | static struct dentry *debugfs_dir; |
| @@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm) | |||
| 346 | kvm_free_physmem_slot(&kvm->memslots[i], NULL); | 346 | kvm_free_physmem_slot(&kvm->memslots[i], NULL); |
| 347 | } | 347 | } |
| 348 | 348 | ||
| 349 | static void free_pio_guest_pages(struct kvm_vcpu *vcpu) | ||
| 350 | { | ||
| 351 | int i; | ||
| 352 | |||
| 353 | for (i = 0; i < 2; ++i) | ||
| 354 | if (vcpu->pio.guest_pages[i]) { | ||
| 355 | __free_page(vcpu->pio.guest_pages[i]); | ||
| 356 | vcpu->pio.guest_pages[i] = NULL; | ||
| 357 | } | ||
| 358 | } | ||
| 359 | |||
| 349 | static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | 360 | static void kvm_free_vcpu(struct kvm_vcpu *vcpu) |
| 350 | { | 361 | { |
| 351 | if (!vcpu->vmcs) | 362 | if (!vcpu->vmcs) |
| @@ -355,6 +366,11 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | |||
| 355 | kvm_mmu_destroy(vcpu); | 366 | kvm_mmu_destroy(vcpu); |
| 356 | vcpu_put(vcpu); | 367 | vcpu_put(vcpu); |
| 357 | kvm_arch_ops->vcpu_free(vcpu); | 368 | kvm_arch_ops->vcpu_free(vcpu); |
| 369 | free_page((unsigned long)vcpu->run); | ||
| 370 | vcpu->run = NULL; | ||
| 371 | free_page((unsigned long)vcpu->pio_data); | ||
| 372 | vcpu->pio_data = NULL; | ||
| 373 | free_pio_guest_pages(vcpu); | ||
| 358 | } | 374 | } |
| 359 | 375 | ||
| 360 | static void kvm_free_vcpus(struct kvm *kvm) | 376 | static void kvm_free_vcpus(struct kvm *kvm) |
| @@ -404,12 +420,12 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
| 404 | u64 pdpte; | 420 | u64 pdpte; |
| 405 | u64 *pdpt; | 421 | u64 *pdpt; |
| 406 | int ret; | 422 | int ret; |
| 407 | struct kvm_memory_slot *memslot; | 423 | struct page *page; |
| 408 | 424 | ||
| 409 | spin_lock(&vcpu->kvm->lock); | 425 | spin_lock(&vcpu->kvm->lock); |
| 410 | memslot = gfn_to_memslot(vcpu->kvm, pdpt_gfn); | 426 | page = gfn_to_page(vcpu->kvm, pdpt_gfn); |
| 411 | /* FIXME: !memslot - emulate? 0xff? */ | 427 | /* FIXME: !page - emulate? 0xff? */ |
| 412 | pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0); | 428 | pdpt = kmap_atomic(page, KM_USER0); |
| 413 | 429 | ||
| 414 | ret = 1; | 430 | ret = 1; |
| 415 | for (i = 0; i < 4; ++i) { | 431 | for (i = 0; i < 4; ++i) { |
| @@ -494,7 +510,6 @@ EXPORT_SYMBOL_GPL(set_cr0); | |||
| 494 | 510 | ||
| 495 | void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 511 | void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
| 496 | { | 512 | { |
| 497 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | ||
| 498 | set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); | 513 | set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); |
| 499 | } | 514 | } |
| 500 | EXPORT_SYMBOL_GPL(lmsw); | 515 | EXPORT_SYMBOL_GPL(lmsw); |
| @@ -830,7 +845,73 @@ out: | |||
| 830 | return r; | 845 | return r; |
| 831 | } | 846 | } |
| 832 | 847 | ||
| 833 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 848 | /* |
| 849 | * Set a new alias region. Aliases map a portion of physical memory into | ||
| 850 | * another portion. This is useful for memory windows, for example the PC | ||
| 851 | * VGA region. | ||
| 852 | */ | ||
| 853 | static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | ||
| 854 | struct kvm_memory_alias *alias) | ||
| 855 | { | ||
| 856 | int r, n; | ||
| 857 | struct kvm_mem_alias *p; | ||
| 858 | |||
| 859 | r = -EINVAL; | ||
| 860 | /* General sanity checks */ | ||
| 861 | if (alias->memory_size & (PAGE_SIZE - 1)) | ||
| 862 | goto out; | ||
| 863 | if (alias->guest_phys_addr & (PAGE_SIZE - 1)) | ||
| 864 | goto out; | ||
| 865 | if (alias->slot >= KVM_ALIAS_SLOTS) | ||
| 866 | goto out; | ||
| 867 | if (alias->guest_phys_addr + alias->memory_size | ||
| 868 | < alias->guest_phys_addr) | ||
| 869 | goto out; | ||
| 870 | if (alias->target_phys_addr + alias->memory_size | ||
| 871 | < alias->target_phys_addr) | ||
| 872 | goto out; | ||
| 873 | |||
| 874 | spin_lock(&kvm->lock); | ||
| 875 | |||
| 876 | p = &kvm->aliases[alias->slot]; | ||
| 877 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | ||
| 878 | p->npages = alias->memory_size >> PAGE_SHIFT; | ||
| 879 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | ||
| 880 | |||
| 881 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | ||
| 882 | if (kvm->aliases[n - 1].npages) | ||
| 883 | break; | ||
| 884 | kvm->naliases = n; | ||
| 885 | |||
| 886 | spin_unlock(&kvm->lock); | ||
| 887 | |||
| 888 | vcpu_load(&kvm->vcpus[0]); | ||
| 889 | spin_lock(&kvm->lock); | ||
| 890 | kvm_mmu_zap_all(&kvm->vcpus[0]); | ||
| 891 | spin_unlock(&kvm->lock); | ||
| 892 | vcpu_put(&kvm->vcpus[0]); | ||
| 893 | |||
| 894 | return 0; | ||
| 895 | |||
| 896 | out: | ||
| 897 | return r; | ||
| 898 | } | ||
| 899 | |||
| 900 | static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
| 901 | { | ||
| 902 | int i; | ||
| 903 | struct kvm_mem_alias *alias; | ||
| 904 | |||
| 905 | for (i = 0; i < kvm->naliases; ++i) { | ||
| 906 | alias = &kvm->aliases[i]; | ||
| 907 | if (gfn >= alias->base_gfn | ||
| 908 | && gfn < alias->base_gfn + alias->npages) | ||
| 909 | return alias->target_gfn + gfn - alias->base_gfn; | ||
| 910 | } | ||
| 911 | return gfn; | ||
| 912 | } | ||
| 913 | |||
| 914 | static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | ||
| 834 | { | 915 | { |
| 835 | int i; | 916 | int i; |
| 836 | 917 | ||
| @@ -843,7 +924,24 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | |||
| 843 | } | 924 | } |
| 844 | return NULL; | 925 | return NULL; |
| 845 | } | 926 | } |
| 846 | EXPORT_SYMBOL_GPL(gfn_to_memslot); | 927 | |
| 928 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | ||
| 929 | { | ||
| 930 | gfn = unalias_gfn(kvm, gfn); | ||
| 931 | return __gfn_to_memslot(kvm, gfn); | ||
| 932 | } | ||
| 933 | |||
| 934 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | ||
| 935 | { | ||
| 936 | struct kvm_memory_slot *slot; | ||
| 937 | |||
| 938 | gfn = unalias_gfn(kvm, gfn); | ||
| 939 | slot = __gfn_to_memslot(kvm, gfn); | ||
| 940 | if (!slot) | ||
| 941 | return NULL; | ||
| 942 | return slot->phys_mem[gfn - slot->base_gfn]; | ||
| 943 | } | ||
| 944 | EXPORT_SYMBOL_GPL(gfn_to_page); | ||
| 847 | 945 | ||
| 848 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | 946 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) |
| 849 | { | 947 | { |
| @@ -871,7 +969,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
| 871 | } | 969 | } |
| 872 | 970 | ||
| 873 | static int emulator_read_std(unsigned long addr, | 971 | static int emulator_read_std(unsigned long addr, |
| 874 | unsigned long *val, | 972 | void *val, |
| 875 | unsigned int bytes, | 973 | unsigned int bytes, |
| 876 | struct x86_emulate_ctxt *ctxt) | 974 | struct x86_emulate_ctxt *ctxt) |
| 877 | { | 975 | { |
| @@ -883,20 +981,20 @@ static int emulator_read_std(unsigned long addr, | |||
| 883 | unsigned offset = addr & (PAGE_SIZE-1); | 981 | unsigned offset = addr & (PAGE_SIZE-1); |
| 884 | unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); | 982 | unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); |
| 885 | unsigned long pfn; | 983 | unsigned long pfn; |
| 886 | struct kvm_memory_slot *memslot; | 984 | struct page *page; |
| 887 | void *page; | 985 | void *page_virt; |
| 888 | 986 | ||
| 889 | if (gpa == UNMAPPED_GVA) | 987 | if (gpa == UNMAPPED_GVA) |
| 890 | return X86EMUL_PROPAGATE_FAULT; | 988 | return X86EMUL_PROPAGATE_FAULT; |
| 891 | pfn = gpa >> PAGE_SHIFT; | 989 | pfn = gpa >> PAGE_SHIFT; |
| 892 | memslot = gfn_to_memslot(vcpu->kvm, pfn); | 990 | page = gfn_to_page(vcpu->kvm, pfn); |
| 893 | if (!memslot) | 991 | if (!page) |
| 894 | return X86EMUL_UNHANDLEABLE; | 992 | return X86EMUL_UNHANDLEABLE; |
| 895 | page = kmap_atomic(gfn_to_page(memslot, pfn), KM_USER0); | 993 | page_virt = kmap_atomic(page, KM_USER0); |
| 896 | 994 | ||
| 897 | memcpy(data, page + offset, tocopy); | 995 | memcpy(data, page_virt + offset, tocopy); |
| 898 | 996 | ||
| 899 | kunmap_atomic(page, KM_USER0); | 997 | kunmap_atomic(page_virt, KM_USER0); |
| 900 | 998 | ||
| 901 | bytes -= tocopy; | 999 | bytes -= tocopy; |
| 902 | data += tocopy; | 1000 | data += tocopy; |
| @@ -907,7 +1005,7 @@ static int emulator_read_std(unsigned long addr, | |||
| 907 | } | 1005 | } |
| 908 | 1006 | ||
| 909 | static int emulator_write_std(unsigned long addr, | 1007 | static int emulator_write_std(unsigned long addr, |
| 910 | unsigned long val, | 1008 | const void *val, |
| 911 | unsigned int bytes, | 1009 | unsigned int bytes, |
| 912 | struct x86_emulate_ctxt *ctxt) | 1010 | struct x86_emulate_ctxt *ctxt) |
| 913 | { | 1011 | { |
| @@ -917,7 +1015,7 @@ static int emulator_write_std(unsigned long addr, | |||
| 917 | } | 1015 | } |
| 918 | 1016 | ||
| 919 | static int emulator_read_emulated(unsigned long addr, | 1017 | static int emulator_read_emulated(unsigned long addr, |
| 920 | unsigned long *val, | 1018 | void *val, |
| 921 | unsigned int bytes, | 1019 | unsigned int bytes, |
| 922 | struct x86_emulate_ctxt *ctxt) | 1020 | struct x86_emulate_ctxt *ctxt) |
| 923 | { | 1021 | { |
| @@ -945,37 +1043,37 @@ static int emulator_read_emulated(unsigned long addr, | |||
| 945 | } | 1043 | } |
| 946 | 1044 | ||
| 947 | static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 1045 | static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 948 | unsigned long val, int bytes) | 1046 | const void *val, int bytes) |
| 949 | { | 1047 | { |
| 950 | struct kvm_memory_slot *m; | ||
| 951 | struct page *page; | 1048 | struct page *page; |
| 952 | void *virt; | 1049 | void *virt; |
| 953 | 1050 | ||
| 954 | if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) | 1051 | if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) |
| 955 | return 0; | 1052 | return 0; |
| 956 | m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); | 1053 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 957 | if (!m) | 1054 | if (!page) |
| 958 | return 0; | 1055 | return 0; |
| 959 | page = gfn_to_page(m, gpa >> PAGE_SHIFT); | ||
| 960 | kvm_mmu_pre_write(vcpu, gpa, bytes); | 1056 | kvm_mmu_pre_write(vcpu, gpa, bytes); |
| 961 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); | 1057 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 962 | virt = kmap_atomic(page, KM_USER0); | 1058 | virt = kmap_atomic(page, KM_USER0); |
| 963 | memcpy(virt + offset_in_page(gpa), &val, bytes); | 1059 | memcpy(virt + offset_in_page(gpa), val, bytes); |
| 964 | kunmap_atomic(virt, KM_USER0); | 1060 | kunmap_atomic(virt, KM_USER0); |
| 965 | kvm_mmu_post_write(vcpu, gpa, bytes); | 1061 | kvm_mmu_post_write(vcpu, gpa, bytes); |
| 966 | return 1; | 1062 | return 1; |
| 967 | } | 1063 | } |
| 968 | 1064 | ||
| 969 | static int emulator_write_emulated(unsigned long addr, | 1065 | static int emulator_write_emulated(unsigned long addr, |
| 970 | unsigned long val, | 1066 | const void *val, |
| 971 | unsigned int bytes, | 1067 | unsigned int bytes, |
| 972 | struct x86_emulate_ctxt *ctxt) | 1068 | struct x86_emulate_ctxt *ctxt) |
| 973 | { | 1069 | { |
| 974 | struct kvm_vcpu *vcpu = ctxt->vcpu; | 1070 | struct kvm_vcpu *vcpu = ctxt->vcpu; |
| 975 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); | 1071 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); |
| 976 | 1072 | ||
| 977 | if (gpa == UNMAPPED_GVA) | 1073 | if (gpa == UNMAPPED_GVA) { |
| 1074 | kvm_arch_ops->inject_page_fault(vcpu, addr, 2); | ||
| 978 | return X86EMUL_PROPAGATE_FAULT; | 1075 | return X86EMUL_PROPAGATE_FAULT; |
| 1076 | } | ||
| 979 | 1077 | ||
| 980 | if (emulator_write_phys(vcpu, gpa, val, bytes)) | 1078 | if (emulator_write_phys(vcpu, gpa, val, bytes)) |
| 981 | return X86EMUL_CONTINUE; | 1079 | return X86EMUL_CONTINUE; |
| @@ -984,14 +1082,14 @@ static int emulator_write_emulated(unsigned long addr, | |||
| 984 | vcpu->mmio_phys_addr = gpa; | 1082 | vcpu->mmio_phys_addr = gpa; |
| 985 | vcpu->mmio_size = bytes; | 1083 | vcpu->mmio_size = bytes; |
| 986 | vcpu->mmio_is_write = 1; | 1084 | vcpu->mmio_is_write = 1; |
| 987 | memcpy(vcpu->mmio_data, &val, bytes); | 1085 | memcpy(vcpu->mmio_data, val, bytes); |
| 988 | 1086 | ||
| 989 | return X86EMUL_CONTINUE; | 1087 | return X86EMUL_CONTINUE; |
| 990 | } | 1088 | } |
| 991 | 1089 | ||
| 992 | static int emulator_cmpxchg_emulated(unsigned long addr, | 1090 | static int emulator_cmpxchg_emulated(unsigned long addr, |
| 993 | unsigned long old, | 1091 | const void *old, |
| 994 | unsigned long new, | 1092 | const void *new, |
| 995 | unsigned int bytes, | 1093 | unsigned int bytes, |
| 996 | struct x86_emulate_ctxt *ctxt) | 1094 | struct x86_emulate_ctxt *ctxt) |
| 997 | { | 1095 | { |
| @@ -1004,30 +1102,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
| 1004 | return emulator_write_emulated(addr, new, bytes, ctxt); | 1102 | return emulator_write_emulated(addr, new, bytes, ctxt); |
| 1005 | } | 1103 | } |
| 1006 | 1104 | ||
| 1007 | #ifdef CONFIG_X86_32 | ||
| 1008 | |||
| 1009 | static int emulator_cmpxchg8b_emulated(unsigned long addr, | ||
| 1010 | unsigned long old_lo, | ||
| 1011 | unsigned long old_hi, | ||
| 1012 | unsigned long new_lo, | ||
| 1013 | unsigned long new_hi, | ||
| 1014 | struct x86_emulate_ctxt *ctxt) | ||
| 1015 | { | ||
| 1016 | static int reported; | ||
| 1017 | int r; | ||
| 1018 | |||
| 1019 | if (!reported) { | ||
| 1020 | reported = 1; | ||
| 1021 | printk(KERN_WARNING "kvm: emulating exchange8b as write\n"); | ||
| 1022 | } | ||
| 1023 | r = emulator_write_emulated(addr, new_lo, 4, ctxt); | ||
| 1024 | if (r != X86EMUL_CONTINUE) | ||
| 1025 | return r; | ||
| 1026 | return emulator_write_emulated(addr+4, new_hi, 4, ctxt); | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | #endif | ||
| 1030 | |||
| 1031 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 1105 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
| 1032 | { | 1106 | { |
| 1033 | return kvm_arch_ops->get_segment_base(vcpu, seg); | 1107 | return kvm_arch_ops->get_segment_base(vcpu, seg); |
| @@ -1042,7 +1116,6 @@ int emulate_clts(struct kvm_vcpu *vcpu) | |||
| 1042 | { | 1116 | { |
| 1043 | unsigned long cr0; | 1117 | unsigned long cr0; |
| 1044 | 1118 | ||
| 1045 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | ||
| 1046 | cr0 = vcpu->cr0 & ~CR0_TS_MASK; | 1119 | cr0 = vcpu->cr0 & ~CR0_TS_MASK; |
| 1047 | kvm_arch_ops->set_cr0(vcpu, cr0); | 1120 | kvm_arch_ops->set_cr0(vcpu, cr0); |
| 1048 | return X86EMUL_CONTINUE; | 1121 | return X86EMUL_CONTINUE; |
| @@ -1102,9 +1175,6 @@ struct x86_emulate_ops emulate_ops = { | |||
| 1102 | .read_emulated = emulator_read_emulated, | 1175 | .read_emulated = emulator_read_emulated, |
| 1103 | .write_emulated = emulator_write_emulated, | 1176 | .write_emulated = emulator_write_emulated, |
| 1104 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 1177 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
| 1105 | #ifdef CONFIG_X86_32 | ||
| 1106 | .cmpxchg8b_emulated = emulator_cmpxchg8b_emulated, | ||
| 1107 | #endif | ||
| 1108 | }; | 1178 | }; |
| 1109 | 1179 | ||
| 1110 | int emulate_instruction(struct kvm_vcpu *vcpu, | 1180 | int emulate_instruction(struct kvm_vcpu *vcpu, |
| @@ -1116,6 +1186,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 1116 | int r; | 1186 | int r; |
| 1117 | int cs_db, cs_l; | 1187 | int cs_db, cs_l; |
| 1118 | 1188 | ||
| 1189 | vcpu->mmio_fault_cr2 = cr2; | ||
| 1119 | kvm_arch_ops->cache_regs(vcpu); | 1190 | kvm_arch_ops->cache_regs(vcpu); |
| 1120 | 1191 | ||
| 1121 | kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 1192 | kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
| @@ -1166,8 +1237,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 1166 | kvm_arch_ops->decache_regs(vcpu); | 1237 | kvm_arch_ops->decache_regs(vcpu); |
| 1167 | kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); | 1238 | kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); |
| 1168 | 1239 | ||
| 1169 | if (vcpu->mmio_is_write) | 1240 | if (vcpu->mmio_is_write) { |
| 1241 | vcpu->mmio_needed = 0; | ||
| 1170 | return EMULATE_DO_MMIO; | 1242 | return EMULATE_DO_MMIO; |
| 1243 | } | ||
| 1171 | 1244 | ||
| 1172 | return EMULATE_DONE; | 1245 | return EMULATE_DONE; |
| 1173 | } | 1246 | } |
| @@ -1177,7 +1250,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 1177 | { | 1250 | { |
| 1178 | unsigned long nr, a0, a1, a2, a3, a4, a5, ret; | 1251 | unsigned long nr, a0, a1, a2, a3, a4, a5, ret; |
| 1179 | 1252 | ||
| 1180 | kvm_arch_ops->decache_regs(vcpu); | 1253 | kvm_arch_ops->cache_regs(vcpu); |
| 1181 | ret = -KVM_EINVAL; | 1254 | ret = -KVM_EINVAL; |
| 1182 | #ifdef CONFIG_X86_64 | 1255 | #ifdef CONFIG_X86_64 |
| 1183 | if (is_long_mode(vcpu)) { | 1256 | if (is_long_mode(vcpu)) { |
| @@ -1201,10 +1274,19 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 1201 | } | 1274 | } |
| 1202 | switch (nr) { | 1275 | switch (nr) { |
| 1203 | default: | 1276 | default: |
| 1204 | ; | 1277 | run->hypercall.args[0] = a0; |
| 1278 | run->hypercall.args[1] = a1; | ||
| 1279 | run->hypercall.args[2] = a2; | ||
| 1280 | run->hypercall.args[3] = a3; | ||
| 1281 | run->hypercall.args[4] = a4; | ||
| 1282 | run->hypercall.args[5] = a5; | ||
| 1283 | run->hypercall.ret = ret; | ||
| 1284 | run->hypercall.longmode = is_long_mode(vcpu); | ||
| 1285 | kvm_arch_ops->decache_regs(vcpu); | ||
| 1286 | return 0; | ||
| 1205 | } | 1287 | } |
| 1206 | vcpu->regs[VCPU_REGS_RAX] = ret; | 1288 | vcpu->regs[VCPU_REGS_RAX] = ret; |
| 1207 | kvm_arch_ops->cache_regs(vcpu); | 1289 | kvm_arch_ops->decache_regs(vcpu); |
| 1208 | return 1; | 1290 | return 1; |
| 1209 | } | 1291 | } |
| 1210 | EXPORT_SYMBOL_GPL(kvm_hypercall); | 1292 | EXPORT_SYMBOL_GPL(kvm_hypercall); |
| @@ -1237,7 +1319,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | |||
| 1237 | 1319 | ||
| 1238 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 1320 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
| 1239 | { | 1321 | { |
| 1240 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | 1322 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); |
| 1241 | switch (cr) { | 1323 | switch (cr) { |
| 1242 | case 0: | 1324 | case 0: |
| 1243 | return vcpu->cr0; | 1325 | return vcpu->cr0; |
| @@ -1442,6 +1524,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1442 | printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", | 1524 | printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", |
| 1443 | __FUNCTION__, data); | 1525 | __FUNCTION__, data); |
| 1444 | break; | 1526 | break; |
| 1527 | case MSR_IA32_MCG_STATUS: | ||
| 1528 | printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", | ||
| 1529 | __FUNCTION__, data); | ||
| 1530 | break; | ||
| 1445 | case MSR_IA32_UCODE_REV: | 1531 | case MSR_IA32_UCODE_REV: |
| 1446 | case MSR_IA32_UCODE_WRITE: | 1532 | case MSR_IA32_UCODE_WRITE: |
| 1447 | case 0x200 ... 0x2ff: /* MTRRs */ | 1533 | case 0x200 ... 0x2ff: /* MTRRs */ |
| @@ -1478,6 +1564,8 @@ static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
| 1478 | 1564 | ||
| 1479 | void kvm_resched(struct kvm_vcpu *vcpu) | 1565 | void kvm_resched(struct kvm_vcpu *vcpu) |
| 1480 | { | 1566 | { |
| 1567 | if (!need_resched()) | ||
| 1568 | return; | ||
| 1481 | vcpu_put(vcpu); | 1569 | vcpu_put(vcpu); |
| 1482 | cond_resched(); | 1570 | cond_resched(); |
| 1483 | vcpu_load(vcpu); | 1571 | vcpu_load(vcpu); |
| @@ -1502,29 +1590,250 @@ void save_msrs(struct vmx_msr_entry *e, int n) | |||
| 1502 | } | 1590 | } |
| 1503 | EXPORT_SYMBOL_GPL(save_msrs); | 1591 | EXPORT_SYMBOL_GPL(save_msrs); |
| 1504 | 1592 | ||
| 1593 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
| 1594 | { | ||
| 1595 | int i; | ||
| 1596 | u32 function; | ||
| 1597 | struct kvm_cpuid_entry *e, *best; | ||
| 1598 | |||
| 1599 | kvm_arch_ops->cache_regs(vcpu); | ||
| 1600 | function = vcpu->regs[VCPU_REGS_RAX]; | ||
| 1601 | vcpu->regs[VCPU_REGS_RAX] = 0; | ||
| 1602 | vcpu->regs[VCPU_REGS_RBX] = 0; | ||
| 1603 | vcpu->regs[VCPU_REGS_RCX] = 0; | ||
| 1604 | vcpu->regs[VCPU_REGS_RDX] = 0; | ||
| 1605 | best = NULL; | ||
| 1606 | for (i = 0; i < vcpu->cpuid_nent; ++i) { | ||
| 1607 | e = &vcpu->cpuid_entries[i]; | ||
| 1608 | if (e->function == function) { | ||
| 1609 | best = e; | ||
| 1610 | break; | ||
| 1611 | } | ||
| 1612 | /* | ||
| 1613 | * Both basic or both extended? | ||
| 1614 | */ | ||
| 1615 | if (((e->function ^ function) & 0x80000000) == 0) | ||
| 1616 | if (!best || e->function > best->function) | ||
| 1617 | best = e; | ||
| 1618 | } | ||
| 1619 | if (best) { | ||
| 1620 | vcpu->regs[VCPU_REGS_RAX] = best->eax; | ||
| 1621 | vcpu->regs[VCPU_REGS_RBX] = best->ebx; | ||
| 1622 | vcpu->regs[VCPU_REGS_RCX] = best->ecx; | ||
| 1623 | vcpu->regs[VCPU_REGS_RDX] = best->edx; | ||
| 1624 | } | ||
| 1625 | kvm_arch_ops->decache_regs(vcpu); | ||
| 1626 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
| 1627 | } | ||
| 1628 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | ||
| 1629 | |||
| 1630 | static int pio_copy_data(struct kvm_vcpu *vcpu) | ||
| 1631 | { | ||
| 1632 | void *p = vcpu->pio_data; | ||
| 1633 | void *q; | ||
| 1634 | unsigned bytes; | ||
| 1635 | int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; | ||
| 1636 | |||
| 1637 | kvm_arch_ops->vcpu_put(vcpu); | ||
| 1638 | q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, | ||
| 1639 | PAGE_KERNEL); | ||
| 1640 | if (!q) { | ||
| 1641 | kvm_arch_ops->vcpu_load(vcpu); | ||
| 1642 | free_pio_guest_pages(vcpu); | ||
| 1643 | return -ENOMEM; | ||
| 1644 | } | ||
| 1645 | q += vcpu->pio.guest_page_offset; | ||
| 1646 | bytes = vcpu->pio.size * vcpu->pio.cur_count; | ||
| 1647 | if (vcpu->pio.in) | ||
| 1648 | memcpy(q, p, bytes); | ||
| 1649 | else | ||
| 1650 | memcpy(p, q, bytes); | ||
| 1651 | q -= vcpu->pio.guest_page_offset; | ||
| 1652 | vunmap(q); | ||
| 1653 | kvm_arch_ops->vcpu_load(vcpu); | ||
| 1654 | free_pio_guest_pages(vcpu); | ||
| 1655 | return 0; | ||
| 1656 | } | ||
| 1657 | |||
| 1658 | static int complete_pio(struct kvm_vcpu *vcpu) | ||
| 1659 | { | ||
| 1660 | struct kvm_pio_request *io = &vcpu->pio; | ||
| 1661 | long delta; | ||
| 1662 | int r; | ||
| 1663 | |||
| 1664 | kvm_arch_ops->cache_regs(vcpu); | ||
| 1665 | |||
| 1666 | if (!io->string) { | ||
| 1667 | if (io->in) | ||
| 1668 | memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data, | ||
| 1669 | io->size); | ||
| 1670 | } else { | ||
| 1671 | if (io->in) { | ||
| 1672 | r = pio_copy_data(vcpu); | ||
| 1673 | if (r) { | ||
| 1674 | kvm_arch_ops->cache_regs(vcpu); | ||
| 1675 | return r; | ||
| 1676 | } | ||
| 1677 | } | ||
| 1678 | |||
| 1679 | delta = 1; | ||
| 1680 | if (io->rep) { | ||
| 1681 | delta *= io->cur_count; | ||
| 1682 | /* | ||
| 1683 | * The size of the register should really depend on | ||
| 1684 | * current address size. | ||
| 1685 | */ | ||
| 1686 | vcpu->regs[VCPU_REGS_RCX] -= delta; | ||
| 1687 | } | ||
| 1688 | if (io->down) | ||
| 1689 | delta = -delta; | ||
| 1690 | delta *= io->size; | ||
| 1691 | if (io->in) | ||
| 1692 | vcpu->regs[VCPU_REGS_RDI] += delta; | ||
| 1693 | else | ||
| 1694 | vcpu->regs[VCPU_REGS_RSI] += delta; | ||
| 1695 | } | ||
| 1696 | |||
| 1697 | kvm_arch_ops->decache_regs(vcpu); | ||
| 1698 | |||
| 1699 | io->count -= io->cur_count; | ||
| 1700 | io->cur_count = 0; | ||
| 1701 | |||
| 1702 | if (!io->count) | ||
| 1703 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
| 1704 | return 0; | ||
| 1705 | } | ||
| 1706 | |||
| 1707 | int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||
| 1708 | int size, unsigned long count, int string, int down, | ||
| 1709 | gva_t address, int rep, unsigned port) | ||
| 1710 | { | ||
| 1711 | unsigned now, in_page; | ||
| 1712 | int i; | ||
| 1713 | int nr_pages = 1; | ||
| 1714 | struct page *page; | ||
| 1715 | |||
| 1716 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 1717 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
| 1718 | vcpu->run->io.size = size; | ||
| 1719 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 1720 | vcpu->run->io.count = count; | ||
| 1721 | vcpu->run->io.port = port; | ||
| 1722 | vcpu->pio.count = count; | ||
| 1723 | vcpu->pio.cur_count = count; | ||
| 1724 | vcpu->pio.size = size; | ||
| 1725 | vcpu->pio.in = in; | ||
| 1726 | vcpu->pio.string = string; | ||
| 1727 | vcpu->pio.down = down; | ||
| 1728 | vcpu->pio.guest_page_offset = offset_in_page(address); | ||
| 1729 | vcpu->pio.rep = rep; | ||
| 1730 | |||
| 1731 | if (!string) { | ||
| 1732 | kvm_arch_ops->cache_regs(vcpu); | ||
| 1733 | memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); | ||
| 1734 | kvm_arch_ops->decache_regs(vcpu); | ||
| 1735 | return 0; | ||
| 1736 | } | ||
| 1737 | |||
| 1738 | if (!count) { | ||
| 1739 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
| 1740 | return 1; | ||
| 1741 | } | ||
| 1742 | |||
| 1743 | now = min(count, PAGE_SIZE / size); | ||
| 1744 | |||
| 1745 | if (!down) | ||
| 1746 | in_page = PAGE_SIZE - offset_in_page(address); | ||
| 1747 | else | ||
| 1748 | in_page = offset_in_page(address) + size; | ||
| 1749 | now = min(count, (unsigned long)in_page / size); | ||
| 1750 | if (!now) { | ||
| 1751 | /* | ||
| 1752 | * String I/O straddles page boundary. Pin two guest pages | ||
| 1753 | * so that we satisfy atomicity constraints. Do just one | ||
| 1754 | * transaction to avoid complexity. | ||
| 1755 | */ | ||
| 1756 | nr_pages = 2; | ||
| 1757 | now = 1; | ||
| 1758 | } | ||
| 1759 | if (down) { | ||
| 1760 | /* | ||
| 1761 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
| 1762 | */ | ||
| 1763 | printk(KERN_ERR "kvm: guest string pio down\n"); | ||
| 1764 | inject_gp(vcpu); | ||
| 1765 | return 1; | ||
| 1766 | } | ||
| 1767 | vcpu->run->io.count = now; | ||
| 1768 | vcpu->pio.cur_count = now; | ||
| 1769 | |||
| 1770 | for (i = 0; i < nr_pages; ++i) { | ||
| 1771 | spin_lock(&vcpu->kvm->lock); | ||
| 1772 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); | ||
| 1773 | if (page) | ||
| 1774 | get_page(page); | ||
| 1775 | vcpu->pio.guest_pages[i] = page; | ||
| 1776 | spin_unlock(&vcpu->kvm->lock); | ||
| 1777 | if (!page) { | ||
| 1778 | inject_gp(vcpu); | ||
| 1779 | free_pio_guest_pages(vcpu); | ||
| 1780 | return 1; | ||
| 1781 | } | ||
| 1782 | } | ||
| 1783 | |||
| 1784 | if (!vcpu->pio.in) | ||
| 1785 | return pio_copy_data(vcpu); | ||
| 1786 | return 0; | ||
| 1787 | } | ||
| 1788 | EXPORT_SYMBOL_GPL(kvm_setup_pio); | ||
| 1789 | |||
| 1505 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1790 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1506 | { | 1791 | { |
| 1507 | int r; | 1792 | int r; |
| 1793 | sigset_t sigsaved; | ||
| 1508 | 1794 | ||
| 1509 | vcpu_load(vcpu); | 1795 | vcpu_load(vcpu); |
| 1510 | 1796 | ||
| 1797 | if (vcpu->sigset_active) | ||
| 1798 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
| 1799 | |||
| 1511 | /* re-sync apic's tpr */ | 1800 | /* re-sync apic's tpr */ |
| 1512 | vcpu->cr8 = kvm_run->cr8; | 1801 | vcpu->cr8 = kvm_run->cr8; |
| 1513 | 1802 | ||
| 1514 | if (kvm_run->emulated) { | 1803 | if (vcpu->pio.cur_count) { |
| 1515 | kvm_arch_ops->skip_emulated_instruction(vcpu); | 1804 | r = complete_pio(vcpu); |
| 1516 | kvm_run->emulated = 0; | 1805 | if (r) |
| 1806 | goto out; | ||
| 1517 | } | 1807 | } |
| 1518 | 1808 | ||
| 1519 | if (kvm_run->mmio_completed) { | 1809 | if (vcpu->mmio_needed) { |
| 1520 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 1810 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
| 1521 | vcpu->mmio_read_completed = 1; | 1811 | vcpu->mmio_read_completed = 1; |
| 1812 | vcpu->mmio_needed = 0; | ||
| 1813 | r = emulate_instruction(vcpu, kvm_run, | ||
| 1814 | vcpu->mmio_fault_cr2, 0); | ||
| 1815 | if (r == EMULATE_DO_MMIO) { | ||
| 1816 | /* | ||
| 1817 | * Read-modify-write. Back to userspace. | ||
| 1818 | */ | ||
| 1819 | kvm_run->exit_reason = KVM_EXIT_MMIO; | ||
| 1820 | r = 0; | ||
| 1821 | goto out; | ||
| 1822 | } | ||
| 1522 | } | 1823 | } |
| 1523 | 1824 | ||
| 1524 | vcpu->mmio_needed = 0; | 1825 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { |
| 1826 | kvm_arch_ops->cache_regs(vcpu); | ||
| 1827 | vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; | ||
| 1828 | kvm_arch_ops->decache_regs(vcpu); | ||
| 1829 | } | ||
| 1525 | 1830 | ||
| 1526 | r = kvm_arch_ops->run(vcpu, kvm_run); | 1831 | r = kvm_arch_ops->run(vcpu, kvm_run); |
| 1527 | 1832 | ||
| 1833 | out: | ||
| 1834 | if (vcpu->sigset_active) | ||
| 1835 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | ||
| 1836 | |||
| 1528 | vcpu_put(vcpu); | 1837 | vcpu_put(vcpu); |
| 1529 | return r; | 1838 | return r; |
| 1530 | } | 1839 | } |
| @@ -1633,7 +1942,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 1633 | sregs->gdt.limit = dt.limit; | 1942 | sregs->gdt.limit = dt.limit; |
| 1634 | sregs->gdt.base = dt.base; | 1943 | sregs->gdt.base = dt.base; |
| 1635 | 1944 | ||
| 1636 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | 1945 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); |
| 1637 | sregs->cr0 = vcpu->cr0; | 1946 | sregs->cr0 = vcpu->cr0; |
| 1638 | sregs->cr2 = vcpu->cr2; | 1947 | sregs->cr2 = vcpu->cr2; |
| 1639 | sregs->cr3 = vcpu->cr3; | 1948 | sregs->cr3 = vcpu->cr3; |
| @@ -1665,16 +1974,6 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 1665 | 1974 | ||
| 1666 | vcpu_load(vcpu); | 1975 | vcpu_load(vcpu); |
| 1667 | 1976 | ||
| 1668 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | ||
| 1669 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | ||
| 1670 | set_segment(vcpu, &sregs->es, VCPU_SREG_ES); | ||
| 1671 | set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); | ||
| 1672 | set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); | ||
| 1673 | set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); | ||
| 1674 | |||
| 1675 | set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | ||
| 1676 | set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | ||
| 1677 | |||
| 1678 | dt.limit = sregs->idt.limit; | 1977 | dt.limit = sregs->idt.limit; |
| 1679 | dt.base = sregs->idt.base; | 1978 | dt.base = sregs->idt.base; |
| 1680 | kvm_arch_ops->set_idt(vcpu, &dt); | 1979 | kvm_arch_ops->set_idt(vcpu, &dt); |
| @@ -1694,10 +1993,10 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 1694 | #endif | 1993 | #endif |
| 1695 | vcpu->apic_base = sregs->apic_base; | 1994 | vcpu->apic_base = sregs->apic_base; |
| 1696 | 1995 | ||
| 1697 | kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); | 1996 | kvm_arch_ops->decache_cr4_guest_bits(vcpu); |
| 1698 | 1997 | ||
| 1699 | mmu_reset_needed |= vcpu->cr0 != sregs->cr0; | 1998 | mmu_reset_needed |= vcpu->cr0 != sregs->cr0; |
| 1700 | kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); | 1999 | kvm_arch_ops->set_cr0(vcpu, sregs->cr0); |
| 1701 | 2000 | ||
| 1702 | mmu_reset_needed |= vcpu->cr4 != sregs->cr4; | 2001 | mmu_reset_needed |= vcpu->cr4 != sregs->cr4; |
| 1703 | kvm_arch_ops->set_cr4(vcpu, sregs->cr4); | 2002 | kvm_arch_ops->set_cr4(vcpu, sregs->cr4); |
| @@ -1714,6 +2013,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 1714 | if (vcpu->irq_pending[i]) | 2013 | if (vcpu->irq_pending[i]) |
| 1715 | __set_bit(i, &vcpu->irq_summary); | 2014 | __set_bit(i, &vcpu->irq_summary); |
| 1716 | 2015 | ||
| 2016 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | ||
| 2017 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | ||
| 2018 | set_segment(vcpu, &sregs->es, VCPU_SREG_ES); | ||
| 2019 | set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); | ||
| 2020 | set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); | ||
| 2021 | set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); | ||
| 2022 | |||
| 2023 | set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | ||
| 2024 | set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | ||
| 2025 | |||
| 1717 | vcpu_put(vcpu); | 2026 | vcpu_put(vcpu); |
| 1718 | 2027 | ||
| 1719 | return 0; | 2028 | return 0; |
| @@ -1887,6 +2196,36 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | |||
| 1887 | return r; | 2196 | return r; |
| 1888 | } | 2197 | } |
| 1889 | 2198 | ||
| 2199 | static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, | ||
| 2200 | unsigned long address, | ||
| 2201 | int *type) | ||
| 2202 | { | ||
| 2203 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; | ||
| 2204 | unsigned long pgoff; | ||
| 2205 | struct page *page; | ||
| 2206 | |||
| 2207 | *type = VM_FAULT_MINOR; | ||
| 2208 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
| 2209 | if (pgoff == 0) | ||
| 2210 | page = virt_to_page(vcpu->run); | ||
| 2211 | else if (pgoff == KVM_PIO_PAGE_OFFSET) | ||
| 2212 | page = virt_to_page(vcpu->pio_data); | ||
| 2213 | else | ||
| 2214 | return NOPAGE_SIGBUS; | ||
| 2215 | get_page(page); | ||
| 2216 | return page; | ||
| 2217 | } | ||
| 2218 | |||
| 2219 | static struct vm_operations_struct kvm_vcpu_vm_ops = { | ||
| 2220 | .nopage = kvm_vcpu_nopage, | ||
| 2221 | }; | ||
| 2222 | |||
| 2223 | static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 2224 | { | ||
| 2225 | vma->vm_ops = &kvm_vcpu_vm_ops; | ||
| 2226 | return 0; | ||
| 2227 | } | ||
| 2228 | |||
| 1890 | static int kvm_vcpu_release(struct inode *inode, struct file *filp) | 2229 | static int kvm_vcpu_release(struct inode *inode, struct file *filp) |
| 1891 | { | 2230 | { |
| 1892 | struct kvm_vcpu *vcpu = filp->private_data; | 2231 | struct kvm_vcpu *vcpu = filp->private_data; |
| @@ -1899,6 +2238,7 @@ static struct file_operations kvm_vcpu_fops = { | |||
| 1899 | .release = kvm_vcpu_release, | 2238 | .release = kvm_vcpu_release, |
| 1900 | .unlocked_ioctl = kvm_vcpu_ioctl, | 2239 | .unlocked_ioctl = kvm_vcpu_ioctl, |
| 1901 | .compat_ioctl = kvm_vcpu_ioctl, | 2240 | .compat_ioctl = kvm_vcpu_ioctl, |
| 2241 | .mmap = kvm_vcpu_mmap, | ||
| 1902 | }; | 2242 | }; |
| 1903 | 2243 | ||
| 1904 | /* | 2244 | /* |
| @@ -1947,6 +2287,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
| 1947 | { | 2287 | { |
| 1948 | int r; | 2288 | int r; |
| 1949 | struct kvm_vcpu *vcpu; | 2289 | struct kvm_vcpu *vcpu; |
| 2290 | struct page *page; | ||
| 1950 | 2291 | ||
| 1951 | r = -EINVAL; | 2292 | r = -EINVAL; |
| 1952 | if (!valid_vcpu(n)) | 2293 | if (!valid_vcpu(n)) |
| @@ -1961,9 +2302,22 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
| 1961 | return -EEXIST; | 2302 | return -EEXIST; |
| 1962 | } | 2303 | } |
| 1963 | 2304 | ||
| 2305 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 2306 | r = -ENOMEM; | ||
| 2307 | if (!page) | ||
| 2308 | goto out_unlock; | ||
| 2309 | vcpu->run = page_address(page); | ||
| 2310 | |||
| 2311 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 2312 | r = -ENOMEM; | ||
| 2313 | if (!page) | ||
| 2314 | goto out_free_run; | ||
| 2315 | vcpu->pio_data = page_address(page); | ||
| 2316 | |||
| 1964 | vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, | 2317 | vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, |
| 1965 | FX_IMAGE_ALIGN); | 2318 | FX_IMAGE_ALIGN); |
| 1966 | vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; | 2319 | vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; |
| 2320 | vcpu->cr0 = 0x10; | ||
| 1967 | 2321 | ||
| 1968 | r = kvm_arch_ops->vcpu_create(vcpu); | 2322 | r = kvm_arch_ops->vcpu_create(vcpu); |
| 1969 | if (r < 0) | 2323 | if (r < 0) |
| @@ -1990,11 +2344,107 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
| 1990 | 2344 | ||
| 1991 | out_free_vcpus: | 2345 | out_free_vcpus: |
| 1992 | kvm_free_vcpu(vcpu); | 2346 | kvm_free_vcpu(vcpu); |
| 2347 | out_free_run: | ||
| 2348 | free_page((unsigned long)vcpu->run); | ||
| 2349 | vcpu->run = NULL; | ||
| 2350 | out_unlock: | ||
| 1993 | mutex_unlock(&vcpu->mutex); | 2351 | mutex_unlock(&vcpu->mutex); |
| 1994 | out: | 2352 | out: |
| 1995 | return r; | 2353 | return r; |
| 1996 | } | 2354 | } |
| 1997 | 2355 | ||
| 2356 | static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||
| 2357 | struct kvm_cpuid *cpuid, | ||
| 2358 | struct kvm_cpuid_entry __user *entries) | ||
| 2359 | { | ||
| 2360 | int r; | ||
| 2361 | |||
| 2362 | r = -E2BIG; | ||
| 2363 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
| 2364 | goto out; | ||
| 2365 | r = -EFAULT; | ||
| 2366 | if (copy_from_user(&vcpu->cpuid_entries, entries, | ||
| 2367 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||
| 2368 | goto out; | ||
| 2369 | vcpu->cpuid_nent = cpuid->nent; | ||
| 2370 | return 0; | ||
| 2371 | |||
| 2372 | out: | ||
| 2373 | return r; | ||
| 2374 | } | ||
| 2375 | |||
| 2376 | static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | ||
| 2377 | { | ||
| 2378 | if (sigset) { | ||
| 2379 | sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
| 2380 | vcpu->sigset_active = 1; | ||
| 2381 | vcpu->sigset = *sigset; | ||
| 2382 | } else | ||
| 2383 | vcpu->sigset_active = 0; | ||
| 2384 | return 0; | ||
| 2385 | } | ||
| 2386 | |||
| 2387 | /* | ||
| 2388 | * fxsave fpu state. Taken from x86_64/processor.h. To be killed when | ||
| 2389 | * we have asm/x86/processor.h | ||
| 2390 | */ | ||
| 2391 | struct fxsave { | ||
| 2392 | u16 cwd; | ||
| 2393 | u16 swd; | ||
| 2394 | u16 twd; | ||
| 2395 | u16 fop; | ||
| 2396 | u64 rip; | ||
| 2397 | u64 rdp; | ||
| 2398 | u32 mxcsr; | ||
| 2399 | u32 mxcsr_mask; | ||
| 2400 | u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | ||
| 2401 | #ifdef CONFIG_X86_64 | ||
| 2402 | u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ | ||
| 2403 | #else | ||
| 2404 | u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ | ||
| 2405 | #endif | ||
| 2406 | }; | ||
| 2407 | |||
| 2408 | static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
| 2409 | { | ||
| 2410 | struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; | ||
| 2411 | |||
| 2412 | vcpu_load(vcpu); | ||
| 2413 | |||
| 2414 | memcpy(fpu->fpr, fxsave->st_space, 128); | ||
| 2415 | fpu->fcw = fxsave->cwd; | ||
| 2416 | fpu->fsw = fxsave->swd; | ||
| 2417 | fpu->ftwx = fxsave->twd; | ||
| 2418 | fpu->last_opcode = fxsave->fop; | ||
| 2419 | fpu->last_ip = fxsave->rip; | ||
| 2420 | fpu->last_dp = fxsave->rdp; | ||
| 2421 | memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); | ||
| 2422 | |||
| 2423 | vcpu_put(vcpu); | ||
| 2424 | |||
| 2425 | return 0; | ||
| 2426 | } | ||
| 2427 | |||
| 2428 | static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | ||
| 2429 | { | ||
| 2430 | struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; | ||
| 2431 | |||
| 2432 | vcpu_load(vcpu); | ||
| 2433 | |||
| 2434 | memcpy(fxsave->st_space, fpu->fpr, 128); | ||
| 2435 | fxsave->cwd = fpu->fcw; | ||
| 2436 | fxsave->swd = fpu->fsw; | ||
| 2437 | fxsave->twd = fpu->ftwx; | ||
| 2438 | fxsave->fop = fpu->last_opcode; | ||
| 2439 | fxsave->rip = fpu->last_ip; | ||
| 2440 | fxsave->rdp = fpu->last_dp; | ||
| 2441 | memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); | ||
| 2442 | |||
| 2443 | vcpu_put(vcpu); | ||
| 2444 | |||
| 2445 | return 0; | ||
| 2446 | } | ||
| 2447 | |||
| 1998 | static long kvm_vcpu_ioctl(struct file *filp, | 2448 | static long kvm_vcpu_ioctl(struct file *filp, |
| 1999 | unsigned int ioctl, unsigned long arg) | 2449 | unsigned int ioctl, unsigned long arg) |
| 2000 | { | 2450 | { |
| @@ -2003,21 +2453,12 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
| 2003 | int r = -EINVAL; | 2453 | int r = -EINVAL; |
| 2004 | 2454 | ||
| 2005 | switch (ioctl) { | 2455 | switch (ioctl) { |
| 2006 | case KVM_RUN: { | 2456 | case KVM_RUN: |
| 2007 | struct kvm_run kvm_run; | 2457 | r = -EINVAL; |
| 2008 | 2458 | if (arg) | |
| 2009 | r = -EFAULT; | ||
| 2010 | if (copy_from_user(&kvm_run, argp, sizeof kvm_run)) | ||
| 2011 | goto out; | 2459 | goto out; |
| 2012 | r = kvm_vcpu_ioctl_run(vcpu, &kvm_run); | 2460 | r = kvm_vcpu_ioctl_run(vcpu, vcpu->run); |
| 2013 | if (r < 0 && r != -EINTR) | ||
| 2014 | goto out; | ||
| 2015 | if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) { | ||
| 2016 | r = -EFAULT; | ||
| 2017 | goto out; | ||
| 2018 | } | ||
| 2019 | break; | 2461 | break; |
| 2020 | } | ||
| 2021 | case KVM_GET_REGS: { | 2462 | case KVM_GET_REGS: { |
| 2022 | struct kvm_regs kvm_regs; | 2463 | struct kvm_regs kvm_regs; |
| 2023 | 2464 | ||
| @@ -2113,6 +2554,66 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
| 2113 | case KVM_SET_MSRS: | 2554 | case KVM_SET_MSRS: |
| 2114 | r = msr_io(vcpu, argp, do_set_msr, 0); | 2555 | r = msr_io(vcpu, argp, do_set_msr, 0); |
| 2115 | break; | 2556 | break; |
| 2557 | case KVM_SET_CPUID: { | ||
| 2558 | struct kvm_cpuid __user *cpuid_arg = argp; | ||
| 2559 | struct kvm_cpuid cpuid; | ||
| 2560 | |||
| 2561 | r = -EFAULT; | ||
| 2562 | if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) | ||
| 2563 | goto out; | ||
| 2564 | r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); | ||
| 2565 | if (r) | ||
| 2566 | goto out; | ||
| 2567 | break; | ||
| 2568 | } | ||
| 2569 | case KVM_SET_SIGNAL_MASK: { | ||
| 2570 | struct kvm_signal_mask __user *sigmask_arg = argp; | ||
| 2571 | struct kvm_signal_mask kvm_sigmask; | ||
| 2572 | sigset_t sigset, *p; | ||
| 2573 | |||
| 2574 | p = NULL; | ||
| 2575 | if (argp) { | ||
| 2576 | r = -EFAULT; | ||
| 2577 | if (copy_from_user(&kvm_sigmask, argp, | ||
| 2578 | sizeof kvm_sigmask)) | ||
| 2579 | goto out; | ||
| 2580 | r = -EINVAL; | ||
| 2581 | if (kvm_sigmask.len != sizeof sigset) | ||
| 2582 | goto out; | ||
| 2583 | r = -EFAULT; | ||
| 2584 | if (copy_from_user(&sigset, sigmask_arg->sigset, | ||
| 2585 | sizeof sigset)) | ||
| 2586 | goto out; | ||
| 2587 | p = &sigset; | ||
| 2588 | } | ||
| 2589 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); | ||
| 2590 | break; | ||
| 2591 | } | ||
| 2592 | case KVM_GET_FPU: { | ||
| 2593 | struct kvm_fpu fpu; | ||
| 2594 | |||
| 2595 | memset(&fpu, 0, sizeof fpu); | ||
| 2596 | r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu); | ||
| 2597 | if (r) | ||
| 2598 | goto out; | ||
| 2599 | r = -EFAULT; | ||
| 2600 | if (copy_to_user(argp, &fpu, sizeof fpu)) | ||
| 2601 | goto out; | ||
| 2602 | r = 0; | ||
| 2603 | break; | ||
| 2604 | } | ||
| 2605 | case KVM_SET_FPU: { | ||
| 2606 | struct kvm_fpu fpu; | ||
| 2607 | |||
| 2608 | r = -EFAULT; | ||
| 2609 | if (copy_from_user(&fpu, argp, sizeof fpu)) | ||
| 2610 | goto out; | ||
| 2611 | r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu); | ||
| 2612 | if (r) | ||
| 2613 | goto out; | ||
| 2614 | r = 0; | ||
| 2615 | break; | ||
| 2616 | } | ||
| 2116 | default: | 2617 | default: |
| 2117 | ; | 2618 | ; |
| 2118 | } | 2619 | } |
| @@ -2155,6 +2656,17 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 2155 | goto out; | 2656 | goto out; |
| 2156 | break; | 2657 | break; |
| 2157 | } | 2658 | } |
| 2659 | case KVM_SET_MEMORY_ALIAS: { | ||
| 2660 | struct kvm_memory_alias alias; | ||
| 2661 | |||
| 2662 | r = -EFAULT; | ||
| 2663 | if (copy_from_user(&alias, argp, sizeof alias)) | ||
| 2664 | goto out; | ||
| 2665 | r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); | ||
| 2666 | if (r) | ||
| 2667 | goto out; | ||
| 2668 | break; | ||
| 2669 | } | ||
| 2158 | default: | 2670 | default: |
| 2159 | ; | 2671 | ; |
| 2160 | } | 2672 | } |
| @@ -2168,15 +2680,11 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, | |||
| 2168 | { | 2680 | { |
| 2169 | struct kvm *kvm = vma->vm_file->private_data; | 2681 | struct kvm *kvm = vma->vm_file->private_data; |
| 2170 | unsigned long pgoff; | 2682 | unsigned long pgoff; |
| 2171 | struct kvm_memory_slot *slot; | ||
| 2172 | struct page *page; | 2683 | struct page *page; |
| 2173 | 2684 | ||
| 2174 | *type = VM_FAULT_MINOR; | 2685 | *type = VM_FAULT_MINOR; |
| 2175 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 2686 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
| 2176 | slot = gfn_to_memslot(kvm, pgoff); | 2687 | page = gfn_to_page(kvm, pgoff); |
| 2177 | if (!slot) | ||
| 2178 | return NOPAGE_SIGBUS; | ||
| 2179 | page = gfn_to_page(slot, pgoff); | ||
| 2180 | if (!page) | 2688 | if (!page) |
| 2181 | return NOPAGE_SIGBUS; | 2689 | return NOPAGE_SIGBUS; |
| 2182 | get_page(page); | 2690 | get_page(page); |
| @@ -2248,13 +2756,19 @@ static long kvm_dev_ioctl(struct file *filp, | |||
| 2248 | unsigned int ioctl, unsigned long arg) | 2756 | unsigned int ioctl, unsigned long arg) |
| 2249 | { | 2757 | { |
| 2250 | void __user *argp = (void __user *)arg; | 2758 | void __user *argp = (void __user *)arg; |
| 2251 | int r = -EINVAL; | 2759 | long r = -EINVAL; |
| 2252 | 2760 | ||
| 2253 | switch (ioctl) { | 2761 | switch (ioctl) { |
| 2254 | case KVM_GET_API_VERSION: | 2762 | case KVM_GET_API_VERSION: |
| 2763 | r = -EINVAL; | ||
| 2764 | if (arg) | ||
| 2765 | goto out; | ||
| 2255 | r = KVM_API_VERSION; | 2766 | r = KVM_API_VERSION; |
| 2256 | break; | 2767 | break; |
| 2257 | case KVM_CREATE_VM: | 2768 | case KVM_CREATE_VM: |
| 2769 | r = -EINVAL; | ||
| 2770 | if (arg) | ||
| 2771 | goto out; | ||
| 2258 | r = kvm_dev_ioctl_create_vm(); | 2772 | r = kvm_dev_ioctl_create_vm(); |
| 2259 | break; | 2773 | break; |
| 2260 | case KVM_GET_MSR_INDEX_LIST: { | 2774 | case KVM_GET_MSR_INDEX_LIST: { |
| @@ -2284,6 +2798,18 @@ static long kvm_dev_ioctl(struct file *filp, | |||
| 2284 | r = 0; | 2798 | r = 0; |
| 2285 | break; | 2799 | break; |
| 2286 | } | 2800 | } |
| 2801 | case KVM_CHECK_EXTENSION: | ||
| 2802 | /* | ||
| 2803 | * No extensions defined at present. | ||
| 2804 | */ | ||
| 2805 | r = 0; | ||
| 2806 | break; | ||
| 2807 | case KVM_GET_VCPU_MMAP_SIZE: | ||
| 2808 | r = -EINVAL; | ||
| 2809 | if (arg) | ||
| 2810 | goto out; | ||
| 2811 | r = 2 * PAGE_SIZE; | ||
| 2812 | break; | ||
| 2287 | default: | 2813 | default: |
| 2288 | ; | 2814 | ; |
| 2289 | } | 2815 | } |
| @@ -2299,7 +2825,7 @@ static struct file_operations kvm_chardev_ops = { | |||
| 2299 | }; | 2825 | }; |
| 2300 | 2826 | ||
| 2301 | static struct miscdevice kvm_dev = { | 2827 | static struct miscdevice kvm_dev = { |
| 2302 | MISC_DYNAMIC_MINOR, | 2828 | KVM_MINOR, |
| 2303 | "kvm", | 2829 | "kvm", |
| 2304 | &kvm_chardev_ops, | 2830 | &kvm_chardev_ops, |
| 2305 | }; | 2831 | }; |
| @@ -2385,14 +2911,39 @@ static struct notifier_block kvm_cpu_notifier = { | |||
| 2385 | .priority = 20, /* must be > scheduler priority */ | 2911 | .priority = 20, /* must be > scheduler priority */ |
| 2386 | }; | 2912 | }; |
| 2387 | 2913 | ||
| 2914 | static u64 stat_get(void *_offset) | ||
| 2915 | { | ||
| 2916 | unsigned offset = (long)_offset; | ||
| 2917 | u64 total = 0; | ||
| 2918 | struct kvm *kvm; | ||
| 2919 | struct kvm_vcpu *vcpu; | ||
| 2920 | int i; | ||
| 2921 | |||
| 2922 | spin_lock(&kvm_lock); | ||
| 2923 | list_for_each_entry(kvm, &vm_list, vm_list) | ||
| 2924 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
| 2925 | vcpu = &kvm->vcpus[i]; | ||
| 2926 | total += *(u32 *)((void *)vcpu + offset); | ||
| 2927 | } | ||
| 2928 | spin_unlock(&kvm_lock); | ||
| 2929 | return total; | ||
| 2930 | } | ||
| 2931 | |||
| 2932 | static void stat_set(void *offset, u64 val) | ||
| 2933 | { | ||
| 2934 | } | ||
| 2935 | |||
| 2936 | DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n"); | ||
| 2937 | |||
| 2388 | static __init void kvm_init_debug(void) | 2938 | static __init void kvm_init_debug(void) |
| 2389 | { | 2939 | { |
| 2390 | struct kvm_stats_debugfs_item *p; | 2940 | struct kvm_stats_debugfs_item *p; |
| 2391 | 2941 | ||
| 2392 | debugfs_dir = debugfs_create_dir("kvm", NULL); | 2942 | debugfs_dir = debugfs_create_dir("kvm", NULL); |
| 2393 | for (p = debugfs_entries; p->name; ++p) | 2943 | for (p = debugfs_entries; p->name; ++p) |
| 2394 | p->dentry = debugfs_create_u32(p->name, 0444, debugfs_dir, | 2944 | p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, |
| 2395 | p->data); | 2945 | (void *)(long)p->offset, |
| 2946 | &stat_fops); | ||
| 2396 | } | 2947 | } |
| 2397 | 2948 | ||
| 2398 | static void kvm_exit_debug(void) | 2949 | static void kvm_exit_debug(void) |
| @@ -2522,6 +3073,10 @@ static __init int kvm_init(void) | |||
| 2522 | static struct page *bad_page; | 3073 | static struct page *bad_page; |
| 2523 | int r; | 3074 | int r; |
| 2524 | 3075 | ||
| 3076 | r = kvm_mmu_module_init(); | ||
| 3077 | if (r) | ||
| 3078 | goto out4; | ||
| 3079 | |||
| 2525 | r = register_filesystem(&kvm_fs_type); | 3080 | r = register_filesystem(&kvm_fs_type); |
| 2526 | if (r) | 3081 | if (r) |
| 2527 | goto out3; | 3082 | goto out3; |
| @@ -2550,6 +3105,8 @@ out: | |||
| 2550 | out2: | 3105 | out2: |
| 2551 | unregister_filesystem(&kvm_fs_type); | 3106 | unregister_filesystem(&kvm_fs_type); |
| 2552 | out3: | 3107 | out3: |
| 3108 | kvm_mmu_module_exit(); | ||
| 3109 | out4: | ||
| 2553 | return r; | 3110 | return r; |
| 2554 | } | 3111 | } |
| 2555 | 3112 | ||
| @@ -2559,6 +3116,7 @@ static __exit void kvm_exit(void) | |||
| 2559 | __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); | 3116 | __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); |
| 2560 | mntput(kvmfs_mnt); | 3117 | mntput(kvmfs_mnt); |
| 2561 | unregister_filesystem(&kvm_fs_type); | 3118 | unregister_filesystem(&kvm_fs_type); |
| 3119 | kvm_mmu_module_exit(); | ||
| 2562 | } | 3120 | } |
| 2563 | 3121 | ||
| 2564 | module_init(kvm_init) | 3122 | module_init(kvm_init) |
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h index 624f1ca48657..a869983d683d 100644 --- a/drivers/kvm/kvm_svm.h +++ b/drivers/kvm/kvm_svm.h | |||
| @@ -9,17 +9,15 @@ | |||
| 9 | #include "svm.h" | 9 | #include "svm.h" |
| 10 | #include "kvm.h" | 10 | #include "kvm.h" |
| 11 | 11 | ||
| 12 | static const u32 host_save_msrs[] = { | 12 | static const u32 host_save_user_msrs[] = { |
| 13 | #ifdef CONFIG_X86_64 | 13 | #ifdef CONFIG_X86_64 |
| 14 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, | 14 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, |
| 15 | MSR_FS_BASE, MSR_GS_BASE, | 15 | MSR_FS_BASE, |
| 16 | #endif | 16 | #endif |
| 17 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 17 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
| 18 | MSR_IA32_DEBUGCTLMSR, /*MSR_IA32_LASTBRANCHFROMIP, | ||
| 19 | MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/ | ||
| 20 | }; | 18 | }; |
| 21 | 19 | ||
| 22 | #define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs) | 20 | #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) |
| 23 | #define NUM_DB_REGS 4 | 21 | #define NUM_DB_REGS 4 |
| 24 | 22 | ||
| 25 | struct vcpu_svm { | 23 | struct vcpu_svm { |
| @@ -28,13 +26,12 @@ struct vcpu_svm { | |||
| 28 | struct svm_cpu_data *svm_data; | 26 | struct svm_cpu_data *svm_data; |
| 29 | uint64_t asid_generation; | 27 | uint64_t asid_generation; |
| 30 | 28 | ||
| 31 | unsigned long cr0; | ||
| 32 | unsigned long cr4; | ||
| 33 | unsigned long db_regs[NUM_DB_REGS]; | 29 | unsigned long db_regs[NUM_DB_REGS]; |
| 34 | 30 | ||
| 35 | u64 next_rip; | 31 | u64 next_rip; |
| 36 | 32 | ||
| 37 | u64 host_msrs[NR_HOST_SAVE_MSRS]; | 33 | u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; |
| 34 | u64 host_gs_base; | ||
| 38 | unsigned long host_cr2; | 35 | unsigned long host_cr2; |
| 39 | unsigned long host_db_regs[NUM_DB_REGS]; | 36 | unsigned long host_db_regs[NUM_DB_REGS]; |
| 40 | unsigned long host_dr6; | 37 | unsigned long host_dr6; |
diff --git a/drivers/kvm/kvm_vmx.h b/drivers/kvm/kvm_vmx.h deleted file mode 100644 index d139f73fb6e1..000000000000 --- a/drivers/kvm/kvm_vmx.h +++ /dev/null | |||
| @@ -1,14 +0,0 @@ | |||
| 1 | #ifndef __KVM_VMX_H | ||
| 2 | #define __KVM_VMX_H | ||
| 3 | |||
| 4 | #ifdef CONFIG_X86_64 | ||
| 5 | /* | ||
| 6 | * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt | ||
| 7 | * mechanism (cpu bug AA24) | ||
| 8 | */ | ||
| 9 | #define NR_BAD_MSRS 2 | ||
| 10 | #else | ||
| 11 | #define NR_BAD_MSRS 0 | ||
| 12 | #endif | ||
| 13 | |||
| 14 | #endif | ||
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index cab26f301eab..e8e228118de9 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c | |||
| @@ -52,11 +52,15 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} | |||
| 52 | static int dbg = 1; | 52 | static int dbg = 1; |
| 53 | #endif | 53 | #endif |
| 54 | 54 | ||
| 55 | #ifndef MMU_DEBUG | ||
| 56 | #define ASSERT(x) do { } while (0) | ||
| 57 | #else | ||
| 55 | #define ASSERT(x) \ | 58 | #define ASSERT(x) \ |
| 56 | if (!(x)) { \ | 59 | if (!(x)) { \ |
| 57 | printk(KERN_WARNING "assertion failed %s:%d: %s\n", \ | 60 | printk(KERN_WARNING "assertion failed %s:%d: %s\n", \ |
| 58 | __FILE__, __LINE__, #x); \ | 61 | __FILE__, __LINE__, #x); \ |
| 59 | } | 62 | } |
| 63 | #endif | ||
| 60 | 64 | ||
| 61 | #define PT64_PT_BITS 9 | 65 | #define PT64_PT_BITS 9 |
| 62 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) | 66 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) |
| @@ -159,6 +163,9 @@ struct kvm_rmap_desc { | |||
| 159 | struct kvm_rmap_desc *more; | 163 | struct kvm_rmap_desc *more; |
| 160 | }; | 164 | }; |
| 161 | 165 | ||
| 166 | static struct kmem_cache *pte_chain_cache; | ||
| 167 | static struct kmem_cache *rmap_desc_cache; | ||
| 168 | |||
| 162 | static int is_write_protection(struct kvm_vcpu *vcpu) | 169 | static int is_write_protection(struct kvm_vcpu *vcpu) |
| 163 | { | 170 | { |
| 164 | return vcpu->cr0 & CR0_WP_MASK; | 171 | return vcpu->cr0 & CR0_WP_MASK; |
| @@ -196,14 +203,15 @@ static int is_rmap_pte(u64 pte) | |||
| 196 | } | 203 | } |
| 197 | 204 | ||
| 198 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 205 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
| 199 | size_t objsize, int min) | 206 | struct kmem_cache *base_cache, int min, |
| 207 | gfp_t gfp_flags) | ||
| 200 | { | 208 | { |
| 201 | void *obj; | 209 | void *obj; |
| 202 | 210 | ||
| 203 | if (cache->nobjs >= min) | 211 | if (cache->nobjs >= min) |
| 204 | return 0; | 212 | return 0; |
| 205 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | 213 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
| 206 | obj = kzalloc(objsize, GFP_NOWAIT); | 214 | obj = kmem_cache_zalloc(base_cache, gfp_flags); |
| 207 | if (!obj) | 215 | if (!obj) |
| 208 | return -ENOMEM; | 216 | return -ENOMEM; |
| 209 | cache->objects[cache->nobjs++] = obj; | 217 | cache->objects[cache->nobjs++] = obj; |
| @@ -217,20 +225,35 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) | |||
| 217 | kfree(mc->objects[--mc->nobjs]); | 225 | kfree(mc->objects[--mc->nobjs]); |
| 218 | } | 226 | } |
| 219 | 227 | ||
| 220 | static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) | 228 | static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) |
| 221 | { | 229 | { |
| 222 | int r; | 230 | int r; |
| 223 | 231 | ||
| 224 | r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, | 232 | r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, |
| 225 | sizeof(struct kvm_pte_chain), 4); | 233 | pte_chain_cache, 4, gfp_flags); |
| 226 | if (r) | 234 | if (r) |
| 227 | goto out; | 235 | goto out; |
| 228 | r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, | 236 | r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, |
| 229 | sizeof(struct kvm_rmap_desc), 1); | 237 | rmap_desc_cache, 1, gfp_flags); |
| 230 | out: | 238 | out: |
| 231 | return r; | 239 | return r; |
| 232 | } | 240 | } |
| 233 | 241 | ||
| 242 | static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) | ||
| 243 | { | ||
| 244 | int r; | ||
| 245 | |||
| 246 | r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT); | ||
| 247 | if (r < 0) { | ||
| 248 | spin_unlock(&vcpu->kvm->lock); | ||
| 249 | kvm_arch_ops->vcpu_put(vcpu); | ||
| 250 | r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); | ||
| 251 | kvm_arch_ops->vcpu_load(vcpu); | ||
| 252 | spin_lock(&vcpu->kvm->lock); | ||
| 253 | } | ||
| 254 | return r; | ||
| 255 | } | ||
| 256 | |||
| 234 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | 257 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) |
| 235 | { | 258 | { |
| 236 | mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); | 259 | mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); |
| @@ -390,13 +413,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) | |||
| 390 | { | 413 | { |
| 391 | struct kvm *kvm = vcpu->kvm; | 414 | struct kvm *kvm = vcpu->kvm; |
| 392 | struct page *page; | 415 | struct page *page; |
| 393 | struct kvm_memory_slot *slot; | ||
| 394 | struct kvm_rmap_desc *desc; | 416 | struct kvm_rmap_desc *desc; |
| 395 | u64 *spte; | 417 | u64 *spte; |
| 396 | 418 | ||
| 397 | slot = gfn_to_memslot(kvm, gfn); | 419 | page = gfn_to_page(kvm, gfn); |
| 398 | BUG_ON(!slot); | 420 | BUG_ON(!page); |
| 399 | page = gfn_to_page(slot, gfn); | ||
| 400 | 421 | ||
| 401 | while (page_private(page)) { | 422 | while (page_private(page)) { |
| 402 | if (!(page_private(page) & 1)) | 423 | if (!(page_private(page) & 1)) |
| @@ -417,6 +438,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) | |||
| 417 | } | 438 | } |
| 418 | } | 439 | } |
| 419 | 440 | ||
| 441 | #ifdef MMU_DEBUG | ||
| 420 | static int is_empty_shadow_page(hpa_t page_hpa) | 442 | static int is_empty_shadow_page(hpa_t page_hpa) |
| 421 | { | 443 | { |
| 422 | u64 *pos; | 444 | u64 *pos; |
| @@ -431,15 +453,15 @@ static int is_empty_shadow_page(hpa_t page_hpa) | |||
| 431 | } | 453 | } |
| 432 | return 1; | 454 | return 1; |
| 433 | } | 455 | } |
| 456 | #endif | ||
| 434 | 457 | ||
| 435 | static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) | 458 | static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) |
| 436 | { | 459 | { |
| 437 | struct kvm_mmu_page *page_head = page_header(page_hpa); | 460 | struct kvm_mmu_page *page_head = page_header(page_hpa); |
| 438 | 461 | ||
| 439 | ASSERT(is_empty_shadow_page(page_hpa)); | 462 | ASSERT(is_empty_shadow_page(page_hpa)); |
| 440 | list_del(&page_head->link); | ||
| 441 | page_head->page_hpa = page_hpa; | 463 | page_head->page_hpa = page_hpa; |
| 442 | list_add(&page_head->link, &vcpu->free_pages); | 464 | list_move(&page_head->link, &vcpu->free_pages); |
| 443 | ++vcpu->kvm->n_free_mmu_pages; | 465 | ++vcpu->kvm->n_free_mmu_pages; |
| 444 | } | 466 | } |
| 445 | 467 | ||
| @@ -457,11 +479,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
| 457 | return NULL; | 479 | return NULL; |
| 458 | 480 | ||
| 459 | page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); | 481 | page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); |
| 460 | list_del(&page->link); | 482 | list_move(&page->link, &vcpu->kvm->active_mmu_pages); |
| 461 | list_add(&page->link, &vcpu->kvm->active_mmu_pages); | ||
| 462 | ASSERT(is_empty_shadow_page(page->page_hpa)); | 483 | ASSERT(is_empty_shadow_page(page->page_hpa)); |
| 463 | page->slot_bitmap = 0; | 484 | page->slot_bitmap = 0; |
| 464 | page->global = 1; | ||
| 465 | page->multimapped = 0; | 485 | page->multimapped = 0; |
| 466 | page->parent_pte = parent_pte; | 486 | page->parent_pte = parent_pte; |
| 467 | --vcpu->kvm->n_free_mmu_pages; | 487 | --vcpu->kvm->n_free_mmu_pages; |
| @@ -569,6 +589,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 569 | gva_t gaddr, | 589 | gva_t gaddr, |
| 570 | unsigned level, | 590 | unsigned level, |
| 571 | int metaphysical, | 591 | int metaphysical, |
| 592 | unsigned hugepage_access, | ||
| 572 | u64 *parent_pte) | 593 | u64 *parent_pte) |
| 573 | { | 594 | { |
| 574 | union kvm_mmu_page_role role; | 595 | union kvm_mmu_page_role role; |
| @@ -582,6 +603,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 582 | role.glevels = vcpu->mmu.root_level; | 603 | role.glevels = vcpu->mmu.root_level; |
| 583 | role.level = level; | 604 | role.level = level; |
| 584 | role.metaphysical = metaphysical; | 605 | role.metaphysical = metaphysical; |
| 606 | role.hugepage_access = hugepage_access; | ||
| 585 | if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { | 607 | if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { |
| 586 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 608 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
| 587 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 609 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
| @@ -669,10 +691,8 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, | |||
| 669 | if (!page->root_count) { | 691 | if (!page->root_count) { |
| 670 | hlist_del(&page->hash_link); | 692 | hlist_del(&page->hash_link); |
| 671 | kvm_mmu_free_page(vcpu, page->page_hpa); | 693 | kvm_mmu_free_page(vcpu, page->page_hpa); |
| 672 | } else { | 694 | } else |
| 673 | list_del(&page->link); | 695 | list_move(&page->link, &vcpu->kvm->active_mmu_pages); |
| 674 | list_add(&page->link, &vcpu->kvm->active_mmu_pages); | ||
| 675 | } | ||
| 676 | } | 696 | } |
| 677 | 697 | ||
| 678 | static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) | 698 | static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) |
| @@ -714,14 +734,12 @@ hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) | |||
| 714 | 734 | ||
| 715 | hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) | 735 | hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) |
| 716 | { | 736 | { |
| 717 | struct kvm_memory_slot *slot; | ||
| 718 | struct page *page; | 737 | struct page *page; |
| 719 | 738 | ||
| 720 | ASSERT((gpa & HPA_ERR_MASK) == 0); | 739 | ASSERT((gpa & HPA_ERR_MASK) == 0); |
| 721 | slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); | 740 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 722 | if (!slot) | 741 | if (!page) |
| 723 | return gpa | HPA_ERR_MASK; | 742 | return gpa | HPA_ERR_MASK; |
| 724 | page = gfn_to_page(slot, gpa >> PAGE_SHIFT); | ||
| 725 | return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) | 743 | return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) |
| 726 | | (gpa & (PAGE_SIZE-1)); | 744 | | (gpa & (PAGE_SIZE-1)); |
| 727 | } | 745 | } |
| @@ -735,6 +753,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 735 | return gpa_to_hpa(vcpu, gpa); | 753 | return gpa_to_hpa(vcpu, gpa); |
| 736 | } | 754 | } |
| 737 | 755 | ||
| 756 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 757 | { | ||
| 758 | gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); | ||
| 759 | |||
| 760 | if (gpa == UNMAPPED_GVA) | ||
| 761 | return NULL; | ||
| 762 | return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT); | ||
| 763 | } | ||
| 764 | |||
| 738 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 765 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
| 739 | { | 766 | { |
| 740 | } | 767 | } |
| @@ -772,7 +799,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
| 772 | >> PAGE_SHIFT; | 799 | >> PAGE_SHIFT; |
| 773 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, | 800 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, |
| 774 | v, level - 1, | 801 | v, level - 1, |
| 775 | 1, &table[index]); | 802 | 1, 0, &table[index]); |
| 776 | if (!new_table) { | 803 | if (!new_table) { |
| 777 | pgprintk("nonpaging_map: ENOMEM\n"); | 804 | pgprintk("nonpaging_map: ENOMEM\n"); |
| 778 | return -ENOMEM; | 805 | return -ENOMEM; |
| @@ -804,10 +831,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
| 804 | for (i = 0; i < 4; ++i) { | 831 | for (i = 0; i < 4; ++i) { |
| 805 | hpa_t root = vcpu->mmu.pae_root[i]; | 832 | hpa_t root = vcpu->mmu.pae_root[i]; |
| 806 | 833 | ||
| 807 | ASSERT(VALID_PAGE(root)); | 834 | if (root) { |
| 808 | root &= PT64_BASE_ADDR_MASK; | 835 | ASSERT(VALID_PAGE(root)); |
| 809 | page = page_header(root); | 836 | root &= PT64_BASE_ADDR_MASK; |
| 810 | --page->root_count; | 837 | page = page_header(root); |
| 838 | --page->root_count; | ||
| 839 | } | ||
| 811 | vcpu->mmu.pae_root[i] = INVALID_PAGE; | 840 | vcpu->mmu.pae_root[i] = INVALID_PAGE; |
| 812 | } | 841 | } |
| 813 | vcpu->mmu.root_hpa = INVALID_PAGE; | 842 | vcpu->mmu.root_hpa = INVALID_PAGE; |
| @@ -827,7 +856,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 827 | 856 | ||
| 828 | ASSERT(!VALID_PAGE(root)); | 857 | ASSERT(!VALID_PAGE(root)); |
| 829 | page = kvm_mmu_get_page(vcpu, root_gfn, 0, | 858 | page = kvm_mmu_get_page(vcpu, root_gfn, 0, |
| 830 | PT64_ROOT_LEVEL, 0, NULL); | 859 | PT64_ROOT_LEVEL, 0, 0, NULL); |
| 831 | root = page->page_hpa; | 860 | root = page->page_hpa; |
| 832 | ++page->root_count; | 861 | ++page->root_count; |
| 833 | vcpu->mmu.root_hpa = root; | 862 | vcpu->mmu.root_hpa = root; |
| @@ -838,13 +867,17 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 838 | hpa_t root = vcpu->mmu.pae_root[i]; | 867 | hpa_t root = vcpu->mmu.pae_root[i]; |
| 839 | 868 | ||
| 840 | ASSERT(!VALID_PAGE(root)); | 869 | ASSERT(!VALID_PAGE(root)); |
| 841 | if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) | 870 | if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) { |
| 871 | if (!is_present_pte(vcpu->pdptrs[i])) { | ||
| 872 | vcpu->mmu.pae_root[i] = 0; | ||
| 873 | continue; | ||
| 874 | } | ||
| 842 | root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT; | 875 | root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT; |
| 843 | else if (vcpu->mmu.root_level == 0) | 876 | } else if (vcpu->mmu.root_level == 0) |
| 844 | root_gfn = 0; | 877 | root_gfn = 0; |
| 845 | page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 878 | page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
| 846 | PT32_ROOT_LEVEL, !is_paging(vcpu), | 879 | PT32_ROOT_LEVEL, !is_paging(vcpu), |
| 847 | NULL); | 880 | 0, NULL); |
| 848 | root = page->page_hpa; | 881 | root = page->page_hpa; |
| 849 | ++page->root_count; | 882 | ++page->root_count; |
| 850 | vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; | 883 | vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; |
| @@ -903,7 +936,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
| 903 | 936 | ||
| 904 | static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 937 | static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
| 905 | { | 938 | { |
| 906 | ++kvm_stat.tlb_flush; | 939 | ++vcpu->stat.tlb_flush; |
| 907 | kvm_arch_ops->tlb_flush(vcpu); | 940 | kvm_arch_ops->tlb_flush(vcpu); |
| 908 | } | 941 | } |
| 909 | 942 | ||
| @@ -918,11 +951,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) | |||
| 918 | kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); | 951 | kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); |
| 919 | } | 952 | } |
| 920 | 953 | ||
| 921 | static void mark_pagetable_nonglobal(void *shadow_pte) | ||
| 922 | { | ||
| 923 | page_header(__pa(shadow_pte))->global = 0; | ||
| 924 | } | ||
| 925 | |||
| 926 | static inline void set_pte_common(struct kvm_vcpu *vcpu, | 954 | static inline void set_pte_common(struct kvm_vcpu *vcpu, |
| 927 | u64 *shadow_pte, | 955 | u64 *shadow_pte, |
| 928 | gpa_t gaddr, | 956 | gpa_t gaddr, |
| @@ -940,9 +968,6 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu, | |||
| 940 | 968 | ||
| 941 | *shadow_pte |= access_bits; | 969 | *shadow_pte |= access_bits; |
| 942 | 970 | ||
| 943 | if (!(*shadow_pte & PT_GLOBAL_MASK)) | ||
| 944 | mark_pagetable_nonglobal(shadow_pte); | ||
| 945 | |||
| 946 | if (is_error_hpa(paddr)) { | 971 | if (is_error_hpa(paddr)) { |
| 947 | *shadow_pte |= gaddr; | 972 | *shadow_pte |= gaddr; |
| 948 | *shadow_pte |= PT_SHADOW_IO_MARK; | 973 | *shadow_pte |= PT_SHADOW_IO_MARK; |
| @@ -1316,6 +1341,51 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) | |||
| 1316 | } | 1341 | } |
| 1317 | } | 1342 | } |
| 1318 | 1343 | ||
| 1344 | void kvm_mmu_zap_all(struct kvm_vcpu *vcpu) | ||
| 1345 | { | ||
| 1346 | destroy_kvm_mmu(vcpu); | ||
| 1347 | |||
| 1348 | while (!list_empty(&vcpu->kvm->active_mmu_pages)) { | ||
| 1349 | struct kvm_mmu_page *page; | ||
| 1350 | |||
| 1351 | page = container_of(vcpu->kvm->active_mmu_pages.next, | ||
| 1352 | struct kvm_mmu_page, link); | ||
| 1353 | kvm_mmu_zap_page(vcpu, page); | ||
| 1354 | } | ||
| 1355 | |||
| 1356 | mmu_free_memory_caches(vcpu); | ||
| 1357 | kvm_arch_ops->tlb_flush(vcpu); | ||
| 1358 | init_kvm_mmu(vcpu); | ||
| 1359 | } | ||
| 1360 | |||
| 1361 | void kvm_mmu_module_exit(void) | ||
| 1362 | { | ||
| 1363 | if (pte_chain_cache) | ||
| 1364 | kmem_cache_destroy(pte_chain_cache); | ||
| 1365 | if (rmap_desc_cache) | ||
| 1366 | kmem_cache_destroy(rmap_desc_cache); | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | int kvm_mmu_module_init(void) | ||
| 1370 | { | ||
| 1371 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", | ||
| 1372 | sizeof(struct kvm_pte_chain), | ||
| 1373 | 0, 0, NULL, NULL); | ||
| 1374 | if (!pte_chain_cache) | ||
| 1375 | goto nomem; | ||
| 1376 | rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", | ||
| 1377 | sizeof(struct kvm_rmap_desc), | ||
| 1378 | 0, 0, NULL, NULL); | ||
| 1379 | if (!rmap_desc_cache) | ||
| 1380 | goto nomem; | ||
| 1381 | |||
| 1382 | return 0; | ||
| 1383 | |||
| 1384 | nomem: | ||
| 1385 | kvm_mmu_module_exit(); | ||
| 1386 | return -ENOMEM; | ||
| 1387 | } | ||
| 1388 | |||
| 1319 | #ifdef AUDIT | 1389 | #ifdef AUDIT |
| 1320 | 1390 | ||
| 1321 | static const char *audit_msg; | 1391 | static const char *audit_msg; |
| @@ -1338,7 +1408,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
| 1338 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { | 1408 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { |
| 1339 | u64 ent = pt[i]; | 1409 | u64 ent = pt[i]; |
| 1340 | 1410 | ||
| 1341 | if (!ent & PT_PRESENT_MASK) | 1411 | if (!(ent & PT_PRESENT_MASK)) |
| 1342 | continue; | 1412 | continue; |
| 1343 | 1413 | ||
| 1344 | va = canonicalize(va); | 1414 | va = canonicalize(va); |
| @@ -1360,7 +1430,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
| 1360 | 1430 | ||
| 1361 | static void audit_mappings(struct kvm_vcpu *vcpu) | 1431 | static void audit_mappings(struct kvm_vcpu *vcpu) |
| 1362 | { | 1432 | { |
| 1363 | int i; | 1433 | unsigned i; |
| 1364 | 1434 | ||
| 1365 | if (vcpu->mmu.root_level == 4) | 1435 | if (vcpu->mmu.root_level == 4) |
| 1366 | audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4); | 1436 | audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4); |
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index f3bcee904651..73ffbffb1097 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h | |||
| @@ -148,8 +148,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
| 148 | break; | 148 | break; |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | if (walker->level != 3 || is_long_mode(vcpu)) | 151 | walker->inherited_ar &= walker->table[index]; |
| 152 | walker->inherited_ar &= walker->table[index]; | ||
| 153 | table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; | 152 | table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| 154 | paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK); | 153 | paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK); |
| 155 | kunmap_atomic(walker->table, KM_USER0); | 154 | kunmap_atomic(walker->table, KM_USER0); |
| @@ -248,6 +247,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 248 | u64 shadow_pte; | 247 | u64 shadow_pte; |
| 249 | int metaphysical; | 248 | int metaphysical; |
| 250 | gfn_t table_gfn; | 249 | gfn_t table_gfn; |
| 250 | unsigned hugepage_access = 0; | ||
| 251 | 251 | ||
| 252 | if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { | 252 | if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { |
| 253 | if (level == PT_PAGE_TABLE_LEVEL) | 253 | if (level == PT_PAGE_TABLE_LEVEL) |
| @@ -277,6 +277,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 277 | if (level - 1 == PT_PAGE_TABLE_LEVEL | 277 | if (level - 1 == PT_PAGE_TABLE_LEVEL |
| 278 | && walker->level == PT_DIRECTORY_LEVEL) { | 278 | && walker->level == PT_DIRECTORY_LEVEL) { |
| 279 | metaphysical = 1; | 279 | metaphysical = 1; |
| 280 | hugepage_access = *guest_ent; | ||
| 281 | hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK; | ||
| 282 | hugepage_access >>= PT_WRITABLE_SHIFT; | ||
| 280 | table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) | 283 | table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) |
| 281 | >> PAGE_SHIFT; | 284 | >> PAGE_SHIFT; |
| 282 | } else { | 285 | } else { |
| @@ -284,7 +287,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 284 | table_gfn = walker->table_gfn[level - 2]; | 287 | table_gfn = walker->table_gfn[level - 2]; |
| 285 | } | 288 | } |
| 286 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | 289 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, |
| 287 | metaphysical, shadow_ent); | 290 | metaphysical, hugepage_access, |
| 291 | shadow_ent); | ||
| 288 | shadow_addr = shadow_page->page_hpa; | 292 | shadow_addr = shadow_page->page_hpa; |
| 289 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | 293 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK |
| 290 | | PT_WRITABLE_MASK | PT_USER_MASK; | 294 | | PT_WRITABLE_MASK | PT_USER_MASK; |
| @@ -444,7 +448,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 444 | if (is_io_pte(*shadow_pte)) | 448 | if (is_io_pte(*shadow_pte)) |
| 445 | return 1; | 449 | return 1; |
| 446 | 450 | ||
| 447 | ++kvm_stat.pf_fixed; | 451 | ++vcpu->stat.pf_fixed; |
| 448 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); | 452 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); |
| 449 | 453 | ||
| 450 | return write_pt; | 454 | return write_pt; |
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 3d8ea7ac2ecc..9c15f32eea18 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c | |||
| @@ -44,6 +44,10 @@ MODULE_LICENSE("GPL"); | |||
| 44 | #define KVM_EFER_LMA (1 << 10) | 44 | #define KVM_EFER_LMA (1 << 10) |
| 45 | #define KVM_EFER_LME (1 << 8) | 45 | #define KVM_EFER_LME (1 << 8) |
| 46 | 46 | ||
| 47 | #define SVM_FEATURE_NPT (1 << 0) | ||
| 48 | #define SVM_FEATURE_LBRV (1 << 1) | ||
| 49 | #define SVM_DEATURE_SVML (1 << 2) | ||
| 50 | |||
| 47 | unsigned long iopm_base; | 51 | unsigned long iopm_base; |
| 48 | unsigned long msrpm_base; | 52 | unsigned long msrpm_base; |
| 49 | 53 | ||
| @@ -59,15 +63,16 @@ struct kvm_ldttss_desc { | |||
| 59 | struct svm_cpu_data { | 63 | struct svm_cpu_data { |
| 60 | int cpu; | 64 | int cpu; |
| 61 | 65 | ||
| 62 | uint64_t asid_generation; | 66 | u64 asid_generation; |
| 63 | uint32_t max_asid; | 67 | u32 max_asid; |
| 64 | uint32_t next_asid; | 68 | u32 next_asid; |
| 65 | struct kvm_ldttss_desc *tss_desc; | 69 | struct kvm_ldttss_desc *tss_desc; |
| 66 | 70 | ||
| 67 | struct page *save_area; | 71 | struct page *save_area; |
| 68 | }; | 72 | }; |
| 69 | 73 | ||
| 70 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); | 74 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); |
| 75 | static uint32_t svm_features; | ||
| 71 | 76 | ||
| 72 | struct svm_init_data { | 77 | struct svm_init_data { |
| 73 | int cpu; | 78 | int cpu; |
| @@ -82,6 +87,11 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; | |||
| 82 | 87 | ||
| 83 | #define MAX_INST_SIZE 15 | 88 | #define MAX_INST_SIZE 15 |
| 84 | 89 | ||
| 90 | static inline u32 svm_has(u32 feat) | ||
| 91 | { | ||
| 92 | return svm_features & feat; | ||
| 93 | } | ||
| 94 | |||
| 85 | static unsigned get_addr_size(struct kvm_vcpu *vcpu) | 95 | static unsigned get_addr_size(struct kvm_vcpu *vcpu) |
| 86 | { | 96 | { |
| 87 | struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; | 97 | struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; |
| @@ -203,13 +213,6 @@ static void inject_ud(struct kvm_vcpu *vcpu) | |||
| 203 | UD_VECTOR; | 213 | UD_VECTOR; |
| 204 | } | 214 | } |
| 205 | 215 | ||
| 206 | static void inject_db(struct kvm_vcpu *vcpu) | ||
| 207 | { | ||
| 208 | vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | | ||
| 209 | SVM_EVTINJ_TYPE_EXEPT | | ||
| 210 | DB_VECTOR; | ||
| 211 | } | ||
| 212 | |||
| 213 | static int is_page_fault(uint32_t info) | 216 | static int is_page_fault(uint32_t info) |
| 214 | { | 217 | { |
| 215 | info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 218 | info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; |
| @@ -309,6 +312,7 @@ static void svm_hardware_enable(void *garbage) | |||
| 309 | svm_data->asid_generation = 1; | 312 | svm_data->asid_generation = 1; |
| 310 | svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 313 | svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
| 311 | svm_data->next_asid = svm_data->max_asid + 1; | 314 | svm_data->next_asid = svm_data->max_asid + 1; |
| 315 | svm_features = cpuid_edx(SVM_CPUID_FUNC); | ||
| 312 | 316 | ||
| 313 | asm volatile ( "sgdt %0" : "=m"(gdt_descr) ); | 317 | asm volatile ( "sgdt %0" : "=m"(gdt_descr) ); |
| 314 | gdt = (struct desc_struct *)gdt_descr.address; | 318 | gdt = (struct desc_struct *)gdt_descr.address; |
| @@ -459,7 +463,6 @@ static void init_vmcb(struct vmcb *vmcb) | |||
| 459 | { | 463 | { |
| 460 | struct vmcb_control_area *control = &vmcb->control; | 464 | struct vmcb_control_area *control = &vmcb->control; |
| 461 | struct vmcb_save_area *save = &vmcb->save; | 465 | struct vmcb_save_area *save = &vmcb->save; |
| 462 | u64 tsc; | ||
| 463 | 466 | ||
| 464 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 467 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
| 465 | INTERCEPT_CR3_MASK | | 468 | INTERCEPT_CR3_MASK | |
| @@ -511,12 +514,13 @@ static void init_vmcb(struct vmcb *vmcb) | |||
| 511 | (1ULL << INTERCEPT_VMSAVE) | | 514 | (1ULL << INTERCEPT_VMSAVE) | |
| 512 | (1ULL << INTERCEPT_STGI) | | 515 | (1ULL << INTERCEPT_STGI) | |
| 513 | (1ULL << INTERCEPT_CLGI) | | 516 | (1ULL << INTERCEPT_CLGI) | |
| 514 | (1ULL << INTERCEPT_SKINIT); | 517 | (1ULL << INTERCEPT_SKINIT) | |
| 518 | (1ULL << INTERCEPT_MONITOR) | | ||
| 519 | (1ULL << INTERCEPT_MWAIT); | ||
| 515 | 520 | ||
| 516 | control->iopm_base_pa = iopm_base; | 521 | control->iopm_base_pa = iopm_base; |
| 517 | control->msrpm_base_pa = msrpm_base; | 522 | control->msrpm_base_pa = msrpm_base; |
| 518 | rdtscll(tsc); | 523 | control->tsc_offset = 0; |
| 519 | control->tsc_offset = -tsc; | ||
| 520 | control->int_ctl = V_INTR_MASKING_MASK; | 524 | control->int_ctl = V_INTR_MASKING_MASK; |
| 521 | 525 | ||
| 522 | init_seg(&save->es); | 526 | init_seg(&save->es); |
| @@ -576,12 +580,15 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) | |||
| 576 | vcpu->svm->vmcb = page_address(page); | 580 | vcpu->svm->vmcb = page_address(page); |
| 577 | memset(vcpu->svm->vmcb, 0, PAGE_SIZE); | 581 | memset(vcpu->svm->vmcb, 0, PAGE_SIZE); |
| 578 | vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | 582 | vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; |
| 579 | vcpu->svm->cr0 = 0x00000010; | ||
| 580 | vcpu->svm->asid_generation = 0; | 583 | vcpu->svm->asid_generation = 0; |
| 581 | memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); | 584 | memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); |
| 582 | init_vmcb(vcpu->svm->vmcb); | 585 | init_vmcb(vcpu->svm->vmcb); |
| 583 | 586 | ||
| 584 | fx_init(vcpu); | 587 | fx_init(vcpu); |
| 588 | vcpu->fpu_active = 1; | ||
| 589 | vcpu->apic_base = 0xfee00000 | | ||
| 590 | /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | | ||
| 591 | MSR_IA32_APICBASE_ENABLE; | ||
| 585 | 592 | ||
| 586 | return 0; | 593 | return 0; |
| 587 | 594 | ||
| @@ -602,11 +609,34 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) | |||
| 602 | 609 | ||
| 603 | static void svm_vcpu_load(struct kvm_vcpu *vcpu) | 610 | static void svm_vcpu_load(struct kvm_vcpu *vcpu) |
| 604 | { | 611 | { |
| 605 | get_cpu(); | 612 | int cpu, i; |
| 613 | |||
| 614 | cpu = get_cpu(); | ||
| 615 | if (unlikely(cpu != vcpu->cpu)) { | ||
| 616 | u64 tsc_this, delta; | ||
| 617 | |||
| 618 | /* | ||
| 619 | * Make sure that the guest sees a monotonically | ||
| 620 | * increasing TSC. | ||
| 621 | */ | ||
| 622 | rdtscll(tsc_this); | ||
| 623 | delta = vcpu->host_tsc - tsc_this; | ||
| 624 | vcpu->svm->vmcb->control.tsc_offset += delta; | ||
| 625 | vcpu->cpu = cpu; | ||
| 626 | } | ||
| 627 | |||
| 628 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | ||
| 629 | rdmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); | ||
| 606 | } | 630 | } |
| 607 | 631 | ||
| 608 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) | 632 | static void svm_vcpu_put(struct kvm_vcpu *vcpu) |
| 609 | { | 633 | { |
| 634 | int i; | ||
| 635 | |||
| 636 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | ||
| 637 | wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); | ||
| 638 | |||
| 639 | rdtscll(vcpu->host_tsc); | ||
| 610 | put_cpu(); | 640 | put_cpu(); |
| 611 | } | 641 | } |
| 612 | 642 | ||
| @@ -714,7 +744,7 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | |||
| 714 | vcpu->svm->vmcb->save.gdtr.base = dt->base ; | 744 | vcpu->svm->vmcb->save.gdtr.base = dt->base ; |
| 715 | } | 745 | } |
| 716 | 746 | ||
| 717 | static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) | 747 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
| 718 | { | 748 | { |
| 719 | } | 749 | } |
| 720 | 750 | ||
| @@ -733,9 +763,15 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 733 | } | 763 | } |
| 734 | } | 764 | } |
| 735 | #endif | 765 | #endif |
| 736 | vcpu->svm->cr0 = cr0; | 766 | if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) { |
| 737 | vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK | CR0_WP_MASK; | 767 | vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
| 768 | vcpu->fpu_active = 1; | ||
| 769 | } | ||
| 770 | |||
| 738 | vcpu->cr0 = cr0; | 771 | vcpu->cr0 = cr0; |
| 772 | cr0 |= CR0_PG_MASK | CR0_WP_MASK; | ||
| 773 | cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); | ||
| 774 | vcpu->svm->vmcb->save.cr0 = cr0; | ||
| 739 | } | 775 | } |
| 740 | 776 | ||
| 741 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 777 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
| @@ -785,18 +821,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | |||
| 785 | 821 | ||
| 786 | static void load_host_msrs(struct kvm_vcpu *vcpu) | 822 | static void load_host_msrs(struct kvm_vcpu *vcpu) |
| 787 | { | 823 | { |
| 788 | int i; | 824 | #ifdef CONFIG_X86_64 |
| 789 | 825 | wrmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); | |
| 790 | for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) | 826 | #endif |
| 791 | wrmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); | ||
| 792 | } | 827 | } |
| 793 | 828 | ||
| 794 | static void save_host_msrs(struct kvm_vcpu *vcpu) | 829 | static void save_host_msrs(struct kvm_vcpu *vcpu) |
| 795 | { | 830 | { |
| 796 | int i; | 831 | #ifdef CONFIG_X86_64 |
| 797 | 832 | rdmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); | |
| 798 | for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) | 833 | #endif |
| 799 | rdmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); | ||
| 800 | } | 834 | } |
| 801 | 835 | ||
| 802 | static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) | 836 | static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) |
| @@ -890,7 +924,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 890 | case EMULATE_DONE: | 924 | case EMULATE_DONE: |
| 891 | return 1; | 925 | return 1; |
| 892 | case EMULATE_DO_MMIO: | 926 | case EMULATE_DO_MMIO: |
| 893 | ++kvm_stat.mmio_exits; | 927 | ++vcpu->stat.mmio_exits; |
| 894 | kvm_run->exit_reason = KVM_EXIT_MMIO; | 928 | kvm_run->exit_reason = KVM_EXIT_MMIO; |
| 895 | return 0; | 929 | return 0; |
| 896 | case EMULATE_FAIL: | 930 | case EMULATE_FAIL: |
| @@ -904,6 +938,16 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 904 | return 0; | 938 | return 0; |
| 905 | } | 939 | } |
| 906 | 940 | ||
| 941 | static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
| 942 | { | ||
| 943 | vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | ||
| 944 | if (!(vcpu->cr0 & CR0_TS_MASK)) | ||
| 945 | vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK; | ||
| 946 | vcpu->fpu_active = 1; | ||
| 947 | |||
| 948 | return 1; | ||
| 949 | } | ||
| 950 | |||
| 907 | static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 951 | static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 908 | { | 952 | { |
| 909 | /* | 953 | /* |
| @@ -981,7 +1025,7 @@ static int io_get_override(struct kvm_vcpu *vcpu, | |||
| 981 | return 0; | 1025 | return 0; |
| 982 | } | 1026 | } |
| 983 | 1027 | ||
| 984 | static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) | 1028 | static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) |
| 985 | { | 1029 | { |
| 986 | unsigned long addr_mask; | 1030 | unsigned long addr_mask; |
| 987 | unsigned long *reg; | 1031 | unsigned long *reg; |
| @@ -1025,38 +1069,38 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) | |||
| 1025 | static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1069 | static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1026 | { | 1070 | { |
| 1027 | u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? | 1071 | u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? |
| 1028 | int _in = io_info & SVM_IOIO_TYPE_MASK; | 1072 | int size, down, in, string, rep; |
| 1073 | unsigned port; | ||
| 1074 | unsigned long count; | ||
| 1075 | gva_t address = 0; | ||
| 1029 | 1076 | ||
| 1030 | ++kvm_stat.io_exits; | 1077 | ++vcpu->stat.io_exits; |
| 1031 | 1078 | ||
| 1032 | vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; | 1079 | vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; |
| 1033 | 1080 | ||
| 1034 | kvm_run->exit_reason = KVM_EXIT_IO; | 1081 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
| 1035 | kvm_run->io.port = io_info >> 16; | 1082 | port = io_info >> 16; |
| 1036 | kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 1083 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
| 1037 | kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); | 1084 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
| 1038 | kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1085 | rep = (io_info & SVM_IOIO_REP_MASK) != 0; |
| 1039 | kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; | 1086 | count = 1; |
| 1087 | down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; | ||
| 1040 | 1088 | ||
| 1041 | if (kvm_run->io.string) { | 1089 | if (string) { |
| 1042 | unsigned addr_mask; | 1090 | unsigned addr_mask; |
| 1043 | 1091 | ||
| 1044 | addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); | 1092 | addr_mask = io_adress(vcpu, in, &address); |
| 1045 | if (!addr_mask) { | 1093 | if (!addr_mask) { |
| 1046 | printk(KERN_DEBUG "%s: get io address failed\n", | 1094 | printk(KERN_DEBUG "%s: get io address failed\n", |
| 1047 | __FUNCTION__); | 1095 | __FUNCTION__); |
| 1048 | return 1; | 1096 | return 1; |
| 1049 | } | 1097 | } |
| 1050 | 1098 | ||
| 1051 | if (kvm_run->io.rep) { | 1099 | if (rep) |
| 1052 | kvm_run->io.count | 1100 | count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; |
| 1053 | = vcpu->regs[VCPU_REGS_RCX] & addr_mask; | 1101 | } |
| 1054 | kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags | 1102 | return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, |
| 1055 | & X86_EFLAGS_DF) != 0; | 1103 | address, rep, port); |
| 1056 | } | ||
| 1057 | } else | ||
| 1058 | kvm_run->io.value = vcpu->svm->vmcb->save.rax; | ||
| 1059 | return 0; | ||
| 1060 | } | 1104 | } |
| 1061 | 1105 | ||
| 1062 | static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1106 | static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| @@ -1072,13 +1116,14 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1072 | return 1; | 1116 | return 1; |
| 1073 | 1117 | ||
| 1074 | kvm_run->exit_reason = KVM_EXIT_HLT; | 1118 | kvm_run->exit_reason = KVM_EXIT_HLT; |
| 1075 | ++kvm_stat.halt_exits; | 1119 | ++vcpu->stat.halt_exits; |
| 1076 | return 0; | 1120 | return 0; |
| 1077 | } | 1121 | } |
| 1078 | 1122 | ||
| 1079 | static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1123 | static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1080 | { | 1124 | { |
| 1081 | vcpu->svm->vmcb->save.rip += 3; | 1125 | vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3; |
| 1126 | skip_emulated_instruction(vcpu); | ||
| 1082 | return kvm_hypercall(vcpu, kvm_run); | 1127 | return kvm_hypercall(vcpu, kvm_run); |
| 1083 | } | 1128 | } |
| 1084 | 1129 | ||
| @@ -1098,8 +1143,8 @@ static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_r | |||
| 1098 | static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1143 | static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1099 | { | 1144 | { |
| 1100 | vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; | 1145 | vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; |
| 1101 | kvm_run->exit_reason = KVM_EXIT_CPUID; | 1146 | kvm_emulate_cpuid(vcpu); |
| 1102 | return 0; | 1147 | return 1; |
| 1103 | } | 1148 | } |
| 1104 | 1149 | ||
| 1105 | static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1150 | static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| @@ -1239,7 +1284,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu, | |||
| 1239 | */ | 1284 | */ |
| 1240 | if (kvm_run->request_interrupt_window && | 1285 | if (kvm_run->request_interrupt_window && |
| 1241 | !vcpu->irq_summary) { | 1286 | !vcpu->irq_summary) { |
| 1242 | ++kvm_stat.irq_window_exits; | 1287 | ++vcpu->stat.irq_window_exits; |
| 1243 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 1288 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
| 1244 | return 0; | 1289 | return 0; |
| 1245 | } | 1290 | } |
| @@ -1267,6 +1312,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
| 1267 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, | 1312 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, |
| 1268 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, | 1313 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, |
| 1269 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 1314 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
| 1315 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | ||
| 1270 | [SVM_EXIT_INTR] = nop_on_interception, | 1316 | [SVM_EXIT_INTR] = nop_on_interception, |
| 1271 | [SVM_EXIT_NMI] = nop_on_interception, | 1317 | [SVM_EXIT_NMI] = nop_on_interception, |
| 1272 | [SVM_EXIT_SMI] = nop_on_interception, | 1318 | [SVM_EXIT_SMI] = nop_on_interception, |
| @@ -1288,6 +1334,8 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
| 1288 | [SVM_EXIT_STGI] = invalid_op_interception, | 1334 | [SVM_EXIT_STGI] = invalid_op_interception, |
| 1289 | [SVM_EXIT_CLGI] = invalid_op_interception, | 1335 | [SVM_EXIT_CLGI] = invalid_op_interception, |
| 1290 | [SVM_EXIT_SKINIT] = invalid_op_interception, | 1336 | [SVM_EXIT_SKINIT] = invalid_op_interception, |
| 1337 | [SVM_EXIT_MONITOR] = invalid_op_interception, | ||
| 1338 | [SVM_EXIT_MWAIT] = invalid_op_interception, | ||
| 1291 | }; | 1339 | }; |
| 1292 | 1340 | ||
| 1293 | 1341 | ||
| @@ -1295,8 +1343,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1295 | { | 1343 | { |
| 1296 | u32 exit_code = vcpu->svm->vmcb->control.exit_code; | 1344 | u32 exit_code = vcpu->svm->vmcb->control.exit_code; |
| 1297 | 1345 | ||
| 1298 | kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; | ||
| 1299 | |||
| 1300 | if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && | 1346 | if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && |
| 1301 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) | 1347 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) |
| 1302 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " | 1348 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " |
| @@ -1307,12 +1353,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1307 | if (exit_code >= ARRAY_SIZE(svm_exit_handlers) | 1353 | if (exit_code >= ARRAY_SIZE(svm_exit_handlers) |
| 1308 | || svm_exit_handlers[exit_code] == 0) { | 1354 | || svm_exit_handlers[exit_code] == 0) { |
| 1309 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 1355 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; |
| 1310 | printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n", | 1356 | kvm_run->hw.hardware_exit_reason = exit_code; |
| 1311 | __FUNCTION__, | ||
| 1312 | exit_code, | ||
| 1313 | vcpu->svm->vmcb->save.rip, | ||
| 1314 | vcpu->cr0, | ||
| 1315 | vcpu->svm->vmcb->save.rflags); | ||
| 1316 | return 0; | 1357 | return 0; |
| 1317 | } | 1358 | } |
| 1318 | 1359 | ||
| @@ -1461,8 +1502,10 @@ again: | |||
| 1461 | load_db_regs(vcpu->svm->db_regs); | 1502 | load_db_regs(vcpu->svm->db_regs); |
| 1462 | } | 1503 | } |
| 1463 | 1504 | ||
| 1464 | fx_save(vcpu->host_fx_image); | 1505 | if (vcpu->fpu_active) { |
| 1465 | fx_restore(vcpu->guest_fx_image); | 1506 | fx_save(vcpu->host_fx_image); |
| 1507 | fx_restore(vcpu->guest_fx_image); | ||
| 1508 | } | ||
| 1466 | 1509 | ||
| 1467 | asm volatile ( | 1510 | asm volatile ( |
| 1468 | #ifdef CONFIG_X86_64 | 1511 | #ifdef CONFIG_X86_64 |
| @@ -1573,8 +1616,10 @@ again: | |||
| 1573 | #endif | 1616 | #endif |
| 1574 | : "cc", "memory" ); | 1617 | : "cc", "memory" ); |
| 1575 | 1618 | ||
| 1576 | fx_save(vcpu->guest_fx_image); | 1619 | if (vcpu->fpu_active) { |
| 1577 | fx_restore(vcpu->host_fx_image); | 1620 | fx_save(vcpu->guest_fx_image); |
| 1621 | fx_restore(vcpu->host_fx_image); | ||
| 1622 | } | ||
| 1578 | 1623 | ||
| 1579 | if ((vcpu->svm->vmcb->save.dr7 & 0xff)) | 1624 | if ((vcpu->svm->vmcb->save.dr7 & 0xff)) |
| 1580 | load_db_regs(vcpu->svm->host_db_regs); | 1625 | load_db_regs(vcpu->svm->host_db_regs); |
| @@ -1606,8 +1651,9 @@ again: | |||
| 1606 | vcpu->svm->next_rip = 0; | 1651 | vcpu->svm->next_rip = 0; |
| 1607 | 1652 | ||
| 1608 | if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 1653 | if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
| 1609 | kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; | 1654 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
| 1610 | kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; | 1655 | kvm_run->fail_entry.hardware_entry_failure_reason |
| 1656 | = vcpu->svm->vmcb->control.exit_code; | ||
| 1611 | post_kvm_run_save(vcpu, kvm_run); | 1657 | post_kvm_run_save(vcpu, kvm_run); |
| 1612 | return 0; | 1658 | return 0; |
| 1613 | } | 1659 | } |
| @@ -1615,14 +1661,16 @@ again: | |||
| 1615 | r = handle_exit(vcpu, kvm_run); | 1661 | r = handle_exit(vcpu, kvm_run); |
| 1616 | if (r > 0) { | 1662 | if (r > 0) { |
| 1617 | if (signal_pending(current)) { | 1663 | if (signal_pending(current)) { |
| 1618 | ++kvm_stat.signal_exits; | 1664 | ++vcpu->stat.signal_exits; |
| 1619 | post_kvm_run_save(vcpu, kvm_run); | 1665 | post_kvm_run_save(vcpu, kvm_run); |
| 1666 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 1620 | return -EINTR; | 1667 | return -EINTR; |
| 1621 | } | 1668 | } |
| 1622 | 1669 | ||
| 1623 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 1670 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { |
| 1624 | ++kvm_stat.request_irq_exits; | 1671 | ++vcpu->stat.request_irq_exits; |
| 1625 | post_kvm_run_save(vcpu, kvm_run); | 1672 | post_kvm_run_save(vcpu, kvm_run); |
| 1673 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 1626 | return -EINTR; | 1674 | return -EINTR; |
| 1627 | } | 1675 | } |
| 1628 | kvm_resched(vcpu); | 1676 | kvm_resched(vcpu); |
| @@ -1641,6 +1689,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
| 1641 | { | 1689 | { |
| 1642 | vcpu->svm->vmcb->save.cr3 = root; | 1690 | vcpu->svm->vmcb->save.cr3 = root; |
| 1643 | force_new_asid(vcpu); | 1691 | force_new_asid(vcpu); |
| 1692 | |||
| 1693 | if (vcpu->fpu_active) { | ||
| 1694 | vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | ||
| 1695 | vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK; | ||
| 1696 | vcpu->fpu_active = 0; | ||
| 1697 | } | ||
| 1644 | } | 1698 | } |
| 1645 | 1699 | ||
| 1646 | static void svm_inject_page_fault(struct kvm_vcpu *vcpu, | 1700 | static void svm_inject_page_fault(struct kvm_vcpu *vcpu, |
| @@ -1649,7 +1703,7 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu, | |||
| 1649 | { | 1703 | { |
| 1650 | uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; | 1704 | uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; |
| 1651 | 1705 | ||
| 1652 | ++kvm_stat.pf_guest; | 1706 | ++vcpu->stat.pf_guest; |
| 1653 | 1707 | ||
| 1654 | if (is_page_fault(exit_int_info)) { | 1708 | if (is_page_fault(exit_int_info)) { |
| 1655 | 1709 | ||
| @@ -1709,9 +1763,8 @@ static struct kvm_arch_ops svm_arch_ops = { | |||
| 1709 | .get_segment = svm_get_segment, | 1763 | .get_segment = svm_get_segment, |
| 1710 | .set_segment = svm_set_segment, | 1764 | .set_segment = svm_set_segment, |
| 1711 | .get_cs_db_l_bits = svm_get_cs_db_l_bits, | 1765 | .get_cs_db_l_bits = svm_get_cs_db_l_bits, |
| 1712 | .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits, | 1766 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, |
| 1713 | .set_cr0 = svm_set_cr0, | 1767 | .set_cr0 = svm_set_cr0, |
| 1714 | .set_cr0_no_modeswitch = svm_set_cr0, | ||
| 1715 | .set_cr3 = svm_set_cr3, | 1768 | .set_cr3 = svm_set_cr3, |
| 1716 | .set_cr4 = svm_set_cr4, | 1769 | .set_cr4 = svm_set_cr4, |
| 1717 | .set_efer = svm_set_efer, | 1770 | .set_efer = svm_set_efer, |
diff --git a/drivers/kvm/svm.h b/drivers/kvm/svm.h index df731c3fb588..5e93814400ce 100644 --- a/drivers/kvm/svm.h +++ b/drivers/kvm/svm.h | |||
| @@ -44,6 +44,9 @@ enum { | |||
| 44 | INTERCEPT_RDTSCP, | 44 | INTERCEPT_RDTSCP, |
| 45 | INTERCEPT_ICEBP, | 45 | INTERCEPT_ICEBP, |
| 46 | INTERCEPT_WBINVD, | 46 | INTERCEPT_WBINVD, |
| 47 | INTERCEPT_MONITOR, | ||
| 48 | INTERCEPT_MWAIT, | ||
| 49 | INTERCEPT_MWAIT_COND, | ||
| 47 | }; | 50 | }; |
| 48 | 51 | ||
| 49 | 52 | ||
| @@ -298,6 +301,9 @@ struct __attribute__ ((__packed__)) vmcb { | |||
| 298 | #define SVM_EXIT_RDTSCP 0x087 | 301 | #define SVM_EXIT_RDTSCP 0x087 |
| 299 | #define SVM_EXIT_ICEBP 0x088 | 302 | #define SVM_EXIT_ICEBP 0x088 |
| 300 | #define SVM_EXIT_WBINVD 0x089 | 303 | #define SVM_EXIT_WBINVD 0x089 |
| 304 | #define SVM_EXIT_MONITOR 0x08a | ||
| 305 | #define SVM_EXIT_MWAIT 0x08b | ||
| 306 | #define SVM_EXIT_MWAIT_COND 0x08c | ||
| 301 | #define SVM_EXIT_NPF 0x400 | 307 | #define SVM_EXIT_NPF 0x400 |
| 302 | 308 | ||
| 303 | #define SVM_EXIT_ERR -1 | 309 | #define SVM_EXIT_ERR -1 |
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index fbbf9d6b299f..724db0027f00 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | 17 | ||
| 18 | #include "kvm.h" | 18 | #include "kvm.h" |
| 19 | #include "vmx.h" | 19 | #include "vmx.h" |
| 20 | #include "kvm_vmx.h" | ||
| 21 | #include <linux/module.h> | 20 | #include <linux/module.h> |
| 22 | #include <linux/kernel.h> | 21 | #include <linux/kernel.h> |
| 23 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
| @@ -70,6 +69,10 @@ static struct kvm_vmx_segment_field { | |||
| 70 | VMX_SEGMENT_FIELD(LDTR), | 69 | VMX_SEGMENT_FIELD(LDTR), |
| 71 | }; | 70 | }; |
| 72 | 71 | ||
| 72 | /* | ||
| 73 | * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it | ||
| 74 | * away by decrementing the array size. | ||
| 75 | */ | ||
| 73 | static const u32 vmx_msr_index[] = { | 76 | static const u32 vmx_msr_index[] = { |
| 74 | #ifdef CONFIG_X86_64 | 77 | #ifdef CONFIG_X86_64 |
| 75 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, | 78 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, |
| @@ -78,6 +81,19 @@ static const u32 vmx_msr_index[] = { | |||
| 78 | }; | 81 | }; |
| 79 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 82 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
| 80 | 83 | ||
| 84 | #ifdef CONFIG_X86_64 | ||
| 85 | static unsigned msr_offset_kernel_gs_base; | ||
| 86 | #define NR_64BIT_MSRS 4 | ||
| 87 | /* | ||
| 88 | * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt | ||
| 89 | * mechanism (cpu bug AA24) | ||
| 90 | */ | ||
| 91 | #define NR_BAD_MSRS 2 | ||
| 92 | #else | ||
| 93 | #define NR_64BIT_MSRS 0 | ||
| 94 | #define NR_BAD_MSRS 0 | ||
| 95 | #endif | ||
| 96 | |||
| 81 | static inline int is_page_fault(u32 intr_info) | 97 | static inline int is_page_fault(u32 intr_info) |
| 82 | { | 98 | { |
| 83 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 99 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| @@ -85,6 +101,13 @@ static inline int is_page_fault(u32 intr_info) | |||
| 85 | (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); | 101 | (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); |
| 86 | } | 102 | } |
| 87 | 103 | ||
| 104 | static inline int is_no_device(u32 intr_info) | ||
| 105 | { | ||
| 106 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | ||
| 107 | INTR_INFO_VALID_MASK)) == | ||
| 108 | (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); | ||
| 109 | } | ||
| 110 | |||
| 88 | static inline int is_external_interrupt(u32 intr_info) | 111 | static inline int is_external_interrupt(u32 intr_info) |
| 89 | { | 112 | { |
| 90 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 113 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
| @@ -200,6 +223,16 @@ static void vmcs_write64(unsigned long field, u64 value) | |||
| 200 | #endif | 223 | #endif |
| 201 | } | 224 | } |
| 202 | 225 | ||
| 226 | static void vmcs_clear_bits(unsigned long field, u32 mask) | ||
| 227 | { | ||
| 228 | vmcs_writel(field, vmcs_readl(field) & ~mask); | ||
| 229 | } | ||
| 230 | |||
| 231 | static void vmcs_set_bits(unsigned long field, u32 mask) | ||
| 232 | { | ||
| 233 | vmcs_writel(field, vmcs_readl(field) | mask); | ||
| 234 | } | ||
| 235 | |||
| 203 | /* | 236 | /* |
| 204 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | 237 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes |
| 205 | * vcpu mutex is already taken. | 238 | * vcpu mutex is already taken. |
| @@ -297,6 +330,44 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) | |||
| 297 | } | 330 | } |
| 298 | 331 | ||
| 299 | /* | 332 | /* |
| 333 | * Set up the vmcs to automatically save and restore system | ||
| 334 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy | ||
| 335 | * mode, as fiddling with msrs is very expensive. | ||
| 336 | */ | ||
| 337 | static void setup_msrs(struct kvm_vcpu *vcpu) | ||
| 338 | { | ||
| 339 | int nr_skip, nr_good_msrs; | ||
| 340 | |||
| 341 | if (is_long_mode(vcpu)) | ||
| 342 | nr_skip = NR_BAD_MSRS; | ||
| 343 | else | ||
| 344 | nr_skip = NR_64BIT_MSRS; | ||
| 345 | nr_good_msrs = vcpu->nmsrs - nr_skip; | ||
| 346 | |||
| 347 | /* | ||
| 348 | * MSR_K6_STAR is only needed on long mode guests, and only | ||
| 349 | * if efer.sce is enabled. | ||
| 350 | */ | ||
| 351 | if (find_msr_entry(vcpu, MSR_K6_STAR)) { | ||
| 352 | --nr_good_msrs; | ||
| 353 | #ifdef CONFIG_X86_64 | ||
| 354 | if (is_long_mode(vcpu) && (vcpu->shadow_efer & EFER_SCE)) | ||
| 355 | ++nr_good_msrs; | ||
| 356 | #endif | ||
| 357 | } | ||
| 358 | |||
| 359 | vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, | ||
| 360 | virt_to_phys(vcpu->guest_msrs + nr_skip)); | ||
| 361 | vmcs_writel(VM_EXIT_MSR_STORE_ADDR, | ||
| 362 | virt_to_phys(vcpu->guest_msrs + nr_skip)); | ||
| 363 | vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, | ||
| 364 | virt_to_phys(vcpu->host_msrs + nr_skip)); | ||
| 365 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
| 366 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
| 367 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
| 368 | } | ||
| 369 | |||
| 370 | /* | ||
| 300 | * reads and returns guest's timestamp counter "register" | 371 | * reads and returns guest's timestamp counter "register" |
| 301 | * guest_tsc = host_tsc + tsc_offset -- 21.3 | 372 | * guest_tsc = host_tsc + tsc_offset -- 21.3 |
| 302 | */ | 373 | */ |
| @@ -712,6 +783,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 712 | 783 | ||
| 713 | vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); | 784 | vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); |
| 714 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | 785 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); |
| 786 | if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) | ||
| 787 | vmcs_writel(GUEST_CS_BASE, 0xf0000); | ||
| 715 | vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); | 788 | vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); |
| 716 | 789 | ||
| 717 | fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es); | 790 | fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es); |
| @@ -754,11 +827,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
| 754 | 827 | ||
| 755 | #endif | 828 | #endif |
| 756 | 829 | ||
| 757 | static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) | 830 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
| 758 | { | 831 | { |
| 759 | vcpu->cr0 &= KVM_GUEST_CR0_MASK; | ||
| 760 | vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK; | ||
| 761 | |||
| 762 | vcpu->cr4 &= KVM_GUEST_CR4_MASK; | 832 | vcpu->cr4 &= KVM_GUEST_CR4_MASK; |
| 763 | vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; | 833 | vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; |
| 764 | } | 834 | } |
| @@ -780,22 +850,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 780 | } | 850 | } |
| 781 | #endif | 851 | #endif |
| 782 | 852 | ||
| 783 | vmcs_writel(CR0_READ_SHADOW, cr0); | 853 | if (!(cr0 & CR0_TS_MASK)) { |
| 784 | vmcs_writel(GUEST_CR0, | 854 | vcpu->fpu_active = 1; |
| 785 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); | 855 | vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK); |
| 786 | vcpu->cr0 = cr0; | 856 | } |
| 787 | } | ||
| 788 | |||
| 789 | /* | ||
| 790 | * Used when restoring the VM to avoid corrupting segment registers | ||
| 791 | */ | ||
| 792 | static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0) | ||
| 793 | { | ||
| 794 | if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK)) | ||
| 795 | enter_rmode(vcpu); | ||
| 796 | 857 | ||
| 797 | vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0); | ||
| 798 | update_exception_bitmap(vcpu); | ||
| 799 | vmcs_writel(CR0_READ_SHADOW, cr0); | 858 | vmcs_writel(CR0_READ_SHADOW, cr0); |
| 800 | vmcs_writel(GUEST_CR0, | 859 | vmcs_writel(GUEST_CR0, |
| 801 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); | 860 | (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); |
| @@ -805,6 +864,12 @@ static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 805 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 864 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
| 806 | { | 865 | { |
| 807 | vmcs_writel(GUEST_CR3, cr3); | 866 | vmcs_writel(GUEST_CR3, cr3); |
| 867 | |||
| 868 | if (!(vcpu->cr0 & CR0_TS_MASK)) { | ||
| 869 | vcpu->fpu_active = 0; | ||
| 870 | vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); | ||
| 871 | vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); | ||
| 872 | } | ||
| 808 | } | 873 | } |
| 809 | 874 | ||
| 810 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 875 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
| @@ -835,6 +900,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
| 835 | 900 | ||
| 836 | msr->data = efer & ~EFER_LME; | 901 | msr->data = efer & ~EFER_LME; |
| 837 | } | 902 | } |
| 903 | setup_msrs(vcpu); | ||
| 838 | } | 904 | } |
| 839 | 905 | ||
| 840 | #endif | 906 | #endif |
| @@ -878,7 +944,14 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
| 878 | vmcs_writel(sf->base, var->base); | 944 | vmcs_writel(sf->base, var->base); |
| 879 | vmcs_write32(sf->limit, var->limit); | 945 | vmcs_write32(sf->limit, var->limit); |
| 880 | vmcs_write16(sf->selector, var->selector); | 946 | vmcs_write16(sf->selector, var->selector); |
| 881 | if (var->unusable) | 947 | if (vcpu->rmode.active && var->s) { |
| 948 | /* | ||
| 949 | * Hack real-mode segments into vm86 compatibility. | ||
| 950 | */ | ||
| 951 | if (var->base == 0xffff0000 && var->selector == 0xf000) | ||
| 952 | vmcs_writel(sf->base, 0xf0000); | ||
| 953 | ar = 0xf3; | ||
| 954 | } else if (var->unusable) | ||
| 882 | ar = 1 << 16; | 955 | ar = 1 << 16; |
| 883 | else { | 956 | else { |
| 884 | ar = var->type & 15; | 957 | ar = var->type & 15; |
| @@ -933,9 +1006,9 @@ static int init_rmode_tss(struct kvm* kvm) | |||
| 933 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 1006 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; |
| 934 | char *page; | 1007 | char *page; |
| 935 | 1008 | ||
| 936 | p1 = _gfn_to_page(kvm, fn++); | 1009 | p1 = gfn_to_page(kvm, fn++); |
| 937 | p2 = _gfn_to_page(kvm, fn++); | 1010 | p2 = gfn_to_page(kvm, fn++); |
| 938 | p3 = _gfn_to_page(kvm, fn); | 1011 | p3 = gfn_to_page(kvm, fn); |
| 939 | 1012 | ||
| 940 | if (!p1 || !p2 || !p3) { | 1013 | if (!p1 || !p2 || !p3) { |
| 941 | kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__); | 1014 | kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__); |
| @@ -991,7 +1064,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 991 | struct descriptor_table dt; | 1064 | struct descriptor_table dt; |
| 992 | int i; | 1065 | int i; |
| 993 | int ret = 0; | 1066 | int ret = 0; |
| 994 | int nr_good_msrs; | ||
| 995 | extern asmlinkage void kvm_vmx_return(void); | 1067 | extern asmlinkage void kvm_vmx_return(void); |
| 996 | 1068 | ||
| 997 | if (!init_rmode_tss(vcpu->kvm)) { | 1069 | if (!init_rmode_tss(vcpu->kvm)) { |
| @@ -1136,23 +1208,17 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 1136 | vcpu->host_msrs[j].reserved = 0; | 1208 | vcpu->host_msrs[j].reserved = 0; |
| 1137 | vcpu->host_msrs[j].data = data; | 1209 | vcpu->host_msrs[j].data = data; |
| 1138 | vcpu->guest_msrs[j] = vcpu->host_msrs[j]; | 1210 | vcpu->guest_msrs[j] = vcpu->host_msrs[j]; |
| 1211 | #ifdef CONFIG_X86_64 | ||
| 1212 | if (index == MSR_KERNEL_GS_BASE) | ||
| 1213 | msr_offset_kernel_gs_base = j; | ||
| 1214 | #endif | ||
| 1139 | ++vcpu->nmsrs; | 1215 | ++vcpu->nmsrs; |
| 1140 | } | 1216 | } |
| 1141 | printk(KERN_DEBUG "kvm: msrs: %d\n", vcpu->nmsrs); | ||
| 1142 | 1217 | ||
| 1143 | nr_good_msrs = vcpu->nmsrs - NR_BAD_MSRS; | 1218 | setup_msrs(vcpu); |
| 1144 | vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, | 1219 | |
| 1145 | virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS)); | ||
| 1146 | vmcs_writel(VM_EXIT_MSR_STORE_ADDR, | ||
| 1147 | virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS)); | ||
| 1148 | vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, | ||
| 1149 | virt_to_phys(vcpu->host_msrs + NR_BAD_MSRS)); | ||
| 1150 | vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS, | 1220 | vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS, |
| 1151 | (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */ | 1221 | (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */ |
| 1152 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
| 1153 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
| 1154 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ | ||
| 1155 | |||
| 1156 | 1222 | ||
| 1157 | /* 22.2.1, 20.8.1 */ | 1223 | /* 22.2.1, 20.8.1 */ |
| 1158 | vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS, | 1224 | vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS, |
| @@ -1164,7 +1230,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 1164 | vmcs_writel(TPR_THRESHOLD, 0); | 1230 | vmcs_writel(TPR_THRESHOLD, 0); |
| 1165 | #endif | 1231 | #endif |
| 1166 | 1232 | ||
| 1167 | vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK); | 1233 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
| 1168 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); | 1234 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); |
| 1169 | 1235 | ||
| 1170 | vcpu->cr0 = 0x60000010; | 1236 | vcpu->cr0 = 0x60000010; |
| @@ -1190,7 +1256,7 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) | |||
| 1190 | u16 sp = vmcs_readl(GUEST_RSP); | 1256 | u16 sp = vmcs_readl(GUEST_RSP); |
| 1191 | u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT); | 1257 | u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT); |
| 1192 | 1258 | ||
| 1193 | if (sp > ss_limit || sp - 6 > sp) { | 1259 | if (sp > ss_limit || sp < 6 ) { |
| 1194 | vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n", | 1260 | vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n", |
| 1195 | __FUNCTION__, | 1261 | __FUNCTION__, |
| 1196 | vmcs_readl(GUEST_RSP), | 1262 | vmcs_readl(GUEST_RSP), |
| @@ -1330,6 +1396,15 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1330 | asm ("int $2"); | 1396 | asm ("int $2"); |
| 1331 | return 1; | 1397 | return 1; |
| 1332 | } | 1398 | } |
| 1399 | |||
| 1400 | if (is_no_device(intr_info)) { | ||
| 1401 | vcpu->fpu_active = 1; | ||
| 1402 | vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); | ||
| 1403 | if (!(vcpu->cr0 & CR0_TS_MASK)) | ||
| 1404 | vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); | ||
| 1405 | return 1; | ||
| 1406 | } | ||
| 1407 | |||
| 1333 | error_code = 0; | 1408 | error_code = 0; |
| 1334 | rip = vmcs_readl(GUEST_RIP); | 1409 | rip = vmcs_readl(GUEST_RIP); |
| 1335 | if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) | 1410 | if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) |
| @@ -1355,7 +1430,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1355 | case EMULATE_DONE: | 1430 | case EMULATE_DONE: |
| 1356 | return 1; | 1431 | return 1; |
| 1357 | case EMULATE_DO_MMIO: | 1432 | case EMULATE_DO_MMIO: |
| 1358 | ++kvm_stat.mmio_exits; | 1433 | ++vcpu->stat.mmio_exits; |
| 1359 | kvm_run->exit_reason = KVM_EXIT_MMIO; | 1434 | kvm_run->exit_reason = KVM_EXIT_MMIO; |
| 1360 | return 0; | 1435 | return 0; |
| 1361 | case EMULATE_FAIL: | 1436 | case EMULATE_FAIL: |
| @@ -1384,7 +1459,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1384 | static int handle_external_interrupt(struct kvm_vcpu *vcpu, | 1459 | static int handle_external_interrupt(struct kvm_vcpu *vcpu, |
| 1385 | struct kvm_run *kvm_run) | 1460 | struct kvm_run *kvm_run) |
| 1386 | { | 1461 | { |
| 1387 | ++kvm_stat.irq_exits; | 1462 | ++vcpu->stat.irq_exits; |
| 1388 | return 1; | 1463 | return 1; |
| 1389 | } | 1464 | } |
| 1390 | 1465 | ||
| @@ -1394,7 +1469,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1394 | return 0; | 1469 | return 0; |
| 1395 | } | 1470 | } |
| 1396 | 1471 | ||
| 1397 | static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) | 1472 | static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) |
| 1398 | { | 1473 | { |
| 1399 | u64 inst; | 1474 | u64 inst; |
| 1400 | gva_t rip; | 1475 | gva_t rip; |
| @@ -1439,33 +1514,35 @@ static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) | |||
| 1439 | done: | 1514 | done: |
| 1440 | countr_size *= 8; | 1515 | countr_size *= 8; |
| 1441 | *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); | 1516 | *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); |
| 1517 | //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]); | ||
| 1442 | return 1; | 1518 | return 1; |
| 1443 | } | 1519 | } |
| 1444 | 1520 | ||
| 1445 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1521 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1446 | { | 1522 | { |
| 1447 | u64 exit_qualification; | 1523 | u64 exit_qualification; |
| 1524 | int size, down, in, string, rep; | ||
| 1525 | unsigned port; | ||
| 1526 | unsigned long count; | ||
| 1527 | gva_t address; | ||
| 1448 | 1528 | ||
| 1449 | ++kvm_stat.io_exits; | 1529 | ++vcpu->stat.io_exits; |
| 1450 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 1530 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); |
| 1451 | kvm_run->exit_reason = KVM_EXIT_IO; | 1531 | in = (exit_qualification & 8) != 0; |
| 1452 | if (exit_qualification & 8) | 1532 | size = (exit_qualification & 7) + 1; |
| 1453 | kvm_run->io.direction = KVM_EXIT_IO_IN; | 1533 | string = (exit_qualification & 16) != 0; |
| 1454 | else | 1534 | down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; |
| 1455 | kvm_run->io.direction = KVM_EXIT_IO_OUT; | 1535 | count = 1; |
| 1456 | kvm_run->io.size = (exit_qualification & 7) + 1; | 1536 | rep = (exit_qualification & 32) != 0; |
| 1457 | kvm_run->io.string = (exit_qualification & 16) != 0; | 1537 | port = exit_qualification >> 16; |
| 1458 | kvm_run->io.string_down | 1538 | address = 0; |
| 1459 | = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; | 1539 | if (string) { |
| 1460 | kvm_run->io.rep = (exit_qualification & 32) != 0; | 1540 | if (rep && !get_io_count(vcpu, &count)) |
| 1461 | kvm_run->io.port = exit_qualification >> 16; | ||
| 1462 | if (kvm_run->io.string) { | ||
| 1463 | if (!get_io_count(vcpu, &kvm_run->io.count)) | ||
| 1464 | return 1; | 1541 | return 1; |
| 1465 | kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); | 1542 | address = vmcs_readl(GUEST_LINEAR_ADDRESS); |
| 1466 | } else | 1543 | } |
| 1467 | kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ | 1544 | return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, |
| 1468 | return 0; | 1545 | address, rep, port); |
| 1469 | } | 1546 | } |
| 1470 | 1547 | ||
| 1471 | static void | 1548 | static void |
| @@ -1514,6 +1591,15 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1514 | return 1; | 1591 | return 1; |
| 1515 | }; | 1592 | }; |
| 1516 | break; | 1593 | break; |
| 1594 | case 2: /* clts */ | ||
| 1595 | vcpu_load_rsp_rip(vcpu); | ||
| 1596 | vcpu->fpu_active = 1; | ||
| 1597 | vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); | ||
| 1598 | vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); | ||
| 1599 | vcpu->cr0 &= ~CR0_TS_MASK; | ||
| 1600 | vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); | ||
| 1601 | skip_emulated_instruction(vcpu); | ||
| 1602 | return 1; | ||
| 1517 | case 1: /*mov from cr*/ | 1603 | case 1: /*mov from cr*/ |
| 1518 | switch (cr) { | 1604 | switch (cr) { |
| 1519 | case 3: | 1605 | case 3: |
| @@ -1523,8 +1609,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1523 | skip_emulated_instruction(vcpu); | 1609 | skip_emulated_instruction(vcpu); |
| 1524 | return 1; | 1610 | return 1; |
| 1525 | case 8: | 1611 | case 8: |
| 1526 | printk(KERN_DEBUG "handle_cr: read CR8 " | ||
| 1527 | "cpu erratum AA15\n"); | ||
| 1528 | vcpu_load_rsp_rip(vcpu); | 1612 | vcpu_load_rsp_rip(vcpu); |
| 1529 | vcpu->regs[reg] = vcpu->cr8; | 1613 | vcpu->regs[reg] = vcpu->cr8; |
| 1530 | vcpu_put_rsp_rip(vcpu); | 1614 | vcpu_put_rsp_rip(vcpu); |
| @@ -1583,8 +1667,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1583 | 1667 | ||
| 1584 | static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1668 | static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1585 | { | 1669 | { |
| 1586 | kvm_run->exit_reason = KVM_EXIT_CPUID; | 1670 | kvm_emulate_cpuid(vcpu); |
| 1587 | return 0; | 1671 | return 1; |
| 1588 | } | 1672 | } |
| 1589 | 1673 | ||
| 1590 | static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1674 | static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| @@ -1639,7 +1723,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
| 1639 | if (kvm_run->request_interrupt_window && | 1723 | if (kvm_run->request_interrupt_window && |
| 1640 | !vcpu->irq_summary) { | 1724 | !vcpu->irq_summary) { |
| 1641 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 1725 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
| 1642 | ++kvm_stat.irq_window_exits; | 1726 | ++vcpu->stat.irq_window_exits; |
| 1643 | return 0; | 1727 | return 0; |
| 1644 | } | 1728 | } |
| 1645 | return 1; | 1729 | return 1; |
| @@ -1652,13 +1736,13 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1652 | return 1; | 1736 | return 1; |
| 1653 | 1737 | ||
| 1654 | kvm_run->exit_reason = KVM_EXIT_HLT; | 1738 | kvm_run->exit_reason = KVM_EXIT_HLT; |
| 1655 | ++kvm_stat.halt_exits; | 1739 | ++vcpu->stat.halt_exits; |
| 1656 | return 0; | 1740 | return 0; |
| 1657 | } | 1741 | } |
| 1658 | 1742 | ||
| 1659 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1743 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1660 | { | 1744 | { |
| 1661 | vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3); | 1745 | skip_emulated_instruction(vcpu); |
| 1662 | return kvm_hypercall(vcpu, kvm_run); | 1746 | return kvm_hypercall(vcpu, kvm_run); |
| 1663 | } | 1747 | } |
| 1664 | 1748 | ||
| @@ -1699,7 +1783,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 1699 | exit_reason != EXIT_REASON_EXCEPTION_NMI ) | 1783 | exit_reason != EXIT_REASON_EXCEPTION_NMI ) |
| 1700 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " | 1784 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " |
| 1701 | "exit reason is 0x%x\n", __FUNCTION__, exit_reason); | 1785 | "exit reason is 0x%x\n", __FUNCTION__, exit_reason); |
| 1702 | kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
| 1703 | if (exit_reason < kvm_vmx_max_exit_handlers | 1786 | if (exit_reason < kvm_vmx_max_exit_handlers |
| 1704 | && kvm_vmx_exit_handlers[exit_reason]) | 1787 | && kvm_vmx_exit_handlers[exit_reason]) |
| 1705 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 1788 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); |
| @@ -1763,11 +1846,21 @@ again: | |||
| 1763 | if (vcpu->guest_debug.enabled) | 1846 | if (vcpu->guest_debug.enabled) |
| 1764 | kvm_guest_debug_pre(vcpu); | 1847 | kvm_guest_debug_pre(vcpu); |
| 1765 | 1848 | ||
| 1766 | fx_save(vcpu->host_fx_image); | 1849 | if (vcpu->fpu_active) { |
| 1767 | fx_restore(vcpu->guest_fx_image); | 1850 | fx_save(vcpu->host_fx_image); |
| 1851 | fx_restore(vcpu->guest_fx_image); | ||
| 1852 | } | ||
| 1853 | /* | ||
| 1854 | * Loading guest fpu may have cleared host cr0.ts | ||
| 1855 | */ | ||
| 1856 | vmcs_writel(HOST_CR0, read_cr0()); | ||
| 1768 | 1857 | ||
| 1769 | save_msrs(vcpu->host_msrs, vcpu->nmsrs); | 1858 | #ifdef CONFIG_X86_64 |
| 1770 | load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); | 1859 | if (is_long_mode(vcpu)) { |
| 1860 | save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); | ||
| 1861 | load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); | ||
| 1862 | } | ||
| 1863 | #endif | ||
| 1771 | 1864 | ||
| 1772 | asm ( | 1865 | asm ( |
| 1773 | /* Store host registers */ | 1866 | /* Store host registers */ |
| @@ -1909,21 +2002,28 @@ again: | |||
| 1909 | 2002 | ||
| 1910 | reload_tss(); | 2003 | reload_tss(); |
| 1911 | } | 2004 | } |
| 1912 | ++kvm_stat.exits; | 2005 | ++vcpu->stat.exits; |
| 1913 | 2006 | ||
| 1914 | save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); | 2007 | #ifdef CONFIG_X86_64 |
| 1915 | load_msrs(vcpu->host_msrs, NR_BAD_MSRS); | 2008 | if (is_long_mode(vcpu)) { |
| 2009 | save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); | ||
| 2010 | load_msrs(vcpu->host_msrs, NR_BAD_MSRS); | ||
| 2011 | } | ||
| 2012 | #endif | ||
| 2013 | |||
| 2014 | if (vcpu->fpu_active) { | ||
| 2015 | fx_save(vcpu->guest_fx_image); | ||
| 2016 | fx_restore(vcpu->host_fx_image); | ||
| 2017 | } | ||
| 1916 | 2018 | ||
| 1917 | fx_save(vcpu->guest_fx_image); | ||
| 1918 | fx_restore(vcpu->host_fx_image); | ||
| 1919 | vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; | 2019 | vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; |
| 1920 | 2020 | ||
| 1921 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 2021 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
| 1922 | 2022 | ||
| 1923 | kvm_run->exit_type = 0; | ||
| 1924 | if (fail) { | 2023 | if (fail) { |
| 1925 | kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; | 2024 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
| 1926 | kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); | 2025 | kvm_run->fail_entry.hardware_entry_failure_reason |
| 2026 | = vmcs_read32(VM_INSTRUCTION_ERROR); | ||
| 1927 | r = 0; | 2027 | r = 0; |
| 1928 | } else { | 2028 | } else { |
| 1929 | /* | 2029 | /* |
| @@ -1933,19 +2033,20 @@ again: | |||
| 1933 | profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); | 2033 | profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); |
| 1934 | 2034 | ||
| 1935 | vcpu->launched = 1; | 2035 | vcpu->launched = 1; |
| 1936 | kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; | ||
| 1937 | r = kvm_handle_exit(kvm_run, vcpu); | 2036 | r = kvm_handle_exit(kvm_run, vcpu); |
| 1938 | if (r > 0) { | 2037 | if (r > 0) { |
| 1939 | /* Give scheduler a change to reschedule. */ | 2038 | /* Give scheduler a change to reschedule. */ |
| 1940 | if (signal_pending(current)) { | 2039 | if (signal_pending(current)) { |
| 1941 | ++kvm_stat.signal_exits; | 2040 | ++vcpu->stat.signal_exits; |
| 1942 | post_kvm_run_save(vcpu, kvm_run); | 2041 | post_kvm_run_save(vcpu, kvm_run); |
| 2042 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 1943 | return -EINTR; | 2043 | return -EINTR; |
| 1944 | } | 2044 | } |
| 1945 | 2045 | ||
| 1946 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 2046 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { |
| 1947 | ++kvm_stat.request_irq_exits; | 2047 | ++vcpu->stat.request_irq_exits; |
| 1948 | post_kvm_run_save(vcpu, kvm_run); | 2048 | post_kvm_run_save(vcpu, kvm_run); |
| 2049 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 1949 | return -EINTR; | 2050 | return -EINTR; |
| 1950 | } | 2051 | } |
| 1951 | 2052 | ||
| @@ -1969,7 +2070,7 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, | |||
| 1969 | { | 2070 | { |
| 1970 | u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 2071 | u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
| 1971 | 2072 | ||
| 1972 | ++kvm_stat.pf_guest; | 2073 | ++vcpu->stat.pf_guest; |
| 1973 | 2074 | ||
| 1974 | if (is_page_fault(vect_info)) { | 2075 | if (is_page_fault(vect_info)) { |
| 1975 | printk(KERN_DEBUG "inject_page_fault: " | 2076 | printk(KERN_DEBUG "inject_page_fault: " |
| @@ -2026,6 +2127,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) | |||
| 2026 | vmcs_clear(vmcs); | 2127 | vmcs_clear(vmcs); |
| 2027 | vcpu->vmcs = vmcs; | 2128 | vcpu->vmcs = vmcs; |
| 2028 | vcpu->launched = 0; | 2129 | vcpu->launched = 0; |
| 2130 | vcpu->fpu_active = 1; | ||
| 2029 | 2131 | ||
| 2030 | return 0; | 2132 | return 0; |
| 2031 | 2133 | ||
| @@ -2062,9 +2164,8 @@ static struct kvm_arch_ops vmx_arch_ops = { | |||
| 2062 | .get_segment = vmx_get_segment, | 2164 | .get_segment = vmx_get_segment, |
| 2063 | .set_segment = vmx_set_segment, | 2165 | .set_segment = vmx_set_segment, |
| 2064 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 2166 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
| 2065 | .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits, | 2167 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
| 2066 | .set_cr0 = vmx_set_cr0, | 2168 | .set_cr0 = vmx_set_cr0, |
| 2067 | .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch, | ||
| 2068 | .set_cr3 = vmx_set_cr3, | 2169 | .set_cr3 = vmx_set_cr3, |
| 2069 | .set_cr4 = vmx_set_cr4, | 2170 | .set_cr4 = vmx_set_cr4, |
| 2070 | #ifdef CONFIG_X86_64 | 2171 | #ifdef CONFIG_X86_64 |
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 7513cddb929f..7ade09086aa5 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c | |||
| @@ -833,8 +833,9 @@ done_prefixes: | |||
| 833 | dst.ptr = (unsigned long *)cr2; | 833 | dst.ptr = (unsigned long *)cr2; |
| 834 | dst.bytes = (d & ByteOp) ? 1 : op_bytes; | 834 | dst.bytes = (d & ByteOp) ? 1 : op_bytes; |
| 835 | if (d & BitOp) { | 835 | if (d & BitOp) { |
| 836 | dst.ptr += src.val / BITS_PER_LONG; | 836 | unsigned long mask = ~(dst.bytes * 8 - 1); |
| 837 | dst.bytes = sizeof(long); | 837 | |
| 838 | dst.ptr = (void *)dst.ptr + (src.val & mask) / 8; | ||
| 838 | } | 839 | } |
| 839 | if (!(d & Mov) && /* optimisation - avoid slow emulated read */ | 840 | if (!(d & Mov) && /* optimisation - avoid slow emulated read */ |
| 840 | ((rc = ops->read_emulated((unsigned long)dst.ptr, | 841 | ((rc = ops->read_emulated((unsigned long)dst.ptr, |
| @@ -1044,7 +1045,7 @@ done_prefixes: | |||
| 1044 | if ((rc = ops->write_std( | 1045 | if ((rc = ops->write_std( |
| 1045 | register_address(ctxt->ss_base, | 1046 | register_address(ctxt->ss_base, |
| 1046 | _regs[VCPU_REGS_RSP]), | 1047 | _regs[VCPU_REGS_RSP]), |
| 1047 | dst.val, dst.bytes, ctxt)) != 0) | 1048 | &dst.val, dst.bytes, ctxt)) != 0) |
| 1048 | goto done; | 1049 | goto done; |
| 1049 | dst.val = dst.orig_val; /* skanky: disable writeback */ | 1050 | dst.val = dst.orig_val; /* skanky: disable writeback */ |
| 1050 | break; | 1051 | break; |
| @@ -1077,12 +1078,12 @@ writeback: | |||
| 1077 | case OP_MEM: | 1078 | case OP_MEM: |
| 1078 | if (lock_prefix) | 1079 | if (lock_prefix) |
| 1079 | rc = ops->cmpxchg_emulated((unsigned long)dst. | 1080 | rc = ops->cmpxchg_emulated((unsigned long)dst. |
| 1080 | ptr, dst.orig_val, | 1081 | ptr, &dst.orig_val, |
| 1081 | dst.val, dst.bytes, | 1082 | &dst.val, dst.bytes, |
| 1082 | ctxt); | 1083 | ctxt); |
| 1083 | else | 1084 | else |
| 1084 | rc = ops->write_emulated((unsigned long)dst.ptr, | 1085 | rc = ops->write_emulated((unsigned long)dst.ptr, |
| 1085 | dst.val, dst.bytes, | 1086 | &dst.val, dst.bytes, |
| 1086 | ctxt); | 1087 | ctxt); |
| 1087 | if (rc != 0) | 1088 | if (rc != 0) |
| 1088 | goto done; | 1089 | goto done; |
| @@ -1320,36 +1321,8 @@ twobyte_special_insn: | |||
| 1320 | realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags); | 1321 | realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags); |
| 1321 | break; | 1322 | break; |
| 1322 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 1323 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
| 1323 | #if defined(__i386__) | ||
| 1324 | { | ||
| 1325 | unsigned long old_lo, old_hi; | ||
| 1326 | if (((rc = ops->read_emulated(cr2 + 0, &old_lo, 4, | ||
| 1327 | ctxt)) != 0) | ||
| 1328 | || ((rc = ops->read_emulated(cr2 + 4, &old_hi, 4, | ||
| 1329 | ctxt)) != 0)) | ||
| 1330 | goto done; | ||
| 1331 | if ((old_lo != _regs[VCPU_REGS_RAX]) | ||
| 1332 | || (old_hi != _regs[VCPU_REGS_RDX])) { | ||
| 1333 | _regs[VCPU_REGS_RAX] = old_lo; | ||
| 1334 | _regs[VCPU_REGS_RDX] = old_hi; | ||
| 1335 | _eflags &= ~EFLG_ZF; | ||
| 1336 | } else if (ops->cmpxchg8b_emulated == NULL) { | ||
| 1337 | rc = X86EMUL_UNHANDLEABLE; | ||
| 1338 | goto done; | ||
| 1339 | } else { | ||
| 1340 | if ((rc = ops->cmpxchg8b_emulated(cr2, old_lo, | ||
| 1341 | old_hi, | ||
| 1342 | _regs[VCPU_REGS_RBX], | ||
| 1343 | _regs[VCPU_REGS_RCX], | ||
| 1344 | ctxt)) != 0) | ||
| 1345 | goto done; | ||
| 1346 | _eflags |= EFLG_ZF; | ||
| 1347 | } | ||
| 1348 | break; | ||
| 1349 | } | ||
| 1350 | #elif defined(CONFIG_X86_64) | ||
| 1351 | { | 1324 | { |
| 1352 | unsigned long old, new; | 1325 | u64 old, new; |
| 1353 | if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0) | 1326 | if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0) |
| 1354 | goto done; | 1327 | goto done; |
| 1355 | if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || | 1328 | if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || |
| @@ -1358,15 +1331,15 @@ twobyte_special_insn: | |||
| 1358 | _regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 1331 | _regs[VCPU_REGS_RDX] = (u32) (old >> 32); |
| 1359 | _eflags &= ~EFLG_ZF; | 1332 | _eflags &= ~EFLG_ZF; |
| 1360 | } else { | 1333 | } else { |
| 1361 | new = (_regs[VCPU_REGS_RCX] << 32) | (u32) _regs[VCPU_REGS_RBX]; | 1334 | new = ((u64)_regs[VCPU_REGS_RCX] << 32) |
| 1362 | if ((rc = ops->cmpxchg_emulated(cr2, old, | 1335 | | (u32) _regs[VCPU_REGS_RBX]; |
| 1363 | new, 8, ctxt)) != 0) | 1336 | if ((rc = ops->cmpxchg_emulated(cr2, &old, |
| 1337 | &new, 8, ctxt)) != 0) | ||
| 1364 | goto done; | 1338 | goto done; |
| 1365 | _eflags |= EFLG_ZF; | 1339 | _eflags |= EFLG_ZF; |
| 1366 | } | 1340 | } |
| 1367 | break; | 1341 | break; |
| 1368 | } | 1342 | } |
| 1369 | #endif | ||
| 1370 | } | 1343 | } |
| 1371 | goto writeback; | 1344 | goto writeback; |
| 1372 | 1345 | ||
diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h index 5d41bd55125e..ea3407d7feee 100644 --- a/drivers/kvm/x86_emulate.h +++ b/drivers/kvm/x86_emulate.h | |||
| @@ -59,8 +59,7 @@ struct x86_emulate_ops { | |||
| 59 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | 59 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. |
| 60 | * @bytes: [IN ] Number of bytes to read from memory. | 60 | * @bytes: [IN ] Number of bytes to read from memory. |
| 61 | */ | 61 | */ |
| 62 | int (*read_std)(unsigned long addr, | 62 | int (*read_std)(unsigned long addr, void *val, |
| 63 | unsigned long *val, | ||
| 64 | unsigned int bytes, struct x86_emulate_ctxt * ctxt); | 63 | unsigned int bytes, struct x86_emulate_ctxt * ctxt); |
| 65 | 64 | ||
| 66 | /* | 65 | /* |
| @@ -71,8 +70,7 @@ struct x86_emulate_ops { | |||
| 71 | * required). | 70 | * required). |
| 72 | * @bytes: [IN ] Number of bytes to write to memory. | 71 | * @bytes: [IN ] Number of bytes to write to memory. |
| 73 | */ | 72 | */ |
| 74 | int (*write_std)(unsigned long addr, | 73 | int (*write_std)(unsigned long addr, const void *val, |
| 75 | unsigned long val, | ||
| 76 | unsigned int bytes, struct x86_emulate_ctxt * ctxt); | 74 | unsigned int bytes, struct x86_emulate_ctxt * ctxt); |
| 77 | 75 | ||
| 78 | /* | 76 | /* |
| @@ -82,7 +80,7 @@ struct x86_emulate_ops { | |||
| 82 | * @bytes: [IN ] Number of bytes to read from memory. | 80 | * @bytes: [IN ] Number of bytes to read from memory. |
| 83 | */ | 81 | */ |
| 84 | int (*read_emulated) (unsigned long addr, | 82 | int (*read_emulated) (unsigned long addr, |
| 85 | unsigned long *val, | 83 | void *val, |
| 86 | unsigned int bytes, | 84 | unsigned int bytes, |
| 87 | struct x86_emulate_ctxt * ctxt); | 85 | struct x86_emulate_ctxt * ctxt); |
| 88 | 86 | ||
| @@ -94,7 +92,7 @@ struct x86_emulate_ops { | |||
| 94 | * @bytes: [IN ] Number of bytes to write to memory. | 92 | * @bytes: [IN ] Number of bytes to write to memory. |
| 95 | */ | 93 | */ |
| 96 | int (*write_emulated) (unsigned long addr, | 94 | int (*write_emulated) (unsigned long addr, |
| 97 | unsigned long val, | 95 | const void *val, |
| 98 | unsigned int bytes, | 96 | unsigned int bytes, |
| 99 | struct x86_emulate_ctxt * ctxt); | 97 | struct x86_emulate_ctxt * ctxt); |
| 100 | 98 | ||
| @@ -107,29 +105,11 @@ struct x86_emulate_ops { | |||
| 107 | * @bytes: [IN ] Number of bytes to access using CMPXCHG. | 105 | * @bytes: [IN ] Number of bytes to access using CMPXCHG. |
| 108 | */ | 106 | */ |
| 109 | int (*cmpxchg_emulated) (unsigned long addr, | 107 | int (*cmpxchg_emulated) (unsigned long addr, |
| 110 | unsigned long old, | 108 | const void *old, |
| 111 | unsigned long new, | 109 | const void *new, |
| 112 | unsigned int bytes, | 110 | unsigned int bytes, |
| 113 | struct x86_emulate_ctxt * ctxt); | 111 | struct x86_emulate_ctxt * ctxt); |
| 114 | 112 | ||
| 115 | /* | ||
| 116 | * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an | ||
| 117 | * emulated/special memory area. | ||
| 118 | * @addr: [IN ] Linear address to access. | ||
| 119 | * @old: [IN ] Value expected to be current at @addr. | ||
| 120 | * @new: [IN ] Value to write to @addr. | ||
| 121 | * NOTES: | ||
| 122 | * 1. This function is only ever called when emulating a real CMPXCHG8B. | ||
| 123 | * 2. This function is *never* called on x86/64 systems. | ||
| 124 | * 2. Not defining this function (i.e., specifying NULL) is equivalent | ||
| 125 | * to defining a function that always returns X86EMUL_UNHANDLEABLE. | ||
| 126 | */ | ||
| 127 | int (*cmpxchg8b_emulated) (unsigned long addr, | ||
| 128 | unsigned long old_lo, | ||
| 129 | unsigned long old_hi, | ||
| 130 | unsigned long new_lo, | ||
| 131 | unsigned long new_hi, | ||
| 132 | struct x86_emulate_ctxt * ctxt); | ||
| 133 | }; | 113 | }; |
| 134 | 114 | ||
| 135 | struct cpu_user_regs; | 115 | struct cpu_user_regs; |
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 4ff0f57d0add..9f05279e7dd3 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild | |||
| @@ -96,6 +96,7 @@ header-y += iso_fs.h | |||
| 96 | header-y += ixjuser.h | 96 | header-y += ixjuser.h |
| 97 | header-y += jffs2.h | 97 | header-y += jffs2.h |
| 98 | header-y += keyctl.h | 98 | header-y += keyctl.h |
| 99 | header-y += kvm.h | ||
| 99 | header-y += limits.h | 100 | header-y += limits.h |
| 100 | header-y += lock_dlm_plock.h | 101 | header-y += lock_dlm_plock.h |
| 101 | header-y += magic.h | 102 | header-y += magic.h |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 275354ffa1cb..e6edca81ab84 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | #include <asm/types.h> | 11 | #include <asm/types.h> |
| 12 | #include <linux/ioctl.h> | 12 | #include <linux/ioctl.h> |
| 13 | 13 | ||
| 14 | #define KVM_API_VERSION 4 | 14 | #define KVM_API_VERSION 12 |
| 15 | 15 | ||
| 16 | /* | 16 | /* |
| 17 | * Architectural interrupt line count, and the size of the bitmap needed | 17 | * Architectural interrupt line count, and the size of the bitmap needed |
| @@ -33,37 +33,39 @@ struct kvm_memory_region { | |||
| 33 | /* for kvm_memory_region::flags */ | 33 | /* for kvm_memory_region::flags */ |
| 34 | #define KVM_MEM_LOG_DIRTY_PAGES 1UL | 34 | #define KVM_MEM_LOG_DIRTY_PAGES 1UL |
| 35 | 35 | ||
| 36 | 36 | struct kvm_memory_alias { | |
| 37 | #define KVM_EXIT_TYPE_FAIL_ENTRY 1 | 37 | __u32 slot; /* this has a different namespace than memory slots */ |
| 38 | #define KVM_EXIT_TYPE_VM_EXIT 2 | 38 | __u32 flags; |
| 39 | __u64 guest_phys_addr; | ||
| 40 | __u64 memory_size; | ||
| 41 | __u64 target_phys_addr; | ||
| 42 | }; | ||
| 39 | 43 | ||
| 40 | enum kvm_exit_reason { | 44 | enum kvm_exit_reason { |
| 41 | KVM_EXIT_UNKNOWN = 0, | 45 | KVM_EXIT_UNKNOWN = 0, |
| 42 | KVM_EXIT_EXCEPTION = 1, | 46 | KVM_EXIT_EXCEPTION = 1, |
| 43 | KVM_EXIT_IO = 2, | 47 | KVM_EXIT_IO = 2, |
| 44 | KVM_EXIT_CPUID = 3, | 48 | KVM_EXIT_HYPERCALL = 3, |
| 45 | KVM_EXIT_DEBUG = 4, | 49 | KVM_EXIT_DEBUG = 4, |
| 46 | KVM_EXIT_HLT = 5, | 50 | KVM_EXIT_HLT = 5, |
| 47 | KVM_EXIT_MMIO = 6, | 51 | KVM_EXIT_MMIO = 6, |
| 48 | KVM_EXIT_IRQ_WINDOW_OPEN = 7, | 52 | KVM_EXIT_IRQ_WINDOW_OPEN = 7, |
| 49 | KVM_EXIT_SHUTDOWN = 8, | 53 | KVM_EXIT_SHUTDOWN = 8, |
| 54 | KVM_EXIT_FAIL_ENTRY = 9, | ||
| 55 | KVM_EXIT_INTR = 10, | ||
| 50 | }; | 56 | }; |
| 51 | 57 | ||
| 52 | /* for KVM_RUN */ | 58 | /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ |
| 53 | struct kvm_run { | 59 | struct kvm_run { |
| 54 | /* in */ | 60 | /* in */ |
| 55 | __u32 emulated; /* skip current instruction */ | ||
| 56 | __u32 mmio_completed; /* mmio request completed */ | ||
| 57 | __u8 request_interrupt_window; | 61 | __u8 request_interrupt_window; |
| 58 | __u8 padding1[7]; | 62 | __u8 padding1[7]; |
| 59 | 63 | ||
| 60 | /* out */ | 64 | /* out */ |
| 61 | __u32 exit_type; | ||
| 62 | __u32 exit_reason; | 65 | __u32 exit_reason; |
| 63 | __u32 instruction_length; | ||
| 64 | __u8 ready_for_interrupt_injection; | 66 | __u8 ready_for_interrupt_injection; |
| 65 | __u8 if_flag; | 67 | __u8 if_flag; |
| 66 | __u16 padding2; | 68 | __u8 padding2[2]; |
| 67 | 69 | ||
| 68 | /* in (pre_kvm_run), out (post_kvm_run) */ | 70 | /* in (pre_kvm_run), out (post_kvm_run) */ |
| 69 | __u64 cr8; | 71 | __u64 cr8; |
| @@ -72,29 +74,26 @@ struct kvm_run { | |||
| 72 | union { | 74 | union { |
| 73 | /* KVM_EXIT_UNKNOWN */ | 75 | /* KVM_EXIT_UNKNOWN */ |
| 74 | struct { | 76 | struct { |
| 75 | __u32 hardware_exit_reason; | 77 | __u64 hardware_exit_reason; |
| 76 | } hw; | 78 | } hw; |
| 79 | /* KVM_EXIT_FAIL_ENTRY */ | ||
| 80 | struct { | ||
| 81 | __u64 hardware_entry_failure_reason; | ||
| 82 | } fail_entry; | ||
| 77 | /* KVM_EXIT_EXCEPTION */ | 83 | /* KVM_EXIT_EXCEPTION */ |
| 78 | struct { | 84 | struct { |
| 79 | __u32 exception; | 85 | __u32 exception; |
| 80 | __u32 error_code; | 86 | __u32 error_code; |
| 81 | } ex; | 87 | } ex; |
| 82 | /* KVM_EXIT_IO */ | 88 | /* KVM_EXIT_IO */ |
| 83 | struct { | 89 | struct kvm_io { |
| 84 | #define KVM_EXIT_IO_IN 0 | 90 | #define KVM_EXIT_IO_IN 0 |
| 85 | #define KVM_EXIT_IO_OUT 1 | 91 | #define KVM_EXIT_IO_OUT 1 |
| 86 | __u8 direction; | 92 | __u8 direction; |
| 87 | __u8 size; /* bytes */ | 93 | __u8 size; /* bytes */ |
| 88 | __u8 string; | ||
| 89 | __u8 string_down; | ||
| 90 | __u8 rep; | ||
| 91 | __u8 pad; | ||
| 92 | __u16 port; | 94 | __u16 port; |
| 93 | __u64 count; | 95 | __u32 count; |
| 94 | union { | 96 | __u64 data_offset; /* relative to kvm_run start */ |
| 95 | __u64 address; | ||
| 96 | __u32 value; | ||
| 97 | }; | ||
| 98 | } io; | 97 | } io; |
| 99 | struct { | 98 | struct { |
| 100 | } debug; | 99 | } debug; |
| @@ -105,6 +104,13 @@ struct kvm_run { | |||
| 105 | __u32 len; | 104 | __u32 len; |
| 106 | __u8 is_write; | 105 | __u8 is_write; |
| 107 | } mmio; | 106 | } mmio; |
| 107 | /* KVM_EXIT_HYPERCALL */ | ||
| 108 | struct { | ||
| 109 | __u64 args[6]; | ||
| 110 | __u64 ret; | ||
| 111 | __u32 longmode; | ||
| 112 | __u32 pad; | ||
| 113 | } hypercall; | ||
| 108 | }; | 114 | }; |
| 109 | }; | 115 | }; |
| 110 | 116 | ||
| @@ -118,6 +124,21 @@ struct kvm_regs { | |||
| 118 | __u64 rip, rflags; | 124 | __u64 rip, rflags; |
| 119 | }; | 125 | }; |
| 120 | 126 | ||
| 127 | /* for KVM_GET_FPU and KVM_SET_FPU */ | ||
| 128 | struct kvm_fpu { | ||
| 129 | __u8 fpr[8][16]; | ||
| 130 | __u16 fcw; | ||
| 131 | __u16 fsw; | ||
| 132 | __u8 ftwx; /* in fxsave format */ | ||
| 133 | __u8 pad1; | ||
| 134 | __u16 last_opcode; | ||
| 135 | __u64 last_ip; | ||
| 136 | __u64 last_dp; | ||
| 137 | __u8 xmm[16][16]; | ||
| 138 | __u32 mxcsr; | ||
| 139 | __u32 pad2; | ||
| 140 | }; | ||
| 141 | |||
| 121 | struct kvm_segment { | 142 | struct kvm_segment { |
| 122 | __u64 base; | 143 | __u64 base; |
| 123 | __u32 limit; | 144 | __u32 limit; |
| @@ -210,38 +231,74 @@ struct kvm_dirty_log { | |||
| 210 | }; | 231 | }; |
| 211 | }; | 232 | }; |
| 212 | 233 | ||
| 234 | struct kvm_cpuid_entry { | ||
| 235 | __u32 function; | ||
| 236 | __u32 eax; | ||
| 237 | __u32 ebx; | ||
| 238 | __u32 ecx; | ||
| 239 | __u32 edx; | ||
| 240 | __u32 padding; | ||
| 241 | }; | ||
| 242 | |||
| 243 | /* for KVM_SET_CPUID */ | ||
| 244 | struct kvm_cpuid { | ||
| 245 | __u32 nent; | ||
| 246 | __u32 padding; | ||
| 247 | struct kvm_cpuid_entry entries[0]; | ||
| 248 | }; | ||
| 249 | |||
| 250 | /* for KVM_SET_SIGNAL_MASK */ | ||
| 251 | struct kvm_signal_mask { | ||
| 252 | __u32 len; | ||
| 253 | __u8 sigset[0]; | ||
| 254 | }; | ||
| 255 | |||
| 213 | #define KVMIO 0xAE | 256 | #define KVMIO 0xAE |
| 214 | 257 | ||
| 215 | /* | 258 | /* |
| 216 | * ioctls for /dev/kvm fds: | 259 | * ioctls for /dev/kvm fds: |
| 217 | */ | 260 | */ |
| 218 | #define KVM_GET_API_VERSION _IO(KVMIO, 1) | 261 | #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) |
| 219 | #define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */ | 262 | #define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ |
| 220 | #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) | 263 | #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) |
| 264 | /* | ||
| 265 | * Check if a kvm extension is available. Argument is extension number, | ||
| 266 | * return is 1 (yes) or 0 (no, sorry). | ||
| 267 | */ | ||
| 268 | #define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) | ||
| 269 | /* | ||
| 270 | * Get size for mmap(vcpu_fd) | ||
| 271 | */ | ||
| 272 | #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ | ||
| 221 | 273 | ||
| 222 | /* | 274 | /* |
| 223 | * ioctls for VM fds | 275 | * ioctls for VM fds |
| 224 | */ | 276 | */ |
| 225 | #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) | 277 | #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) |
| 226 | /* | 278 | /* |
| 227 | * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns | 279 | * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns |
| 228 | * a vcpu fd. | 280 | * a vcpu fd. |
| 229 | */ | 281 | */ |
| 230 | #define KVM_CREATE_VCPU _IOW(KVMIO, 11, int) | 282 | #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) |
| 231 | #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) | 283 | #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) |
| 284 | #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) | ||
| 232 | 285 | ||
| 233 | /* | 286 | /* |
| 234 | * ioctls for vcpu fds | 287 | * ioctls for vcpu fds |
| 235 | */ | 288 | */ |
| 236 | #define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) | 289 | #define KVM_RUN _IO(KVMIO, 0x80) |
| 237 | #define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs) | 290 | #define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs) |
| 238 | #define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) | 291 | #define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs) |
| 239 | #define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs) | 292 | #define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs) |
| 240 | #define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs) | 293 | #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) |
| 241 | #define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation) | 294 | #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) |
| 242 | #define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt) | 295 | #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) |
| 243 | #define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) | 296 | #define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) |
| 244 | #define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) | 297 | #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) |
| 245 | #define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs) | 298 | #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) |
| 299 | #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) | ||
| 300 | #define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) | ||
| 301 | #define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu) | ||
| 302 | #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) | ||
| 246 | 303 | ||
| 247 | #endif | 304 | #endif |
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 326da7d500c7..dff9ea32606a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | 29 | ||
| 30 | #define TUN_MINOR 200 | 30 | #define TUN_MINOR 200 |
| 31 | #define HPET_MINOR 228 | 31 | #define HPET_MINOR 228 |
| 32 | #define KVM_MINOR 232 | ||
| 32 | 33 | ||
| 33 | struct device; | 34 | struct device; |
| 34 | 35 | ||
