Diffstat:
 drivers/kvm/kvm.h         |  13
 drivers/kvm/kvm_main.c    | 776
 drivers/kvm/kvm_svm.h     |   3
 drivers/kvm/mmu.c         |  36
 drivers/kvm/paging_tmpl.h |  18
 drivers/kvm/svm.c         |  42
 drivers/kvm/vmx.c         |  33
 include/linux/kvm.h       |  50
 include/linux/kvm_para.h  |  73
 include/linux/magic.h     |   1
 10 files changed, 748 insertions(+), 297 deletions(-)
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 04574a9d4430..0d122bf889db 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -14,6 +14,7 @@
14 14
15#include "vmx.h" 15#include "vmx.h"
16#include <linux/kvm.h> 16#include <linux/kvm.h>
17#include <linux/kvm_para.h>
17 18
18#define CR0_PE_MASK (1ULL << 0) 19#define CR0_PE_MASK (1ULL << 0)
19#define CR0_TS_MASK (1ULL << 3) 20#define CR0_TS_MASK (1ULL << 3)
@@ -237,6 +238,9 @@ struct kvm_vcpu {
237 unsigned long cr0; 238 unsigned long cr0;
238 unsigned long cr2; 239 unsigned long cr2;
239 unsigned long cr3; 240 unsigned long cr3;
241 gpa_t para_state_gpa;
242 struct page *para_state_page;
243 gpa_t hypercall_gpa;
240 unsigned long cr4; 244 unsigned long cr4;
241 unsigned long cr8; 245 unsigned long cr8;
242 u64 pdptrs[4]; /* pae */ 246 u64 pdptrs[4]; /* pae */
@@ -305,6 +309,7 @@ struct kvm {
305 int busy; 309 int busy;
306 unsigned long rmap_overflow; 310 unsigned long rmap_overflow;
307 struct list_head vm_list; 311 struct list_head vm_list;
312 struct file *filp;
308}; 313};
309 314
310struct kvm_stat { 315struct kvm_stat {
@@ -339,7 +344,7 @@ struct kvm_arch_ops {
339 int (*vcpu_create)(struct kvm_vcpu *vcpu); 344 int (*vcpu_create)(struct kvm_vcpu *vcpu);
340 void (*vcpu_free)(struct kvm_vcpu *vcpu); 345 void (*vcpu_free)(struct kvm_vcpu *vcpu);
341 346
342 struct kvm_vcpu *(*vcpu_load)(struct kvm_vcpu *vcpu); 347 void (*vcpu_load)(struct kvm_vcpu *vcpu);
343 void (*vcpu_put)(struct kvm_vcpu *vcpu); 348 void (*vcpu_put)(struct kvm_vcpu *vcpu);
344 void (*vcpu_decache)(struct kvm_vcpu *vcpu); 349 void (*vcpu_decache)(struct kvm_vcpu *vcpu);
345 350
@@ -382,6 +387,8 @@ struct kvm_arch_ops {
382 int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); 387 int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
383 int (*vcpu_setup)(struct kvm_vcpu *vcpu); 388 int (*vcpu_setup)(struct kvm_vcpu *vcpu);
384 void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); 389 void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
390 void (*patch_hypercall)(struct kvm_vcpu *vcpu,
391 unsigned char *hypercall_addr);
385}; 392};
386 393
387extern struct kvm_stat kvm_stat; 394extern struct kvm_stat kvm_stat;
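The new patch_hypercall() hook lets each arch backend emit its native hypercall instruction into a guest-chosen page, so one guest binary can run on both Intel and AMD hosts. The VMX and SVM implementations belong to this series but fall outside the hunks shown; a sketch of what the VMX side plausibly looks like (the function name is an assumption, the 0f 01 c1 encoding is the fixed VMCALL opcode):

static void vmx_patch_hypercall(struct kvm_vcpu *vcpu,
                                unsigned char *hypercall)
{
        /* VMCALL is a fixed 3-byte instruction: 0f 01 c1 */
        hypercall[0] = 0x0f;
        hypercall[1] = 0x01;
        hypercall[2] = 0xc1;
}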
@@ -476,6 +483,8 @@ void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
476int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 483int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
477void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 484void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
478 485
486int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
487
479static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, 488static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
480 u32 error_code) 489 u32 error_code)
481{ 490{
@@ -523,7 +532,7 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
523{ 532{
524 struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); 533 struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
525 534
526 return (struct kvm_mmu_page *)page->private; 535 return (struct kvm_mmu_page *)page_private(page);
527} 536}
528 537
529static inline u16 read_fs(void) 538static inline u16 read_fs(void)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index af866147ff25..a163bca38973 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -20,6 +20,7 @@
20#include <linux/kvm.h> 20#include <linux/kvm.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <linux/magic.h>
23#include <asm/processor.h> 24#include <asm/processor.h>
24#include <linux/percpu.h> 25#include <linux/percpu.h>
25#include <linux/gfp.h> 26#include <linux/gfp.h>
@@ -36,6 +37,9 @@
36#include <asm/desc.h> 37#include <asm/desc.h>
37#include <linux/sysdev.h> 38#include <linux/sysdev.h>
38#include <linux/cpu.h> 39#include <linux/cpu.h>
40#include <linux/file.h>
41#include <linux/fs.h>
42#include <linux/mount.h>
39 43
40#include "x86_emulate.h" 44#include "x86_emulate.h"
41#include "segment_descriptor.h" 45#include "segment_descriptor.h"
@@ -72,6 +76,8 @@ static struct kvm_stats_debugfs_item {
72 76
73static struct dentry *debugfs_dir; 77static struct dentry *debugfs_dir;
74 78
79struct vfsmount *kvmfs_mnt;
80
75#define MAX_IO_MSRS 256 81#define MAX_IO_MSRS 256
76 82
77#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL 83#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL
@@ -90,6 +96,58 @@ struct segment_descriptor_64 {
90 96
91#endif 97#endif
92 98
99static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
100 unsigned long arg);
101
102static struct inode *kvmfs_inode(struct file_operations *fops)
103{
104 int error = -ENOMEM;
105 struct inode *inode = new_inode(kvmfs_mnt->mnt_sb);
106
107 if (!inode)
108 goto eexit_1;
109
110 inode->i_fop = fops;
111
112 /*
113 * Mark the inode dirty from the very beginning,
114 * that way it will never be moved to the dirty
115 * list because mark_inode_dirty() will think
116 * that it already _is_ on the dirty list.
117 */
118 inode->i_state = I_DIRTY;
119 inode->i_mode = S_IRUSR | S_IWUSR;
120 inode->i_uid = current->fsuid;
121 inode->i_gid = current->fsgid;
122 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
123 return inode;
124
125eexit_1:
126 return ERR_PTR(error);
127}
128
129static struct file *kvmfs_file(struct inode *inode, void *private_data)
130{
131 struct file *file = get_empty_filp();
132
133 if (!file)
134 return ERR_PTR(-ENFILE);
135
136 file->f_path.mnt = mntget(kvmfs_mnt);
137 file->f_path.dentry = d_alloc_anon(inode);
138 if (!file->f_path.dentry)
139 return ERR_PTR(-ENOMEM);
140 file->f_mapping = inode->i_mapping;
141
142 file->f_pos = 0;
143 file->f_flags = O_RDWR;
144 file->f_op = inode->i_fop;
145 file->f_mode = FMODE_READ | FMODE_WRITE;
146 file->f_version = 0;
147 file->private_data = private_data;
148 return file;
149}
150
93unsigned long segment_base(u16 selector) 151unsigned long segment_base(u16 selector)
94{ 152{
95 struct descriptor_table gdt; 153 struct descriptor_table gdt;
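kvmfs_inode() and kvmfs_file() hand-roll anonymous inodes on a private pseudo filesystem so that VMs and vcpus can be plain file descriptors without consuming chardev minor numbers. Later kernels fold this pattern into the generic anon_inodes layer; a sketch of the equivalent, assuming the modern anon_inode_getfd() API:

#include <linux/anon_inodes.h>

/* One call replaces kvmfs_inode() + kvmfs_file() + get_unused_fd() +
 * fd_install(): it allocates a file on the shared anon_inodes mount,
 * wires up fops and private_data, and returns an installed fd.
 */
static int create_vm_fd(struct kvm *kvm)
{
        return anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
}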
@@ -126,10 +184,8 @@ static inline int valid_vcpu(int n)
126 return likely(n >= 0 && n < KVM_MAX_VCPUS); 184 return likely(n >= 0 && n < KVM_MAX_VCPUS);
127} 185}
128 186
129int kvm_read_guest(struct kvm_vcpu *vcpu, 187int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
130 gva_t addr, 188 void *dest)
131 unsigned long size,
132 void *dest)
133{ 189{
134 unsigned char *host_buf = dest; 190 unsigned char *host_buf = dest;
135 unsigned long req_size = size; 191 unsigned long req_size = size;
@@ -161,10 +217,8 @@ int kvm_read_guest(struct kvm_vcpu *vcpu,
161} 217}
162EXPORT_SYMBOL_GPL(kvm_read_guest); 218EXPORT_SYMBOL_GPL(kvm_read_guest);
163 219
164int kvm_write_guest(struct kvm_vcpu *vcpu, 220int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
165 gva_t addr, 221 void *data)
166 unsigned long size,
167 void *data)
168{ 222{
169 unsigned char *host_buf = data; 223 unsigned char *host_buf = data;
170 unsigned long req_size = size; 224 unsigned long req_size = size;
@@ -174,12 +228,15 @@ int kvm_write_guest(struct kvm_vcpu *vcpu,
174 unsigned now; 228 unsigned now;
175 unsigned offset; 229 unsigned offset;
176 hva_t guest_buf; 230 hva_t guest_buf;
231 gfn_t gfn;
177 232
178 paddr = gva_to_hpa(vcpu, addr); 233 paddr = gva_to_hpa(vcpu, addr);
179 234
180 if (is_error_hpa(paddr)) 235 if (is_error_hpa(paddr))
181 break; 236 break;
182 237
238 gfn = vcpu->mmu.gva_to_gpa(vcpu, addr) >> PAGE_SHIFT;
239 mark_page_dirty(vcpu->kvm, gfn);
183 guest_buf = (hva_t)kmap_atomic( 240 guest_buf = (hva_t)kmap_atomic(
184 pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0); 241 pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0);
185 offset = addr & ~PAGE_MASK; 242 offset = addr & ~PAGE_MASK;
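Every host-initiated store into guest memory now logs the target frame before writing, so KVM_GET_DIRTY_LOG cannot miss a host write. The rule, distilled into a hypothetical helper (the wrapper and its name are illustration only; the calls are the ones used in this file):

static void host_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
                                  unsigned offset, const void *src,
                                  unsigned len)
{
        struct page *page = gfn_to_page(gfn_to_memslot(vcpu->kvm, gfn), gfn);
        void *virt;

        mark_page_dirty(vcpu->kvm, gfn);        /* log first ...  */
        virt = kmap_atomic(page, KM_USER0);
        memcpy(virt + offset, src, len);        /* ... then write */
        kunmap_atomic(virt, KM_USER0);
}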
@@ -195,24 +252,30 @@ int kvm_write_guest(struct kvm_vcpu *vcpu,
195} 252}
196EXPORT_SYMBOL_GPL(kvm_write_guest); 253EXPORT_SYMBOL_GPL(kvm_write_guest);
197 254
198static int vcpu_slot(struct kvm_vcpu *vcpu) 255/*
256 * Switches to specified vcpu, until a matching vcpu_put()
257 */
258static void vcpu_load(struct kvm_vcpu *vcpu)
199{ 259{
200 return vcpu - vcpu->kvm->vcpus; 260 mutex_lock(&vcpu->mutex);
261 kvm_arch_ops->vcpu_load(vcpu);
201} 262}
202 263
203/* 264/*
204 * Switches to specified vcpu, until a matching vcpu_put() 265 * Switches to specified vcpu, until a matching vcpu_put(). Will return NULL
266 * if the slot is not populated.
205 */ 267 */
206static struct kvm_vcpu *vcpu_load(struct kvm *kvm, int vcpu_slot) 268static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot)
207{ 269{
208 struct kvm_vcpu *vcpu = &kvm->vcpus[vcpu_slot]; 270 struct kvm_vcpu *vcpu = &kvm->vcpus[slot];
209 271
210 mutex_lock(&vcpu->mutex); 272 mutex_lock(&vcpu->mutex);
211 if (unlikely(!vcpu->vmcs)) { 273 if (!vcpu->vmcs) {
212 mutex_unlock(&vcpu->mutex); 274 mutex_unlock(&vcpu->mutex);
213 return NULL; 275 return NULL;
214 } 276 }
215 return kvm_arch_ops->vcpu_load(vcpu); 277 kvm_arch_ops->vcpu_load(vcpu);
278 return vcpu;
216} 279}
217 280
218static void vcpu_put(struct kvm_vcpu *vcpu) 281static void vcpu_put(struct kvm_vcpu *vcpu)
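vcpu_load() now takes the vcpu pointer directly and cannot fail, since holding a vcpu fd guarantees the slot is populated; vcpu_load_slot() keeps the old may-fail semantics for the internal callers that still iterate over slots. Every per-vcpu operation brackets its work the same way:

        vcpu_load(vcpu);        /* mutex_lock(&vcpu->mutex) + arch state load */
        /* ... operate on vcpu state ... */
        vcpu_put(vcpu);         /* arch state unload + mutex_unlock */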
@@ -221,13 +284,13 @@ static void vcpu_put(struct kvm_vcpu *vcpu)
221 mutex_unlock(&vcpu->mutex); 284 mutex_unlock(&vcpu->mutex);
222} 285}
223 286
224static int kvm_dev_open(struct inode *inode, struct file *filp) 287static struct kvm *kvm_create_vm(void)
225{ 288{
226 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); 289 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
227 int i; 290 int i;
228 291
229 if (!kvm) 292 if (!kvm)
230 return -ENOMEM; 293 return ERR_PTR(-ENOMEM);
231 294
232 spin_lock_init(&kvm->lock); 295 spin_lock_init(&kvm->lock);
233 INIT_LIST_HEAD(&kvm->active_mmu_pages); 296 INIT_LIST_HEAD(&kvm->active_mmu_pages);
@@ -243,7 +306,11 @@ static int kvm_dev_open(struct inode *inode, struct file *filp)
243 list_add(&kvm->vm_list, &vm_list); 306 list_add(&kvm->vm_list, &vm_list);
244 spin_unlock(&kvm_lock); 307 spin_unlock(&kvm_lock);
245 } 308 }
246 filp->private_data = kvm; 309 return kvm;
310}
311
312static int kvm_dev_open(struct inode *inode, struct file *filp)
313{
247 return 0; 314 return 0;
248} 315}
249 316
@@ -281,9 +348,10 @@ static void kvm_free_physmem(struct kvm *kvm)
281 348
282static void kvm_free_vcpu(struct kvm_vcpu *vcpu) 349static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
283{ 350{
284 if (!vcpu_load(vcpu->kvm, vcpu_slot(vcpu))) 351 if (!vcpu->vmcs)
285 return; 352 return;
286 353
354 vcpu_load(vcpu);
287 kvm_mmu_destroy(vcpu); 355 kvm_mmu_destroy(vcpu);
288 vcpu_put(vcpu); 356 vcpu_put(vcpu);
289 kvm_arch_ops->vcpu_free(vcpu); 357 kvm_arch_ops->vcpu_free(vcpu);
@@ -299,14 +367,24 @@ static void kvm_free_vcpus(struct kvm *kvm)
299 367
300static int kvm_dev_release(struct inode *inode, struct file *filp) 368static int kvm_dev_release(struct inode *inode, struct file *filp)
301{ 369{
302 struct kvm *kvm = filp->private_data; 370 return 0;
371}
303 372
373static void kvm_destroy_vm(struct kvm *kvm)
374{
304 spin_lock(&kvm_lock); 375 spin_lock(&kvm_lock);
305 list_del(&kvm->vm_list); 376 list_del(&kvm->vm_list);
306 spin_unlock(&kvm_lock); 377 spin_unlock(&kvm_lock);
307 kvm_free_vcpus(kvm); 378 kvm_free_vcpus(kvm);
308 kvm_free_physmem(kvm); 379 kvm_free_physmem(kvm);
309 kfree(kvm); 380 kfree(kvm);
381}
382
383static int kvm_vm_release(struct inode *inode, struct file *filp)
384{
385 struct kvm *kvm = filp->private_data;
386
387 kvm_destroy_vm(kvm);
310 return 0; 388 return 0;
311} 389}
312 390
@@ -457,7 +535,7 @@ EXPORT_SYMBOL_GPL(set_cr4);
457void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 535void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
458{ 536{
459 if (is_long_mode(vcpu)) { 537 if (is_long_mode(vcpu)) {
460 if ( cr3 & CR3_L_MODE_RESEVED_BITS) { 538 if (cr3 & CR3_L_MODE_RESEVED_BITS) {
461 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); 539 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
462 inject_gp(vcpu); 540 inject_gp(vcpu);
463 return; 541 return;
@@ -533,55 +611,11 @@ void fx_init(struct kvm_vcpu *vcpu)
533} 611}
534EXPORT_SYMBOL_GPL(fx_init); 612EXPORT_SYMBOL_GPL(fx_init);
535 613
536/* 614static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot)
537 * Creates some virtual cpus. Good luck creating more than one.
538 */
539static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n)
540{ 615{
541 int r; 616 spin_lock(&vcpu->kvm->lock);
542 struct kvm_vcpu *vcpu; 617 kvm_mmu_slot_remove_write_access(vcpu, slot);
543 618 spin_unlock(&vcpu->kvm->lock);
544 r = -EINVAL;
545 if (!valid_vcpu(n))
546 goto out;
547
548 vcpu = &kvm->vcpus[n];
549
550 mutex_lock(&vcpu->mutex);
551
552 if (vcpu->vmcs) {
553 mutex_unlock(&vcpu->mutex);
554 return -EEXIST;
555 }
556
557 vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
558 FX_IMAGE_ALIGN);
559 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
560
561 r = kvm_arch_ops->vcpu_create(vcpu);
562 if (r < 0)
563 goto out_free_vcpus;
564
565 r = kvm_mmu_create(vcpu);
566 if (r < 0)
567 goto out_free_vcpus;
568
569 kvm_arch_ops->vcpu_load(vcpu);
570 r = kvm_mmu_setup(vcpu);
571 if (r >= 0)
572 r = kvm_arch_ops->vcpu_setup(vcpu);
573 vcpu_put(vcpu);
574
575 if (r < 0)
576 goto out_free_vcpus;
577
578 return 0;
579
580out_free_vcpus:
581 kvm_free_vcpu(vcpu);
582 mutex_unlock(&vcpu->mutex);
583out:
584 return r;
585} 619}
586 620
587/* 621/*
@@ -590,8 +624,8 @@ out:
590 * 624 *
591 * Discontiguous memory is allowed, mostly for framebuffers. 625 * Discontiguous memory is allowed, mostly for framebuffers.
592 */ 626 */
593static int kvm_dev_ioctl_set_memory_region(struct kvm *kvm, 627static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
594 struct kvm_memory_region *mem) 628 struct kvm_memory_region *mem)
595{ 629{
596 int r; 630 int r;
597 gfn_t base_gfn; 631 gfn_t base_gfn;
@@ -674,7 +708,7 @@ raced:
674 | __GFP_ZERO); 708 | __GFP_ZERO);
675 if (!new.phys_mem[i]) 709 if (!new.phys_mem[i])
676 goto out_free; 710 goto out_free;
677 new.phys_mem[i]->private = 0; 711 set_page_private(new.phys_mem[i],0);
678 } 712 }
679 } 713 }
680 714
@@ -711,9 +745,11 @@ raced:
711 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 745 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
712 struct kvm_vcpu *vcpu; 746 struct kvm_vcpu *vcpu;
713 747
714 vcpu = vcpu_load(kvm, i); 748 vcpu = vcpu_load_slot(kvm, i);
715 if (!vcpu) 749 if (!vcpu)
716 continue; 750 continue;
751 if (new.flags & KVM_MEM_LOG_DIRTY_PAGES)
752 do_remove_write_access(vcpu, mem->slot);
717 kvm_mmu_reset_context(vcpu); 753 kvm_mmu_reset_context(vcpu);
718 vcpu_put(vcpu); 754 vcpu_put(vcpu);
719 } 755 }
@@ -729,18 +765,11 @@ out:
729 return r; 765 return r;
730} 766}
731 767
732static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot)
733{
734 spin_lock(&vcpu->kvm->lock);
735 kvm_mmu_slot_remove_write_access(vcpu, slot);
736 spin_unlock(&vcpu->kvm->lock);
737}
738
739/* 768/*
740 * Get (and clear) the dirty memory log for a memory slot. 769 * Get (and clear) the dirty memory log for a memory slot.
741 */ 770 */
742static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm, 771static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
743 struct kvm_dirty_log *log) 772 struct kvm_dirty_log *log)
744{ 773{
745 struct kvm_memory_slot *memslot; 774 struct kvm_memory_slot *memslot;
746 int r, i; 775 int r, i;
@@ -765,21 +794,21 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
765 if (!memslot->dirty_bitmap) 794 if (!memslot->dirty_bitmap)
766 goto out; 795 goto out;
767 796
768 n = ALIGN(memslot->npages, 8) / 8; 797 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
769 798
770 for (i = 0; !any && i < n; ++i) 799 for (i = 0; !any && i < n/sizeof(long); ++i)
771 any = memslot->dirty_bitmap[i]; 800 any = memslot->dirty_bitmap[i];
772 801
773 r = -EFAULT; 802 r = -EFAULT;
774 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 803 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
775 goto out; 804 goto out;
776 805
777
778 if (any) { 806 if (any) {
779 cleared = 0; 807 cleared = 0;
780 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 808 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
781 struct kvm_vcpu *vcpu = vcpu_load(kvm, i); 809 struct kvm_vcpu *vcpu;
782 810
811 vcpu = vcpu_load_slot(kvm, i);
783 if (!vcpu) 812 if (!vcpu)
784 continue; 813 continue;
785 if (!cleared) { 814 if (!cleared) {
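The dirty bitmap is allocated and written as an array of unsigned long (set_bit() granularity), so the copied size is now rounded up to BITS_PER_LONG bits and the any-bits-set scan indexes longs instead of bytes. A worked example, assuming a 64-bit host:

/* Slot of 1000 pages, BITS_PER_LONG == 64:
 *   n = ALIGN(1000, 64) / 8 = 128 bytes copied to userspace,
 *   scan runs for n / sizeof(long) = 16 longs.
 * The old code computed n = ALIGN(1000, 8) / 8 = 125 and then used
 * that byte count as a *long* index, reading 1000 bytes from a
 * 125-byte allocation: both an over-read and a short copy.
 */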
@@ -903,8 +932,9 @@ static int emulator_read_emulated(unsigned long addr,
903 return X86EMUL_CONTINUE; 932 return X86EMUL_CONTINUE;
904 else { 933 else {
905 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); 934 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
935
906 if (gpa == UNMAPPED_GVA) 936 if (gpa == UNMAPPED_GVA)
907 return vcpu_printf(vcpu, "not present\n"), X86EMUL_PROPAGATE_FAULT; 937 return X86EMUL_PROPAGATE_FAULT;
908 vcpu->mmio_needed = 1; 938 vcpu->mmio_needed = 1;
909 vcpu->mmio_phys_addr = gpa; 939 vcpu->mmio_phys_addr = gpa;
910 vcpu->mmio_size = bytes; 940 vcpu->mmio_size = bytes;
@@ -928,6 +958,7 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
928 return 0; 958 return 0;
929 page = gfn_to_page(m, gpa >> PAGE_SHIFT); 959 page = gfn_to_page(m, gpa >> PAGE_SHIFT);
930 kvm_mmu_pre_write(vcpu, gpa, bytes); 960 kvm_mmu_pre_write(vcpu, gpa, bytes);
961 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
931 virt = kmap_atomic(page, KM_USER0); 962 virt = kmap_atomic(page, KM_USER0);
932 memcpy(virt + offset_in_page(gpa), &val, bytes); 963 memcpy(virt + offset_in_page(gpa), &val, bytes);
933 kunmap_atomic(virt, KM_USER0); 964 kunmap_atomic(virt, KM_USER0);
@@ -1142,6 +1173,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
1142} 1173}
1143EXPORT_SYMBOL_GPL(emulate_instruction); 1174EXPORT_SYMBOL_GPL(emulate_instruction);
1144 1175
1176int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
1177{
1178 unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
1179
 1180 kvm_arch_ops->cache_regs(vcpu);
1181 ret = -KVM_EINVAL;
1182#ifdef CONFIG_X86_64
1183 if (is_long_mode(vcpu)) {
1184 nr = vcpu->regs[VCPU_REGS_RAX];
1185 a0 = vcpu->regs[VCPU_REGS_RDI];
1186 a1 = vcpu->regs[VCPU_REGS_RSI];
1187 a2 = vcpu->regs[VCPU_REGS_RDX];
1188 a3 = vcpu->regs[VCPU_REGS_RCX];
1189 a4 = vcpu->regs[VCPU_REGS_R8];
1190 a5 = vcpu->regs[VCPU_REGS_R9];
1191 } else
1192#endif
1193 {
1194 nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
1195 a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
1196 a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
1197 a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
1198 a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
1199 a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
1200 a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
1201 }
1202 switch (nr) {
1203 default:
1204 ;
1205 }
1206 vcpu->regs[VCPU_REGS_RAX] = ret;
 1207 kvm_arch_ops->decache_regs(vcpu);
1208 return 1;
1209}
1210EXPORT_SYMBOL_GPL(kvm_hypercall);
1211
1145static u64 mk_cr_64(u64 curr_cr, u32 new_val) 1212static u64 mk_cr_64(u64 curr_cr, u32 new_val)
1146{ 1213{
1147 return (curr_cr & ~((1ULL << 32) - 1)) | new_val; 1214 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
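kvm_hypercall() first syncs guest registers into vcpu->regs (cache_regs), decodes the call under one of two ABIs (64-bit: nr in RAX, args in RDI, RSI, RDX, RCX, R8, R9; 32-bit: nr in RBX, args in RAX, RCX, RDX, RSI, RDI, RBP, each masked with & -1u to drop stale upper halves), then writes the result back through RAX (decache_regs). The switch defines no hypercall numbers yet, so every call returns -KVM_EINVAL. A hypothetical guest-side wrapper for the 64-bit convention (function and pointer names are illustration only):

static inline unsigned long kvm_hypercall1(void *hypercall_addr,
                                           unsigned long nr,
                                           unsigned long a0)
{
        unsigned long ret;

        /* hypercall_addr points at the page patched by patch_hypercall() */
        asm volatile("call *%[addr]"
                     : "=a" (ret)
                     : "a" (nr), "D" (a0), [addr] "rm" (hypercall_addr)
                     : "memory");
        return ret;
}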
@@ -1208,6 +1275,75 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
1208 } 1275 }
1209} 1276}
1210 1277
1278/*
1279 * Register the para guest with the host:
1280 */
1281static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
1282{
1283 struct kvm_vcpu_para_state *para_state;
1284 hpa_t para_state_hpa, hypercall_hpa;
1285 struct page *para_state_page;
1286 unsigned char *hypercall;
1287 gpa_t hypercall_gpa;
1288
1289 printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
1290 printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
1291
1292 /*
1293 * Needs to be page aligned:
1294 */
1295 if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
1296 goto err_gp;
1297
1298 para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
1299 printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
1300 if (is_error_hpa(para_state_hpa))
1301 goto err_gp;
1302
1303 mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
1304 para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
1305 para_state = kmap_atomic(para_state_page, KM_USER0);
1306
1307 printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
1308 printk(KERN_DEBUG ".... size: %d\n", para_state->size);
1309
1310 para_state->host_version = KVM_PARA_API_VERSION;
1311 /*
1312 * We cannot support guests that try to register themselves
1313 * with a newer API version than the host supports:
1314 */
1315 if (para_state->guest_version > KVM_PARA_API_VERSION) {
1316 para_state->ret = -KVM_EINVAL;
1317 goto err_kunmap_skip;
1318 }
1319
1320 hypercall_gpa = para_state->hypercall_gpa;
1321 hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
1322 printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
1323 if (is_error_hpa(hypercall_hpa)) {
1324 para_state->ret = -KVM_EINVAL;
1325 goto err_kunmap_skip;
1326 }
1327
1328 printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
1329 vcpu->para_state_page = para_state_page;
1330 vcpu->para_state_gpa = para_state_gpa;
1331 vcpu->hypercall_gpa = hypercall_gpa;
1332
1333 mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
1334 hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
1335 KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
1336 kvm_arch_ops->patch_hypercall(vcpu, hypercall);
1337 kunmap_atomic(hypercall, KM_USER1);
1338
1339 para_state->ret = 0;
1340err_kunmap_skip:
1341 kunmap_atomic(para_state, KM_USER0);
1342 return 0;
1343err_gp:
1344 return 1;
1345}
1346
1211int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1347int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1212{ 1348{
1213 u64 data; 1349 u64 data;
@@ -1316,6 +1452,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1316 case MSR_IA32_MISC_ENABLE: 1452 case MSR_IA32_MISC_ENABLE:
1317 vcpu->ia32_misc_enable_msr = data; 1453 vcpu->ia32_misc_enable_msr = data;
1318 break; 1454 break;
1455 /*
1456 * This is the 'probe whether the host is KVM' logic:
1457 */
1458 case MSR_KVM_API_MAGIC:
1459 return vcpu_register_para(vcpu, data);
1460
1319 default: 1461 default:
1320 printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr); 1462 printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr);
1321 return 1; 1463 return 1;
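vcpu_register_para() is reached by a wrmsr to MSR_KVM_API_MAGIC: the MSR payload is the guest-physical address of a page-aligned kvm_vcpu_para_state. The host refuses guests that claim a newer API than its own KVM_PARA_API_VERSION, patches the guest-supplied hypercall page, and reports the outcome through para_state->ret; on bare metal the unknown MSR simply raises #GP, which is the "probe whether the host is KVM" trick. A sketch of the guest half (the helper name is an assumption; the fields come from the new <linux/kvm_para.h> in this series):

static int kvm_guest_register(struct kvm_vcpu_para_state *para_state,
                              void *hypercall_page)
{
        para_state->guest_version = KVM_PARA_API_VERSION;
        para_state->size = sizeof(*para_state);
        para_state->hypercall_gpa = __pa(hypercall_page);

        /* Intercepted by kvm_set_msr_common() above; #GP on bare metal */
        wrmsrl(MSR_KVM_API_MAGIC, __pa(para_state));

        return para_state->ret;
}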
@@ -1338,8 +1480,7 @@ void kvm_resched(struct kvm_vcpu *vcpu)
1338{ 1480{
1339 vcpu_put(vcpu); 1481 vcpu_put(vcpu);
1340 cond_resched(); 1482 cond_resched();
1341 /* Cannot fail - no vcpu unplug yet. */ 1483 vcpu_load(vcpu);
1342 vcpu_load(vcpu->kvm, vcpu_slot(vcpu));
1343} 1484}
1344EXPORT_SYMBOL_GPL(kvm_resched); 1485EXPORT_SYMBOL_GPL(kvm_resched);
1345 1486
@@ -1361,17 +1502,11 @@ void save_msrs(struct vmx_msr_entry *e, int n)
1361} 1502}
1362EXPORT_SYMBOL_GPL(save_msrs); 1503EXPORT_SYMBOL_GPL(save_msrs);
1363 1504
1364static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run) 1505static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1365{ 1506{
1366 struct kvm_vcpu *vcpu;
1367 int r; 1507 int r;
1368 1508
1369 if (!valid_vcpu(kvm_run->vcpu)) 1509 vcpu_load(vcpu);
1370 return -EINVAL;
1371
1372 vcpu = vcpu_load(kvm, kvm_run->vcpu);
1373 if (!vcpu)
1374 return -ENOENT;
1375 1510
1376 /* re-sync apic's tpr */ 1511 /* re-sync apic's tpr */
1377 vcpu->cr8 = kvm_run->cr8; 1512 vcpu->cr8 = kvm_run->cr8;
@@ -1394,16 +1529,10 @@ static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run)
1394 return r; 1529 return r;
1395} 1530}
1396 1531
1397static int kvm_dev_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs) 1532static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
1533 struct kvm_regs *regs)
1398{ 1534{
1399 struct kvm_vcpu *vcpu; 1535 vcpu_load(vcpu);
1400
1401 if (!valid_vcpu(regs->vcpu))
1402 return -EINVAL;
1403
1404 vcpu = vcpu_load(kvm, regs->vcpu);
1405 if (!vcpu)
1406 return -ENOENT;
1407 1536
1408 kvm_arch_ops->cache_regs(vcpu); 1537 kvm_arch_ops->cache_regs(vcpu);
1409 1538
@@ -1440,16 +1569,10 @@ static int kvm_dev_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs)
1440 return 0; 1569 return 0;
1441} 1570}
1442 1571
1443static int kvm_dev_ioctl_set_regs(struct kvm *kvm, struct kvm_regs *regs) 1572static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
1573 struct kvm_regs *regs)
1444{ 1574{
1445 struct kvm_vcpu *vcpu; 1575 vcpu_load(vcpu);
1446
1447 if (!valid_vcpu(regs->vcpu))
1448 return -EINVAL;
1449
1450 vcpu = vcpu_load(kvm, regs->vcpu);
1451 if (!vcpu)
1452 return -ENOENT;
1453 1576
1454 vcpu->regs[VCPU_REGS_RAX] = regs->rax; 1577 vcpu->regs[VCPU_REGS_RAX] = regs->rax;
1455 vcpu->regs[VCPU_REGS_RBX] = regs->rbx; 1578 vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
@@ -1486,16 +1609,12 @@ static void get_segment(struct kvm_vcpu *vcpu,
1486 return kvm_arch_ops->get_segment(vcpu, var, seg); 1609 return kvm_arch_ops->get_segment(vcpu, var, seg);
1487} 1610}
1488 1611
1489static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs) 1612static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1613 struct kvm_sregs *sregs)
1490{ 1614{
1491 struct kvm_vcpu *vcpu;
1492 struct descriptor_table dt; 1615 struct descriptor_table dt;
1493 1616
1494 if (!valid_vcpu(sregs->vcpu)) 1617 vcpu_load(vcpu);
1495 return -EINVAL;
1496 vcpu = vcpu_load(kvm, sregs->vcpu);
1497 if (!vcpu)
1498 return -ENOENT;
1499 1618
1500 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 1619 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
1501 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 1620 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
@@ -1537,18 +1656,14 @@ static void set_segment(struct kvm_vcpu *vcpu,
1537 return kvm_arch_ops->set_segment(vcpu, var, seg); 1656 return kvm_arch_ops->set_segment(vcpu, var, seg);
1538} 1657}
1539 1658
1540static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs) 1659static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1660 struct kvm_sregs *sregs)
1541{ 1661{
1542 struct kvm_vcpu *vcpu;
1543 int mmu_reset_needed = 0; 1662 int mmu_reset_needed = 0;
1544 int i; 1663 int i;
1545 struct descriptor_table dt; 1664 struct descriptor_table dt;
1546 1665
1547 if (!valid_vcpu(sregs->vcpu)) 1666 vcpu_load(vcpu);
1548 return -EINVAL;
1549 vcpu = vcpu_load(kvm, sregs->vcpu);
1550 if (!vcpu)
1551 return -ENOENT;
1552 1667
1553 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 1668 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
1554 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 1669 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
@@ -1654,20 +1769,14 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1654 * 1769 *
1655 * @return number of msrs set successfully. 1770 * @return number of msrs set successfully.
1656 */ 1771 */
1657static int __msr_io(struct kvm *kvm, struct kvm_msrs *msrs, 1772static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1658 struct kvm_msr_entry *entries, 1773 struct kvm_msr_entry *entries,
1659 int (*do_msr)(struct kvm_vcpu *vcpu, 1774 int (*do_msr)(struct kvm_vcpu *vcpu,
1660 unsigned index, u64 *data)) 1775 unsigned index, u64 *data))
1661{ 1776{
1662 struct kvm_vcpu *vcpu;
1663 int i; 1777 int i;
1664 1778
1665 if (!valid_vcpu(msrs->vcpu)) 1779 vcpu_load(vcpu);
1666 return -EINVAL;
1667
1668 vcpu = vcpu_load(kvm, msrs->vcpu);
1669 if (!vcpu)
1670 return -ENOENT;
1671 1780
1672 for (i = 0; i < msrs->nmsrs; ++i) 1781 for (i = 0; i < msrs->nmsrs; ++i)
1673 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1782 if (do_msr(vcpu, entries[i].index, &entries[i].data))
@@ -1683,7 +1792,7 @@ static int __msr_io(struct kvm *kvm, struct kvm_msrs *msrs,
1683 * 1792 *
1684 * @return number of msrs set successfully. 1793 * @return number of msrs set successfully.
1685 */ 1794 */
1686static int msr_io(struct kvm *kvm, struct kvm_msrs __user *user_msrs, 1795static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
1687 int (*do_msr)(struct kvm_vcpu *vcpu, 1796 int (*do_msr)(struct kvm_vcpu *vcpu,
1688 unsigned index, u64 *data), 1797 unsigned index, u64 *data),
1689 int writeback) 1798 int writeback)
@@ -1711,7 +1820,7 @@ static int msr_io(struct kvm *kvm, struct kvm_msrs __user *user_msrs,
1711 if (copy_from_user(entries, user_msrs->entries, size)) 1820 if (copy_from_user(entries, user_msrs->entries, size))
1712 goto out_free; 1821 goto out_free;
1713 1822
1714 r = n = __msr_io(kvm, &msrs, entries, do_msr); 1823 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
1715 if (r < 0) 1824 if (r < 0)
1716 goto out_free; 1825 goto out_free;
1717 1826
@@ -1730,38 +1839,31 @@ out:
1730/* 1839/*
1731 * Translate a guest virtual address to a guest physical address. 1840 * Translate a guest virtual address to a guest physical address.
1732 */ 1841 */
1733static int kvm_dev_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr) 1842static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1843 struct kvm_translation *tr)
1734{ 1844{
1735 unsigned long vaddr = tr->linear_address; 1845 unsigned long vaddr = tr->linear_address;
1736 struct kvm_vcpu *vcpu;
1737 gpa_t gpa; 1846 gpa_t gpa;
1738 1847
1739 vcpu = vcpu_load(kvm, tr->vcpu); 1848 vcpu_load(vcpu);
1740 if (!vcpu) 1849 spin_lock(&vcpu->kvm->lock);
1741 return -ENOENT;
1742 spin_lock(&kvm->lock);
1743 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); 1850 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
1744 tr->physical_address = gpa; 1851 tr->physical_address = gpa;
1745 tr->valid = gpa != UNMAPPED_GVA; 1852 tr->valid = gpa != UNMAPPED_GVA;
1746 tr->writeable = 1; 1853 tr->writeable = 1;
1747 tr->usermode = 0; 1854 tr->usermode = 0;
1748 spin_unlock(&kvm->lock); 1855 spin_unlock(&vcpu->kvm->lock);
1749 vcpu_put(vcpu); 1856 vcpu_put(vcpu);
1750 1857
1751 return 0; 1858 return 0;
1752} 1859}
1753 1860
1754static int kvm_dev_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq) 1861static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
1862 struct kvm_interrupt *irq)
1755{ 1863{
1756 struct kvm_vcpu *vcpu;
1757
1758 if (!valid_vcpu(irq->vcpu))
1759 return -EINVAL;
1760 if (irq->irq < 0 || irq->irq >= 256) 1864 if (irq->irq < 0 || irq->irq >= 256)
1761 return -EINVAL; 1865 return -EINVAL;
1762 vcpu = vcpu_load(kvm, irq->vcpu); 1866 vcpu_load(vcpu);
1763 if (!vcpu)
1764 return -ENOENT;
1765 1867
1766 set_bit(irq->irq, vcpu->irq_pending); 1868 set_bit(irq->irq, vcpu->irq_pending);
1767 set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary); 1869 set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
@@ -1771,17 +1873,12 @@ static int kvm_dev_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq)
1771 return 0; 1873 return 0;
1772} 1874}
1773 1875
1774static int kvm_dev_ioctl_debug_guest(struct kvm *kvm, 1876static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
1775 struct kvm_debug_guest *dbg) 1877 struct kvm_debug_guest *dbg)
1776{ 1878{
1777 struct kvm_vcpu *vcpu;
1778 int r; 1879 int r;
1779 1880
1780 if (!valid_vcpu(dbg->vcpu)) 1881 vcpu_load(vcpu);
1781 return -EINVAL;
1782 vcpu = vcpu_load(kvm, dbg->vcpu);
1783 if (!vcpu)
1784 return -ENOENT;
1785 1882
1786 r = kvm_arch_ops->set_guest_debug(vcpu, dbg); 1883 r = kvm_arch_ops->set_guest_debug(vcpu, dbg);
1787 1884
@@ -1790,30 +1887,129 @@ static int kvm_dev_ioctl_debug_guest(struct kvm *kvm,
1790 return r; 1887 return r;
1791} 1888}
1792 1889
1793static long kvm_dev_ioctl(struct file *filp, 1890static int kvm_vcpu_release(struct inode *inode, struct file *filp)
1794 unsigned int ioctl, unsigned long arg)
1795{ 1891{
1796 struct kvm *kvm = filp->private_data; 1892 struct kvm_vcpu *vcpu = filp->private_data;
1893
1894 fput(vcpu->kvm->filp);
1895 return 0;
1896}
1897
1898static struct file_operations kvm_vcpu_fops = {
1899 .release = kvm_vcpu_release,
1900 .unlocked_ioctl = kvm_vcpu_ioctl,
1901 .compat_ioctl = kvm_vcpu_ioctl,
1902};
1903
1904/*
1905 * Allocates an inode for the vcpu.
1906 */
1907static int create_vcpu_fd(struct kvm_vcpu *vcpu)
1908{
1909 int fd, r;
1910 struct inode *inode;
1911 struct file *file;
1912
1913 atomic_inc(&vcpu->kvm->filp->f_count);
1914 inode = kvmfs_inode(&kvm_vcpu_fops);
1915 if (IS_ERR(inode)) {
1916 r = PTR_ERR(inode);
1917 goto out1;
1918 }
1919
1920 file = kvmfs_file(inode, vcpu);
1921 if (IS_ERR(file)) {
1922 r = PTR_ERR(file);
1923 goto out2;
1924 }
1925
1926 r = get_unused_fd();
1927 if (r < 0)
1928 goto out3;
1929 fd = r;
1930 fd_install(fd, file);
1931
1932 return fd;
1933
1934out3:
1935 fput(file);
1936out2:
1937 iput(inode);
1938out1:
1939 fput(vcpu->kvm->filp);
1940 return r;
1941}
1942
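The vcpu file pins the VM file for its whole lifetime: create_vcpu_fd() bumps kvm->filp->f_count up front, both error paths and kvm_vcpu_release() drop it with fput(), so kvm_vm_release() and hence kvm_destroy_vm() cannot run while any vcpu fd is open. Later kernels spell the same pin with the get_file() helper; a minimal sketch under that assumption:

static void pin_vm_file(struct kvm_vcpu *vcpu)
{
        /* equivalent to the open-coded atomic_inc(&...->f_count) above */
        get_file(vcpu->kvm->filp);      /* paired with fput() on release */
}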
1943/*
1944 * Creates some virtual cpus. Good luck creating more than one.
1945 */
1946static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
1947{
1948 int r;
1949 struct kvm_vcpu *vcpu;
1950
1951 r = -EINVAL;
1952 if (!valid_vcpu(n))
1953 goto out;
1954
1955 vcpu = &kvm->vcpus[n];
1956
1957 mutex_lock(&vcpu->mutex);
1958
1959 if (vcpu->vmcs) {
1960 mutex_unlock(&vcpu->mutex);
1961 return -EEXIST;
1962 }
1963
1964 vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
1965 FX_IMAGE_ALIGN);
1966 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
1967
1968 r = kvm_arch_ops->vcpu_create(vcpu);
1969 if (r < 0)
1970 goto out_free_vcpus;
1971
1972 r = kvm_mmu_create(vcpu);
1973 if (r < 0)
1974 goto out_free_vcpus;
1975
1976 kvm_arch_ops->vcpu_load(vcpu);
1977 r = kvm_mmu_setup(vcpu);
1978 if (r >= 0)
1979 r = kvm_arch_ops->vcpu_setup(vcpu);
1980 vcpu_put(vcpu);
1981
1982 if (r < 0)
1983 goto out_free_vcpus;
1984
1985 r = create_vcpu_fd(vcpu);
1986 if (r < 0)
1987 goto out_free_vcpus;
1988
1989 return r;
1990
1991out_free_vcpus:
1992 kvm_free_vcpu(vcpu);
1993 mutex_unlock(&vcpu->mutex);
1994out:
1995 return r;
1996}
1997
1998static long kvm_vcpu_ioctl(struct file *filp,
1999 unsigned int ioctl, unsigned long arg)
2000{
2001 struct kvm_vcpu *vcpu = filp->private_data;
1797 void __user *argp = (void __user *)arg; 2002 void __user *argp = (void __user *)arg;
1798 int r = -EINVAL; 2003 int r = -EINVAL;
1799 2004
1800 switch (ioctl) { 2005 switch (ioctl) {
1801 case KVM_GET_API_VERSION:
1802 r = KVM_API_VERSION;
1803 break;
1804 case KVM_CREATE_VCPU: {
1805 r = kvm_dev_ioctl_create_vcpu(kvm, arg);
1806 if (r)
1807 goto out;
1808 break;
1809 }
1810 case KVM_RUN: { 2006 case KVM_RUN: {
1811 struct kvm_run kvm_run; 2007 struct kvm_run kvm_run;
1812 2008
1813 r = -EFAULT; 2009 r = -EFAULT;
1814 if (copy_from_user(&kvm_run, argp, sizeof kvm_run)) 2010 if (copy_from_user(&kvm_run, argp, sizeof kvm_run))
1815 goto out; 2011 goto out;
1816 r = kvm_dev_ioctl_run(kvm, &kvm_run); 2012 r = kvm_vcpu_ioctl_run(vcpu, &kvm_run);
1817 if (r < 0 && r != -EINTR) 2013 if (r < 0 && r != -EINTR)
1818 goto out; 2014 goto out;
1819 if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) { 2015 if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) {
@@ -1825,10 +2021,8 @@ static long kvm_dev_ioctl(struct file *filp,
1825 case KVM_GET_REGS: { 2021 case KVM_GET_REGS: {
1826 struct kvm_regs kvm_regs; 2022 struct kvm_regs kvm_regs;
1827 2023
1828 r = -EFAULT; 2024 memset(&kvm_regs, 0, sizeof kvm_regs);
1829 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) 2025 r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
1830 goto out;
1831 r = kvm_dev_ioctl_get_regs(kvm, &kvm_regs);
1832 if (r) 2026 if (r)
1833 goto out; 2027 goto out;
1834 r = -EFAULT; 2028 r = -EFAULT;
@@ -1843,7 +2037,7 @@ static long kvm_dev_ioctl(struct file *filp,
1843 r = -EFAULT; 2037 r = -EFAULT;
1844 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs)) 2038 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
1845 goto out; 2039 goto out;
1846 r = kvm_dev_ioctl_set_regs(kvm, &kvm_regs); 2040 r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
1847 if (r) 2041 if (r)
1848 goto out; 2042 goto out;
1849 r = 0; 2043 r = 0;
@@ -1852,10 +2046,8 @@ static long kvm_dev_ioctl(struct file *filp,
1852 case KVM_GET_SREGS: { 2046 case KVM_GET_SREGS: {
1853 struct kvm_sregs kvm_sregs; 2047 struct kvm_sregs kvm_sregs;
1854 2048
1855 r = -EFAULT; 2049 memset(&kvm_sregs, 0, sizeof kvm_sregs);
1856 if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) 2050 r = kvm_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
1857 goto out;
1858 r = kvm_dev_ioctl_get_sregs(kvm, &kvm_sregs);
1859 if (r) 2051 if (r)
1860 goto out; 2052 goto out;
1861 r = -EFAULT; 2053 r = -EFAULT;
@@ -1870,7 +2062,7 @@ static long kvm_dev_ioctl(struct file *filp,
1870 r = -EFAULT; 2062 r = -EFAULT;
1871 if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) 2063 if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
1872 goto out; 2064 goto out;
1873 r = kvm_dev_ioctl_set_sregs(kvm, &kvm_sregs); 2065 r = kvm_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
1874 if (r) 2066 if (r)
1875 goto out; 2067 goto out;
1876 r = 0; 2068 r = 0;
@@ -1882,7 +2074,7 @@ static long kvm_dev_ioctl(struct file *filp,
1882 r = -EFAULT; 2074 r = -EFAULT;
1883 if (copy_from_user(&tr, argp, sizeof tr)) 2075 if (copy_from_user(&tr, argp, sizeof tr))
1884 goto out; 2076 goto out;
1885 r = kvm_dev_ioctl_translate(kvm, &tr); 2077 r = kvm_vcpu_ioctl_translate(vcpu, &tr);
1886 if (r) 2078 if (r)
1887 goto out; 2079 goto out;
1888 r = -EFAULT; 2080 r = -EFAULT;
@@ -1897,7 +2089,7 @@ static long kvm_dev_ioctl(struct file *filp,
1897 r = -EFAULT; 2089 r = -EFAULT;
1898 if (copy_from_user(&irq, argp, sizeof irq)) 2090 if (copy_from_user(&irq, argp, sizeof irq))
1899 goto out; 2091 goto out;
1900 r = kvm_dev_ioctl_interrupt(kvm, &irq); 2092 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
1901 if (r) 2093 if (r)
1902 goto out; 2094 goto out;
1903 r = 0; 2095 r = 0;
@@ -1909,19 +2101,45 @@ static long kvm_dev_ioctl(struct file *filp,
1909 r = -EFAULT; 2101 r = -EFAULT;
1910 if (copy_from_user(&dbg, argp, sizeof dbg)) 2102 if (copy_from_user(&dbg, argp, sizeof dbg))
1911 goto out; 2103 goto out;
1912 r = kvm_dev_ioctl_debug_guest(kvm, &dbg); 2104 r = kvm_vcpu_ioctl_debug_guest(vcpu, &dbg);
1913 if (r) 2105 if (r)
1914 goto out; 2106 goto out;
1915 r = 0; 2107 r = 0;
1916 break; 2108 break;
1917 } 2109 }
2110 case KVM_GET_MSRS:
2111 r = msr_io(vcpu, argp, get_msr, 1);
2112 break;
2113 case KVM_SET_MSRS:
2114 r = msr_io(vcpu, argp, do_set_msr, 0);
2115 break;
2116 default:
2117 ;
2118 }
2119out:
2120 return r;
2121}
2122
2123static long kvm_vm_ioctl(struct file *filp,
2124 unsigned int ioctl, unsigned long arg)
2125{
2126 struct kvm *kvm = filp->private_data;
2127 void __user *argp = (void __user *)arg;
2128 int r = -EINVAL;
2129
2130 switch (ioctl) {
2131 case KVM_CREATE_VCPU:
2132 r = kvm_vm_ioctl_create_vcpu(kvm, arg);
2133 if (r < 0)
2134 goto out;
2135 break;
1918 case KVM_SET_MEMORY_REGION: { 2136 case KVM_SET_MEMORY_REGION: {
1919 struct kvm_memory_region kvm_mem; 2137 struct kvm_memory_region kvm_mem;
1920 2138
1921 r = -EFAULT; 2139 r = -EFAULT;
1922 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) 2140 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
1923 goto out; 2141 goto out;
1924 r = kvm_dev_ioctl_set_memory_region(kvm, &kvm_mem); 2142 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem);
1925 if (r) 2143 if (r)
1926 goto out; 2144 goto out;
1927 break; 2145 break;
@@ -1932,16 +2150,112 @@ static long kvm_dev_ioctl(struct file *filp,
1932 r = -EFAULT; 2150 r = -EFAULT;
1933 if (copy_from_user(&log, argp, sizeof log)) 2151 if (copy_from_user(&log, argp, sizeof log))
1934 goto out; 2152 goto out;
1935 r = kvm_dev_ioctl_get_dirty_log(kvm, &log); 2153 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
1936 if (r) 2154 if (r)
1937 goto out; 2155 goto out;
1938 break; 2156 break;
1939 } 2157 }
1940 case KVM_GET_MSRS: 2158 default:
1941 r = msr_io(kvm, argp, get_msr, 1); 2159 ;
2160 }
2161out:
2162 return r;
2163}
2164
2165static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
2166 unsigned long address,
2167 int *type)
2168{
2169 struct kvm *kvm = vma->vm_file->private_data;
2170 unsigned long pgoff;
2171 struct kvm_memory_slot *slot;
2172 struct page *page;
2173
2174 *type = VM_FAULT_MINOR;
2175 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
2176 slot = gfn_to_memslot(kvm, pgoff);
2177 if (!slot)
2178 return NOPAGE_SIGBUS;
2179 page = gfn_to_page(slot, pgoff);
2180 if (!page)
2181 return NOPAGE_SIGBUS;
2182 get_page(page);
2183 return page;
2184}
2185
2186static struct vm_operations_struct kvm_vm_vm_ops = {
2187 .nopage = kvm_vm_nopage,
2188};
2189
2190static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
2191{
2192 vma->vm_ops = &kvm_vm_vm_ops;
2193 return 0;
2194}
2195
2196static struct file_operations kvm_vm_fops = {
2197 .release = kvm_vm_release,
2198 .unlocked_ioctl = kvm_vm_ioctl,
2199 .compat_ioctl = kvm_vm_ioctl,
2200 .mmap = kvm_vm_mmap,
2201};
2202
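kvm_vm_nopage() is the old kvm_dev_nopage() moved onto the VM descriptor: the fault offset is interpreted as a guest frame number, so userspace can map guest physical memory directly through the VM fd. Illustrative usage (error handling elided):

        /* map the first `size` bytes of guest physical memory; the file
         * offset is the guest physical address of the first page
         */
        void *guest_ram = mmap(NULL, size, PROT_READ | PROT_WRITE,
                               MAP_SHARED, vm_fd, 0);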
2203static int kvm_dev_ioctl_create_vm(void)
2204{
2205 int fd, r;
2206 struct inode *inode;
2207 struct file *file;
2208 struct kvm *kvm;
2209
2210 inode = kvmfs_inode(&kvm_vm_fops);
2211 if (IS_ERR(inode)) {
2212 r = PTR_ERR(inode);
2213 goto out1;
2214 }
2215
2216 kvm = kvm_create_vm();
2217 if (IS_ERR(kvm)) {
2218 r = PTR_ERR(kvm);
2219 goto out2;
2220 }
2221
2222 file = kvmfs_file(inode, kvm);
2223 if (IS_ERR(file)) {
2224 r = PTR_ERR(file);
2225 goto out3;
2226 }
2227 kvm->filp = file;
2228
2229 r = get_unused_fd();
2230 if (r < 0)
2231 goto out4;
2232 fd = r;
2233 fd_install(fd, file);
2234
2235 return fd;
2236
2237out4:
2238 fput(file);
2239out3:
2240 kvm_destroy_vm(kvm);
2241out2:
2242 iput(inode);
2243out1:
2244 return r;
2245}
2246
2247static long kvm_dev_ioctl(struct file *filp,
2248 unsigned int ioctl, unsigned long arg)
2249{
2250 void __user *argp = (void __user *)arg;
2251 int r = -EINVAL;
2252
2253 switch (ioctl) {
2254 case KVM_GET_API_VERSION:
2255 r = KVM_API_VERSION;
1942 break; 2256 break;
1943 case KVM_SET_MSRS: 2257 case KVM_CREATE_VM:
1944 r = msr_io(kvm, argp, do_set_msr, 0); 2258 r = kvm_dev_ioctl_create_vm();
1945 break; 2259 break;
1946 case KVM_GET_MSR_INDEX_LIST: { 2260 case KVM_GET_MSR_INDEX_LIST: {
1947 struct kvm_msr_list __user *user_msr_list = argp; 2261 struct kvm_msr_list __user *user_msr_list = argp;
@@ -1977,43 +2291,11 @@ out:
1977 return r; 2291 return r;
1978} 2292}
1979 2293
1980static struct page *kvm_dev_nopage(struct vm_area_struct *vma,
1981 unsigned long address,
1982 int *type)
1983{
1984 struct kvm *kvm = vma->vm_file->private_data;
1985 unsigned long pgoff;
1986 struct kvm_memory_slot *slot;
1987 struct page *page;
1988
1989 *type = VM_FAULT_MINOR;
1990 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1991 slot = gfn_to_memslot(kvm, pgoff);
1992 if (!slot)
1993 return NOPAGE_SIGBUS;
1994 page = gfn_to_page(slot, pgoff);
1995 if (!page)
1996 return NOPAGE_SIGBUS;
1997 get_page(page);
1998 return page;
1999}
2000
2001static struct vm_operations_struct kvm_dev_vm_ops = {
2002 .nopage = kvm_dev_nopage,
2003};
2004
2005static int kvm_dev_mmap(struct file *file, struct vm_area_struct *vma)
2006{
2007 vma->vm_ops = &kvm_dev_vm_ops;
2008 return 0;
2009}
2010
2011static struct file_operations kvm_chardev_ops = { 2294static struct file_operations kvm_chardev_ops = {
2012 .open = kvm_dev_open, 2295 .open = kvm_dev_open,
2013 .release = kvm_dev_release, 2296 .release = kvm_dev_release,
2014 .unlocked_ioctl = kvm_dev_ioctl, 2297 .unlocked_ioctl = kvm_dev_ioctl,
2015 .compat_ioctl = kvm_dev_ioctl, 2298 .compat_ioctl = kvm_dev_ioctl,
2016 .mmap = kvm_dev_mmap,
2017}; 2299};
2018 2300
2019static struct miscdevice kvm_dev = { 2301static struct miscdevice kvm_dev = {
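After this split, /dev/kvm only answers global queries and mints VM descriptors; everything per-VM or per-vcpu moves to the kvmfs files, which is why struct kvm_run, kvm_regs and friends no longer need a ->vcpu slot field. The resulting userspace call sequence, sketched:

        int kvm_fd  = open("/dev/kvm", O_RDWR);
        int vm_fd   = ioctl(kvm_fd, KVM_CREATE_VM, 0);   /* kvmfs VM file  */
        int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);  /* pins the VM fd */

        struct kvm_run run;
        memset(&run, 0, sizeof run);
        ioctl(vcpu_fd, KVM_RUN, &run);  /* vcpu implied by the fd itself */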
@@ -2080,13 +2362,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
2080 int cpu = (long)v; 2362 int cpu = (long)v;
2081 2363
2082 switch (val) { 2364 switch (val) {
2083 case CPU_DEAD: 2365 case CPU_DOWN_PREPARE:
2084 case CPU_UP_CANCELED: 2366 case CPU_UP_CANCELED:
2367 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
2368 cpu);
2085 decache_vcpus_on_cpu(cpu); 2369 decache_vcpus_on_cpu(cpu);
2086 smp_call_function_single(cpu, kvm_arch_ops->hardware_disable, 2370 smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
2087 NULL, 0, 1); 2371 NULL, 0, 1);
2088 break; 2372 break;
2089 case CPU_UP_PREPARE: 2373 case CPU_ONLINE:
2374 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
2375 cpu);
2090 smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, 2376 smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
2091 NULL, 0, 1); 2377 NULL, 0, 1);
2092 break; 2378 break;
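The notifier hooks move because hardware_enable() and hardware_disable() are delivered by IPI, and the target CPU can only take an IPI while it is online: CPU_UP_PREPARE fires before the CPU runs and CPU_DEAD after it is gone, so both calls were previously lost. In timeline form:

/*  CPU_UP_PREPARE   not yet running  -> cannot IPI (old enable point)
 *  CPU_ONLINE       running          -> hardware_enable()  (new)
 *  CPU_DOWN_PREPARE still running    -> hardware_disable() (new)
 *  CPU_DEAD         already offline  -> cannot IPI (old disable point)
 */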
@@ -2121,13 +2407,13 @@ static void kvm_exit_debug(void)
2121static int kvm_suspend(struct sys_device *dev, pm_message_t state) 2407static int kvm_suspend(struct sys_device *dev, pm_message_t state)
2122{ 2408{
2123 decache_vcpus_on_cpu(raw_smp_processor_id()); 2409 decache_vcpus_on_cpu(raw_smp_processor_id());
2124 on_each_cpu(kvm_arch_ops->hardware_disable, 0, 0, 1); 2410 on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
2125 return 0; 2411 return 0;
2126} 2412}
2127 2413
2128static int kvm_resume(struct sys_device *dev) 2414static int kvm_resume(struct sys_device *dev)
2129{ 2415{
2130 on_each_cpu(kvm_arch_ops->hardware_enable, 0, 0, 1); 2416 on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
2131 return 0; 2417 return 0;
2132} 2418}
2133 2419
@@ -2144,6 +2430,18 @@ static struct sys_device kvm_sysdev = {
2144 2430
2145hpa_t bad_page_address; 2431hpa_t bad_page_address;
2146 2432
2433static int kvmfs_get_sb(struct file_system_type *fs_type, int flags,
2434 const char *dev_name, void *data, struct vfsmount *mnt)
2435{
2436 return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_SUPER_MAGIC, mnt);
2437}
2438
2439static struct file_system_type kvm_fs_type = {
2440 .name = "kvmfs",
2441 .get_sb = kvmfs_get_sb,
2442 .kill_sb = kill_anon_super,
2443};
2444
2147int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) 2445int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
2148{ 2446{
2149 int r; 2447 int r;
@@ -2220,8 +2518,16 @@ void kvm_exit_arch(void)
2220static __init int kvm_init(void) 2518static __init int kvm_init(void)
2221{ 2519{
2222 static struct page *bad_page; 2520 static struct page *bad_page;
2223 int r = 0; 2521 int r;
2522
2523 r = register_filesystem(&kvm_fs_type);
2524 if (r)
2525 goto out3;
2224 2526
2527 kvmfs_mnt = kern_mount(&kvm_fs_type);
2528 r = PTR_ERR(kvmfs_mnt);
2529 if (IS_ERR(kvmfs_mnt))
2530 goto out2;
2225 kvm_init_debug(); 2531 kvm_init_debug();
2226 2532
2227 kvm_init_msr_list(); 2533 kvm_init_msr_list();
@@ -2234,10 +2540,14 @@ static __init int kvm_init(void)
2234 bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT; 2540 bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT;
2235 memset(__va(bad_page_address), 0, PAGE_SIZE); 2541 memset(__va(bad_page_address), 0, PAGE_SIZE);
2236 2542
2237 return r; 2543 return 0;
2238 2544
2239out: 2545out:
2240 kvm_exit_debug(); 2546 kvm_exit_debug();
2547 mntput(kvmfs_mnt);
2548out2:
2549 unregister_filesystem(&kvm_fs_type);
2550out3:
2241 return r; 2551 return r;
2242} 2552}
2243 2553
@@ -2245,6 +2555,8 @@ static __exit void kvm_exit(void)
2245{ 2555{
2246 kvm_exit_debug(); 2556 kvm_exit_debug();
2247 __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); 2557 __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
2558 mntput(kvmfs_mnt);
2559 unregister_filesystem(&kvm_fs_type);
2248} 2560}
2249 2561
2250module_init(kvm_init) 2562module_init(kvm_init)
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index 74cc862f4935..624f1ca48657 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -1,6 +1,7 @@
1#ifndef __KVM_SVM_H 1#ifndef __KVM_SVM_H
2#define __KVM_SVM_H 2#define __KVM_SVM_H
3 3
4#include <linux/kernel.h>
4#include <linux/types.h> 5#include <linux/types.h>
5#include <linux/list.h> 6#include <linux/list.h>
6#include <asm/msr.h> 7#include <asm/msr.h>
@@ -18,7 +19,7 @@ static const u32 host_save_msrs[] = {
18 MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/ 19 MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/
19}; 20};
20 21
21#define NR_HOST_SAVE_MSRS (sizeof(host_save_msrs) / sizeof(*host_save_msrs)) 22#define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs)
22#define NUM_DB_REGS 4 23#define NUM_DB_REGS 4
23 24
24struct vcpu_svm { 25struct vcpu_svm {
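NR_HOST_SAVE_MSRS now uses the common helper, which is why <linux/kernel.h> joins the includes. For reference, the kernel's definition at the time:

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))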
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index be793770f31b..a1a93368f314 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -298,18 +298,18 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
298 if (!is_rmap_pte(*spte)) 298 if (!is_rmap_pte(*spte))
299 return; 299 return;
300 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); 300 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
301 if (!page->private) { 301 if (!page_private(page)) {
302 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); 302 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
303 page->private = (unsigned long)spte; 303 set_page_private(page,(unsigned long)spte);
304 } else if (!(page->private & 1)) { 304 } else if (!(page_private(page) & 1)) {
305 rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); 305 rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
306 desc = mmu_alloc_rmap_desc(vcpu); 306 desc = mmu_alloc_rmap_desc(vcpu);
307 desc->shadow_ptes[0] = (u64 *)page->private; 307 desc->shadow_ptes[0] = (u64 *)page_private(page);
308 desc->shadow_ptes[1] = spte; 308 desc->shadow_ptes[1] = spte;
309 page->private = (unsigned long)desc | 1; 309 set_page_private(page,(unsigned long)desc | 1);
310 } else { 310 } else {
311 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); 311 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
312 desc = (struct kvm_rmap_desc *)(page->private & ~1ul); 312 desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
313 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) 313 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
314 desc = desc->more; 314 desc = desc->more;
315 if (desc->shadow_ptes[RMAP_EXT-1]) { 315 if (desc->shadow_ptes[RMAP_EXT-1]) {
@@ -337,12 +337,12 @@ static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
337 if (j != 0) 337 if (j != 0)
338 return; 338 return;
339 if (!prev_desc && !desc->more) 339 if (!prev_desc && !desc->more)
340 page->private = (unsigned long)desc->shadow_ptes[0]; 340 set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
341 else 341 else
342 if (prev_desc) 342 if (prev_desc)
343 prev_desc->more = desc->more; 343 prev_desc->more = desc->more;
344 else 344 else
345 page->private = (unsigned long)desc->more | 1; 345 set_page_private(page,(unsigned long)desc->more | 1);
346 mmu_free_rmap_desc(vcpu, desc); 346 mmu_free_rmap_desc(vcpu, desc);
347} 347}
348 348
@@ -356,20 +356,20 @@ static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
356 if (!is_rmap_pte(*spte)) 356 if (!is_rmap_pte(*spte))
357 return; 357 return;
358 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); 358 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
359 if (!page->private) { 359 if (!page_private(page)) {
360 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 360 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
361 BUG(); 361 BUG();
362 } else if (!(page->private & 1)) { 362 } else if (!(page_private(page) & 1)) {
363 rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); 363 rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
364 if ((u64 *)page->private != spte) { 364 if ((u64 *)page_private(page) != spte) {
365 printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", 365 printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
366 spte, *spte); 366 spte, *spte);
367 BUG(); 367 BUG();
368 } 368 }
369 page->private = 0; 369 set_page_private(page,0);
370 } else { 370 } else {
371 rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); 371 rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
372 desc = (struct kvm_rmap_desc *)(page->private & ~1ul); 372 desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
373 prev_desc = NULL; 373 prev_desc = NULL;
374 while (desc) { 374 while (desc) {
375 for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) 375 for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
@@ -398,11 +398,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
398 BUG_ON(!slot); 398 BUG_ON(!slot);
399 page = gfn_to_page(slot, gfn); 399 page = gfn_to_page(slot, gfn);
400 400
401 while (page->private) { 401 while (page_private(page)) {
402 if (!(page->private & 1)) 402 if (!(page_private(page) & 1))
403 spte = (u64 *)page->private; 403 spte = (u64 *)page_private(page);
404 else { 404 else {
405 desc = (struct kvm_rmap_desc *)(page->private & ~1ul); 405 desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
406 spte = desc->shadow_ptes[0]; 406 spte = desc->shadow_ptes[0];
407 } 407 }
408 BUG_ON(!spte); 408 BUG_ON(!spte);
@@ -1218,7 +1218,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
1218 INIT_LIST_HEAD(&page_header->link); 1218 INIT_LIST_HEAD(&page_header->link);
1219 if ((page = alloc_page(GFP_KERNEL)) == NULL) 1219 if ((page = alloc_page(GFP_KERNEL)) == NULL)
1220 goto error_1; 1220 goto error_1;
1221 page->private = (unsigned long)page_header; 1221 set_page_private(page, (unsigned long)page_header);
1222 page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; 1222 page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
1223 memset(__va(page_header->page_hpa), 0, PAGE_SIZE); 1223 memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
1224 list_add(&page_header->link, &vcpu->free_pages); 1224 list_add(&page_header->link, &vcpu->free_pages);
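All raw page->private pokes switch to the mm accessors so KVM keeps compiling when the field's layout changes (it later moves into a union). The accessors are trivial wrappers, per <linux/mm.h> of this era:

#define page_private(page)              ((page)->private)
#define set_page_private(page, v)       ((page)->private = (v))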
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index b6b90e9e1301..f3bcee904651 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -128,8 +128,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
128 goto access_error; 128 goto access_error;
129#endif 129#endif
130 130
131 if (!(*ptep & PT_ACCESSED_MASK)) 131 if (!(*ptep & PT_ACCESSED_MASK)) {
132 *ptep |= PT_ACCESSED_MASK; /* avoid rmw */ 132 mark_page_dirty(vcpu->kvm, table_gfn);
133 *ptep |= PT_ACCESSED_MASK;
134 }
133 135
134 if (walker->level == PT_PAGE_TABLE_LEVEL) { 136 if (walker->level == PT_PAGE_TABLE_LEVEL) {
135 walker->gfn = (*ptep & PT_BASE_ADDR_MASK) 137 walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
@@ -185,6 +187,12 @@ static void FNAME(release_walker)(struct guest_walker *walker)
185 kunmap_atomic(walker->table, KM_USER0); 187 kunmap_atomic(walker->table, KM_USER0);
186} 188}
187 189
190static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
191 struct guest_walker *walker)
192{
193 mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
194}
195
188static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, 196static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte,
189 u64 *shadow_pte, u64 access_bits, gfn_t gfn) 197 u64 *shadow_pte, u64 access_bits, gfn_t gfn)
190{ 198{
@@ -348,12 +356,15 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
348 } else if (kvm_mmu_lookup_page(vcpu, gfn)) { 356 } else if (kvm_mmu_lookup_page(vcpu, gfn)) {
349 pgprintk("%s: found shadow page for %lx, marking ro\n", 357 pgprintk("%s: found shadow page for %lx, marking ro\n",
350 __FUNCTION__, gfn); 358 __FUNCTION__, gfn);
359 mark_page_dirty(vcpu->kvm, gfn);
360 FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
351 *guest_ent |= PT_DIRTY_MASK; 361 *guest_ent |= PT_DIRTY_MASK;
352 *write_pt = 1; 362 *write_pt = 1;
353 return 0; 363 return 0;
354 } 364 }
355 mark_page_dirty(vcpu->kvm, gfn); 365 mark_page_dirty(vcpu->kvm, gfn);
356 *shadow_ent |= PT_WRITABLE_MASK; 366 *shadow_ent |= PT_WRITABLE_MASK;
367 FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
357 *guest_ent |= PT_DIRTY_MASK; 368 *guest_ent |= PT_DIRTY_MASK;
358 rmap_add(vcpu, shadow_ent); 369 rmap_add(vcpu, shadow_ent);
359 370
@@ -430,9 +441,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
430 /* 441 /*
431 * mmio: emulate if accessible, otherwise its a guest fault. 442 * mmio: emulate if accessible, otherwise its a guest fault.
432 */ 443 */
433 if (is_io_pte(*shadow_pte)) { 444 if (is_io_pte(*shadow_pte))
434 return 1; 445 return 1;
435 }
436 446
437 ++kvm_stat.pf_fixed; 447 ++kvm_stat.pf_fixed;
438 kvm_mmu_audit(vcpu, "post page fault (fixed)"); 448 kvm_mmu_audit(vcpu, "post page fault (fixed)");
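Setting an accessed or dirty bit in a guest PTE is itself a host write into guest memory, so the page holding the PTE must land in the dirty log too; that is all mark_pagetable_dirty() does, using the gfn the walker recorded for the current level. On a write fault, two distinct frames therefore get logged:

        mark_page_dirty(vcpu->kvm, gfn);                /* the data page written */
        FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); /* the guest page table  */
        *guest_ent |= PT_DIRTY_MASK;                    /* the bit being set     */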
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 83da4ea150a3..3d8ea7ac2ecc 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/kernel.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/profile.h>
@@ -75,7 +76,7 @@ struct svm_init_data {
 
 static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 
-#define NUM_MSR_MAPS (sizeof(msrpm_ranges) / sizeof(*msrpm_ranges))
+#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
 #define MSRS_RANGE_SIZE 2048
 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
 
@@ -485,6 +486,7 @@ static void init_vmcb(struct vmcb *vmcb)
 
 	control->intercept =	(1ULL << INTERCEPT_INTR) |
 				(1ULL << INTERCEPT_NMI) |
+				(1ULL << INTERCEPT_SMI) |
 		/*
 		 * selective cr0 intercept bug?
 		 *	0:   0f 22 d8		mov    %eax,%cr3
@@ -553,7 +555,7 @@ static void init_vmcb(struct vmcb *vmcb)
 	 * cr0 val on cpu init should be 0x60000010, we enable cpu
 	 * cache by default. the orderly way is to enable cache in bios.
 	 */
-	save->cr0 = 0x00000010 | CR0_PG_MASK;
+	save->cr0 = 0x00000010 | CR0_PG_MASK | CR0_WP_MASK;
 	save->cr4 = CR4_PAE_MASK;
 	/* rdx = ?? */
 }
@@ -598,10 +600,9 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	kfree(vcpu->svm);
 }
 
-static struct kvm_vcpu *svm_vcpu_load(struct kvm_vcpu *vcpu)
+static void svm_vcpu_load(struct kvm_vcpu *vcpu)
 {
 	get_cpu();
-	return vcpu;
 }
 
 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1042,22 +1043,22 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 		addr_mask = io_adress(vcpu, _in, &kvm_run->io.address);
 		if (!addr_mask) {
-			printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__);
+			printk(KERN_DEBUG "%s: get io address failed\n",
+			       __FUNCTION__);
 			return 1;
 		}
 
 		if (kvm_run->io.rep) {
-			kvm_run->io.count = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
+			kvm_run->io.count
+				= vcpu->regs[VCPU_REGS_RCX] & addr_mask;
 			kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags
 						   & X86_EFLAGS_DF) != 0;
 		}
-	} else {
+	} else
 		kvm_run->io.value = vcpu->svm->vmcb->save.rax;
-	}
 	return 0;
 }
 
-
 static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	return 1;
@@ -1075,6 +1076,12 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 0;
 }
 
+static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	vcpu->svm->vmcb->save.rip += 3;
+	return kvm_hypercall(vcpu, kvm_run);
+}
+
 static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	inject_ud(vcpu);
@@ -1275,7 +1282,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[SVM_EXIT_TASK_SWITCH]			= task_switch_interception,
 	[SVM_EXIT_SHUTDOWN]			= shutdown_interception,
 	[SVM_EXIT_VMRUN]			= invalid_op_interception,
-	[SVM_EXIT_VMMCALL]			= invalid_op_interception,
+	[SVM_EXIT_VMMCALL]			= vmmcall_interception,
 	[SVM_EXIT_VMLOAD]			= invalid_op_interception,
 	[SVM_EXIT_VMSAVE]			= invalid_op_interception,
 	[SVM_EXIT_STGI]				= invalid_op_interception,
@@ -1297,7 +1304,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		       __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info,
 		       exit_code);
 
-	if (exit_code >= sizeof(svm_exit_handlers) / sizeof(*svm_exit_handlers)
+	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || svm_exit_handlers[exit_code] == 0) {
 		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
 		printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n",
@@ -1668,6 +1675,18 @@ static int is_disabled(void)
 	return 0;
 }
 
+static void
+svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+	/*
+	 * Patch in the VMMCALL instruction:
+	 */
+	hypercall[0] = 0x0f;
+	hypercall[1] = 0x01;
+	hypercall[2] = 0xd9;
+	hypercall[3] = 0xc3;
+}
+
 static struct kvm_arch_ops svm_arch_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1716,6 +1735,7 @@ static struct kvm_arch_ops svm_arch_ops = {
 	.run = svm_vcpu_run,
 	.skip_emulated_instruction = skip_emulated_instruction,
 	.vcpu_setup = svm_vcpu_setup,
+	.patch_hypercall = svm_patch_hypercall,
 };
 
 static int __init svm_init(void)
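
svm_patch_hypercall() installs four bytes at the guest's registered hypercall
page: the 3-byte VMMCALL opcode followed by a RET, so the guest can call the
page like an ordinary function. That is also why vmmcall_interception() does
rip += 3: it skips only the VMMCALL, so the guest resumes at the RET and
returns to its caller. Annotated for reference (a reading aid, not additional
kernel code):

	static const unsigned char svm_hypercall_page[4] = {
		0x0f, 0x01, 0xd9,	/* vmmcall: #VMEXIT, code SVM_EXIT_VMMCALL */
		0xc3,			/* ret: back to the guest-side caller */
	};
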
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index fd4e91734388..c07178e61122 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -19,6 +19,7 @@
 #include "vmx.h"
 #include "kvm_vmx.h"
 #include <linux/module.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/profile.h>
@@ -27,7 +28,6 @@
 
 #include "segment_descriptor.h"
 
-
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -76,7 +76,7 @@ static const u32 vmx_msr_index[] = {
 #endif
 	MSR_EFER, MSR_K6_STAR,
 };
-#define NR_VMX_MSR (sizeof(vmx_msr_index) / sizeof(*vmx_msr_index))
+#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
 static inline int is_page_fault(u32 intr_info)
 {
@@ -204,7 +204,7 @@ static void vmcs_write64(unsigned long field, u64 value)
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
  */
-static struct kvm_vcpu *vmx_vcpu_load(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
 {
 	u64 phys_addr = __pa(vcpu->vmcs);
 	int cpu;
@@ -242,7 +242,6 @@ static struct kvm_vcpu *vmx_vcpu_load(struct kvm_vcpu *vcpu)
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 	}
-	return vcpu;
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
@@ -418,10 +417,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 	case MSR_IA32_SYSENTER_ESP:
 		vmcs_write32(GUEST_SYSENTER_ESP, data);
 		break;
-	case MSR_IA32_TIME_STAMP_COUNTER: {
+	case MSR_IA32_TIME_STAMP_COUNTER:
 		guest_write_tsc(data);
 		break;
-	}
 	default:
 		msr = find_msr_entry(vcpu, msr_index);
 		if (msr) {
@@ -793,6 +791,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
  */
 static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
+	if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
+		enter_rmode(vcpu);
+
 	vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0);
 	update_exception_bitmap(vcpu);
 	vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -1467,6 +1468,18 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 0;
 }
 
+static void
+vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+	/*
+	 * Patch in the VMCALL instruction:
+	 */
+	hypercall[0] = 0x0f;
+	hypercall[1] = 0x01;
+	hypercall[2] = 0xc1;
+	hypercall[3] = 0xc3;
+}
+
 static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	u64 exit_qualification;
@@ -1643,6 +1656,12 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 0;
 }
 
+static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3);
+	return kvm_hypercall(vcpu, kvm_run);
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -1661,6 +1680,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_MSR_WRITE]		      = handle_wrmsr,
 	[EXIT_REASON_PENDING_INTERRUPT]	      = handle_interrupt_window,
 	[EXIT_REASON_HLT]		      = handle_halt,
+	[EXIT_REASON_VMCALL]		      = handle_vmcall,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -2062,6 +2082,7 @@ static struct kvm_arch_ops vmx_arch_ops = {
 	.run = vmx_vcpu_run,
 	.skip_emulated_instruction = skip_emulated_instruction,
 	.vcpu_setup = vmx_vcpu_setup,
+	.patch_hypercall = vmx_patch_hypercall,
 };
 
 static int __init vmx_init(void)
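
vmx_patch_hypercall() is the Intel-side counterpart: VMCALL encodes as
0f 01 c1, so only the ModRM byte differs from AMD's VMMCALL. That one
vendor-specific byte is why patch_hypercall is a kvm_arch_ops callback rather
than common code; handle_vmcall() likewise advances GUEST_RIP past the 3-byte
opcode before dispatching to kvm_hypercall(). For symmetry with the SVM note
above (annotation only):

	static const unsigned char vmx_hypercall_page[4] = {
		0x0f, 0x01, 0xc1,	/* vmcall: VM exit, EXIT_REASON_VMCALL */
		0xc3,			/* ret: back to the guest-side caller */
	};
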
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f3604593fb76..275354ffa1cb 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -11,7 +11,7 @@
 #include <asm/types.h>
 #include <linux/ioctl.h>
 
-#define KVM_API_VERSION 3
+#define KVM_API_VERSION 4
 
 /*
  * Architectural interrupt line count, and the size of the bitmap needed
@@ -52,11 +52,10 @@ enum kvm_exit_reason {
 /* for KVM_RUN */
 struct kvm_run {
 	/* in */
-	__u32 vcpu;
 	__u32 emulated;  /* skip current instruction */
 	__u32 mmio_completed; /* mmio request completed */
 	__u8 request_interrupt_window;
-	__u8 padding1[3];
+	__u8 padding1[7];
 
 	/* out */
 	__u32 exit_type;
@@ -111,10 +110,6 @@ struct kvm_run {
 
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
-	/* in */
-	__u32 vcpu;
-	__u32 padding;
-
 	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
 	__u64 rax, rbx, rcx, rdx;
 	__u64 rsi, rdi, rsp, rbp;
@@ -141,10 +136,6 @@ struct kvm_dtable {
 
 /* for KVM_GET_SREGS and KVM_SET_SREGS */
 struct kvm_sregs {
-	/* in */
-	__u32 vcpu;
-	__u32 padding;
-
 	/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
 	struct kvm_segment cs, ds, es, fs, gs, ss;
 	struct kvm_segment tr, ldt;
@@ -163,8 +154,8 @@ struct kvm_msr_entry {
 
 /* for KVM_GET_MSRS and KVM_SET_MSRS */
 struct kvm_msrs {
-	__u32 vcpu;
 	__u32 nmsrs; /* number of msrs in entries */
+	__u32 pad;
 
 	struct kvm_msr_entry entries[0];
 };
@@ -179,8 +170,6 @@ struct kvm_msr_list {
 struct kvm_translation {
 	/* in */
 	__u64 linear_address;
-	__u32 vcpu;
-	__u32 padding;
 
 	/* out */
 	__u64 physical_address;
@@ -193,7 +182,6 @@ struct kvm_translation {
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
 	/* in */
-	__u32 vcpu;
 	__u32 irq;
 };
 
@@ -206,8 +194,8 @@ struct kvm_breakpoint {
 /* for KVM_DEBUG_GUEST */
 struct kvm_debug_guest {
 	/* int */
-	__u32 vcpu;
 	__u32 enabled;
+	__u32 pad;
 	struct kvm_breakpoint breakpoints[4];
 	__u32 singlestep;
 };
@@ -224,20 +212,36 @@ struct kvm_dirty_log {
 
 #define KVMIO 0xAE
 
+/*
+ * ioctls for /dev/kvm fds:
+ */
 #define KVM_GET_API_VERSION       _IO(KVMIO, 1)
+#define KVM_CREATE_VM             _IO(KVMIO, 2) /* returns a VM fd */
+#define KVM_GET_MSR_INDEX_LIST    _IOWR(KVMIO, 15, struct kvm_msr_list)
+
+/*
+ * ioctls for VM fds
+ */
+#define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 10, struct kvm_memory_region)
+/*
+ * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
+ * a vcpu fd.
+ */
+#define KVM_CREATE_VCPU           _IOW(KVMIO, 11, int)
+#define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 12, struct kvm_dirty_log)
+
+/*
+ * ioctls for vcpu fds
+ */
 #define KVM_RUN                   _IOWR(KVMIO, 2, struct kvm_run)
-#define KVM_GET_REGS              _IOWR(KVMIO, 3, struct kvm_regs)
+#define KVM_GET_REGS              _IOR(KVMIO, 3, struct kvm_regs)
 #define KVM_SET_REGS              _IOW(KVMIO, 4, struct kvm_regs)
-#define KVM_GET_SREGS             _IOWR(KVMIO, 5, struct kvm_sregs)
+#define KVM_GET_SREGS             _IOR(KVMIO, 5, struct kvm_sregs)
 #define KVM_SET_SREGS             _IOW(KVMIO, 6, struct kvm_sregs)
 #define KVM_TRANSLATE             _IOWR(KVMIO, 7, struct kvm_translation)
 #define KVM_INTERRUPT             _IOW(KVMIO, 8, struct kvm_interrupt)
 #define KVM_DEBUG_GUEST           _IOW(KVMIO, 9, struct kvm_debug_guest)
-#define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 10, struct kvm_memory_region)
-#define KVM_CREATE_VCPU           _IOW(KVMIO, 11, int /* vcpu_slot */)
-#define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 12, struct kvm_dirty_log)
 #define KVM_GET_MSRS              _IOWR(KVMIO, 13, struct kvm_msrs)
-#define KVM_SET_MSRS              _IOWR(KVMIO, 14, struct kvm_msrs)
-#define KVM_GET_MSR_INDEX_LIST    _IOWR(KVMIO, 15, struct kvm_msr_list)
+#define KVM_SET_MSRS              _IOW(KVMIO, 14, struct kvm_msrs)
 
 #endif
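
The kvm.h changes split the previously flat ioctl list into a three-level fd
hierarchy: system-wide ioctls on /dev/kvm, VM ioctls on the fd returned by
KVM_CREATE_VM, and vcpu ioctls on the fd returned by KVM_CREATE_VCPU. That is
why every "__u32 vcpu" field disappears from the argument structs: the file
descriptor itself now names the vcpu. A minimal userspace sketch of the new
flow (illustrative only, error handling omitted):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int dump_vcpu0_regs(struct kvm_regs *regs)
	{
		int sys_fd  = open("/dev/kvm", O_RDWR);		/* system ioctls */
		int vm_fd   = ioctl(sys_fd, KVM_CREATE_VM, 0);	/* returns VM fd */
		int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);	/* slot 0 vcpu fd */

		/* No .vcpu field in kvm_regs any more - the fd selects the vcpu. */
		return ioctl(vcpu_fd, KVM_GET_REGS, regs);
	}
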
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
new file mode 100644
index 000000000000..3b292565a693
--- /dev/null
+++ b/include/linux/kvm_para.h
@@ -0,0 +1,73 @@
+#ifndef __LINUX_KVM_PARA_H
+#define __LINUX_KVM_PARA_H
+
+/*
+ * Guest OS interface for KVM paravirtualization
+ *
+ * Note: this interface is totally experimental, and is certain to change
+ *       as we make progress.
+ */
+
+/*
+ * Per-VCPU descriptor area shared between guest and host. Writable to
+ * both guest and host. Registered with the host by the guest when
+ * a guest acknowledges paravirtual mode.
+ *
+ * NOTE: all addresses are guest-physical addresses (gpa), to make it
+ * easier for the hypervisor to map between the various addresses.
+ */
+struct kvm_vcpu_para_state {
+	/*
+	 * API version information for compatibility. If there's any support
+	 * mismatch (too old host trying to execute too new guest) then
+	 * the host will deny entry into paravirtual mode. Any other
+	 * combination (new host + old guest and new host + new guest)
+	 * is supposed to work - new host versions will support all old
+	 * guest API versions.
+	 */
+	u32 guest_version;
+	u32 host_version;
+	u32 size;
+	u32 ret;
+
+	/*
+	 * The address of the vm exit instruction (VMCALL or VMMCALL),
+	 * which the host will patch according to the CPU model the
+	 * VM runs on:
+	 */
+	u64 hypercall_gpa;
+
+} __attribute__ ((aligned(PAGE_SIZE)));
+
+#define KVM_PARA_API_VERSION 1
+
+/*
+ * This is used for an RDMSR's ECX parameter to probe for a KVM host.
+ * Hopefully no CPU vendor will use up this number. This is placed well
+ * out of way of the typical space occupied by CPU vendors' MSR indices,
+ * and we think (or at least hope) it wont be occupied in the future
+ * either.
+ */
+#define MSR_KVM_API_MAGIC 0x87655678
+
+#define KVM_EINVAL 1
+
+/*
+ * Hypercall calling convention:
+ *
+ * Each hypercall may have 0-6 parameters.
+ *
+ * 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
+ *
+ * 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
+ * order: RDI, RSI, RDX, RCX, R8, R9.
+ *
+ * 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
+ * (the first 3 are according to the gcc regparm calling convention)
+ *
+ * No registers are clobbered by the hypercall, except that the
+ * return value is in RAX.
+ */
+#define __NR_hypercalls 0
+
+#endif
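
Given the convention documented above, a 64-bit guest that has mapped the page
named by hypercall_gpa (after the host patched it) would issue a two-argument
hypercall roughly as follows - a hypothetical guest-side helper sketched under
the stated ABI, not part of this patch:

	static inline long kvm_hypercall2(void *hypercall_addr,
					  long nr, long p1, long p2)
	{
		register long arg1 asm("rdi") = p1;	/* parameter 1: RDI */
		register long arg2 asm("rsi") = p2;	/* parameter 2: RSI */
		long ret;

		/* Index in RAX; VMCALL/VMMCALL traps to the host, and the
		 * patched-in RET brings us back with the result in RAX. */
		asm volatile("call *%[addr]"
			     : "=a" (ret)
			     : "a" (nr), "r" (arg1), "r" (arg2),
			       [addr] "rm" (hypercall_addr)
			     : "memory");
		return ret;
	}
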
diff --git a/include/linux/magic.h b/include/linux/magic.h
index b32c8a97fcec..a9c6567fe70c 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -13,6 +13,7 @@
 #define HPFS_SUPER_MAGIC	0xf995e849
 #define ISOFS_SUPER_MAGIC	0x9660
 #define JFFS2_SUPER_MAGIC	0x72b6
+#define KVMFS_SUPER_MAGIC	0x19700426
 
 #define MINIX_SUPER_MAGIC	0x137F		/* original minix fs */
 #define MINIX_SUPER_MAGIC2	0x138F		/* minix fs, 30 char names */