Diffstat (limited to 'drivers/kvm/kvm_main.c')
 drivers/kvm/kvm_main.c | 1486 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 950 insertions(+), 536 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index cd0557954e50..353e58527d15 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -18,6 +18,7 @@
 #include "kvm.h"
 #include "x86_emulate.h"
 #include "segment_descriptor.h"
+#include "irq.h"
 
 #include <linux/kvm.h>
 #include <linux/module.h>
@@ -37,6 +38,7 @@
 #include <linux/cpumask.h>
 #include <linux/smp.h>
 #include <linux/anon_inodes.h>
+#include <linux/profile.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -52,9 +54,11 @@ static LIST_HEAD(vm_list);
 
 static cpumask_t cpus_hardware_enabled;
 
-struct kvm_arch_ops *kvm_arch_ops;
+struct kvm_x86_ops *kvm_x86_ops;
+struct kmem_cache *kvm_vcpu_cache;
+EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
 
-static void hardware_disable(void *ignored);
+static __read_mostly struct preempt_ops kvm_preempt_ops;
 
 #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 
@@ -73,6 +77,7 @@ static struct kvm_stats_debugfs_item {
 	{ "signal_exits", STAT_OFFSET(signal_exits) },
 	{ "irq_window", STAT_OFFSET(irq_window_exits) },
 	{ "halt_exits", STAT_OFFSET(halt_exits) },
+	{ "halt_wakeup", STAT_OFFSET(halt_wakeup) },
 	{ "request_irq", STAT_OFFSET(request_irq_exits) },
 	{ "irq_exits", STAT_OFFSET(irq_exits) },
 	{ "light_exits", STAT_OFFSET(light_exits) },
@@ -84,10 +89,17 @@ static struct dentry *debugfs_dir;
 
 #define MAX_IO_MSRS 256
 
-#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL
-#define LMSW_GUEST_MASK 0x0eULL
-#define CR4_RESEVED_BITS (~((1ULL << 11) - 1))
-#define CR8_RESEVED_BITS (~0x0fULL)
+#define CR0_RESERVED_BITS \
+	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
+			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
+#define CR4_RESERVED_BITS \
+	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
+			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
+			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+
+#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 #define EFER_RESERVED_BITS 0xfffffffffffff2fe
 
 #ifdef CONFIG_X86_64
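Note: the renamed macros (RESEVED -> RESERVED) also switch from magic constants to the complement of the explicitly allowed architectural bits, so a single AND catches any attempt to set an undefined bit. A stand-alone illustration of the idiom (plain C, not part of the patch):

#include <stdio.h>

/* Same construction as the patch: enumerate the allowed bits, treat
 * everything else as reserved. X86_CR8_TPR is the 4-bit task-priority
 * field, the only architecturally defined part of CR8. */
#define X86_CR8_TPR		0x0fUL
#define CR8_RESERVED_BITS	(~(unsigned long)X86_CR8_TPR)

static int cr8_valid(unsigned long cr8)
{
	return (cr8 & CR8_RESERVED_BITS) == 0;	/* no reserved bit set */
}

int main(void)
{
	printf("%d %d\n", cr8_valid(0x7), cr8_valid(0x10));	/* prints: 1 0 */
	return 0;
}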
@@ -139,82 +151,14 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
-		   void *dest)
-{
-	unsigned char *host_buf = dest;
-	unsigned long req_size = size;
-
-	while (size) {
-		hpa_t paddr;
-		unsigned now;
-		unsigned offset;
-		hva_t guest_buf;
-
-		paddr = gva_to_hpa(vcpu, addr);
-
-		if (is_error_hpa(paddr))
-			break;
-
-		guest_buf = (hva_t)kmap_atomic(
-					pfn_to_page(paddr >> PAGE_SHIFT),
-					KM_USER0);
-		offset = addr & ~PAGE_MASK;
-		guest_buf |= offset;
-		now = min(size, PAGE_SIZE - offset);
-		memcpy(host_buf, (void*)guest_buf, now);
-		host_buf += now;
-		addr += now;
-		size -= now;
-		kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0);
-	}
-	return req_size - size;
-}
-EXPORT_SYMBOL_GPL(kvm_read_guest);
-
-int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
-		    void *data)
-{
-	unsigned char *host_buf = data;
-	unsigned long req_size = size;
-
-	while (size) {
-		hpa_t paddr;
-		unsigned now;
-		unsigned offset;
-		hva_t guest_buf;
-		gfn_t gfn;
-
-		paddr = gva_to_hpa(vcpu, addr);
-
-		if (is_error_hpa(paddr))
-			break;
-
-		gfn = vcpu->mmu.gva_to_gpa(vcpu, addr) >> PAGE_SHIFT;
-		mark_page_dirty(vcpu->kvm, gfn);
-		guest_buf = (hva_t)kmap_atomic(
-				pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0);
-		offset = addr & ~PAGE_MASK;
-		guest_buf |= offset;
-		now = min(size, PAGE_SIZE - offset);
-		memcpy((void*)guest_buf, host_buf, now);
-		host_buf += now;
-		addr += now;
-		size -= now;
-		kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0);
-	}
-	return req_size - size;
-}
-EXPORT_SYMBOL_GPL(kvm_write_guest);
-
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
 		return;
 
 	vcpu->guest_fpu_loaded = 1;
-	fx_save(vcpu->host_fx_image);
-	fx_restore(vcpu->guest_fx_image);
+	fx_save(&vcpu->host_fx_image);
+	fx_restore(&vcpu->guest_fx_image);
 }
 EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
 
@@ -224,8 +168,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu->guest_fpu_loaded = 0;
-	fx_save(vcpu->guest_fx_image);
-	fx_restore(vcpu->host_fx_image);
+	fx_save(&vcpu->guest_fx_image);
+	fx_restore(&vcpu->host_fx_image);
 }
 EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
 
@@ -234,13 +178,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
  */
 static void vcpu_load(struct kvm_vcpu *vcpu)
 {
+	int cpu;
+
 	mutex_lock(&vcpu->mutex);
-	kvm_arch_ops->vcpu_load(vcpu);
+	cpu = get_cpu();
+	preempt_notifier_register(&vcpu->preempt_notifier);
+	kvm_x86_ops->vcpu_load(vcpu, cpu);
+	put_cpu();
 }
 
 static void vcpu_put(struct kvm_vcpu *vcpu)
 {
-	kvm_arch_ops->vcpu_put(vcpu);
+	preempt_disable();
+	kvm_x86_ops->vcpu_put(vcpu);
+	preempt_notifier_unregister(&vcpu->preempt_notifier);
+	preempt_enable();
 	mutex_unlock(&vcpu->mutex);
 }
 
@@ -261,8 +213,10 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	atomic_set(&completed, 0);
 	cpus_clear(cpus);
 	needed = 0;
-	for (i = 0; i < kvm->nvcpus; ++i) {
-		vcpu = &kvm->vcpus[i];
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
 		if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
@@ -286,37 +240,79 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	}
 }
 
+int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
+{
+	struct page *page;
+	int r;
+
+	mutex_init(&vcpu->mutex);
+	vcpu->cpu = -1;
+	vcpu->mmu.root_hpa = INVALID_PAGE;
+	vcpu->kvm = kvm;
+	vcpu->vcpu_id = id;
+	if (!irqchip_in_kernel(kvm) || id == 0)
+		vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+	else
+		vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
+	init_waitqueue_head(&vcpu->wq);
+
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		r = -ENOMEM;
+		goto fail;
+	}
+	vcpu->run = page_address(page);
+
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		r = -ENOMEM;
+		goto fail_free_run;
+	}
+	vcpu->pio_data = page_address(page);
+
+	r = kvm_mmu_create(vcpu);
+	if (r < 0)
+		goto fail_free_pio_data;
+
+	return 0;
+
+fail_free_pio_data:
+	free_page((unsigned long)vcpu->pio_data);
+fail_free_run:
+	free_page((unsigned long)vcpu->run);
+fail:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_init);
+
+void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	kvm_mmu_destroy(vcpu);
+	if (vcpu->apic)
+		hrtimer_cancel(&vcpu->apic->timer.dev);
+	kvm_free_apic(vcpu->apic);
+	free_page((unsigned long)vcpu->pio_data);
+	free_page((unsigned long)vcpu->run);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
+
 static struct kvm *kvm_create_vm(void)
 {
 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
-	int i;
 
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
 	kvm_io_bus_init(&kvm->pio_bus);
-	spin_lock_init(&kvm->lock);
+	mutex_init(&kvm->lock);
 	INIT_LIST_HEAD(&kvm->active_mmu_pages);
 	kvm_io_bus_init(&kvm->mmio_bus);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
-
-		mutex_init(&vcpu->mutex);
-		vcpu->cpu = -1;
-		vcpu->kvm = kvm;
-		vcpu->mmu.root_hpa = INVALID_PAGE;
-	}
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
 	return kvm;
 }
 
-static int kvm_dev_open(struct inode *inode, struct file *filp)
-{
-	return 0;
-}
-
 /*
  * Free any memory in @free but not in @dont.
  */
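Note: the allocation sequence in the new kvm_vcpu_init() is the standard kernel goto ladder: each failure label unwinds exactly the resources acquired before the failing step, in reverse order, so no path leaks or double-frees. The shape of the pattern reduced to plain C (names hypothetical):

#include <stdlib.h>

struct ctx { void *a, *b, *c; };

int ctx_init(struct ctx *ctx)
{
	ctx->a = malloc(64);
	if (!ctx->a)
		goto fail;		/* nothing acquired yet */
	ctx->b = malloc(64);
	if (!ctx->b)
		goto fail_free_a;	/* undo step 1 only */
	ctx->c = malloc(64);
	if (!ctx->c)
		goto fail_free_b;	/* undo steps 2 and 1 */
	return 0;

fail_free_b:
	free(ctx->b);
fail_free_a:
	free(ctx->a);
fail:
	return -1;			/* -ENOMEM in the kernel version */
}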
@@ -353,7 +349,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
 {
 	int i;
 
-	for (i = 0; i < 2; ++i)
+	for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
 		if (vcpu->pio.guest_pages[i]) {
 			__free_page(vcpu->pio.guest_pages[i]);
 			vcpu->pio.guest_pages[i] = NULL;
@@ -362,30 +358,11 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->vmcs)
-		return;
-
 	vcpu_load(vcpu);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
 }
 
-static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
-{
-	if (!vcpu->vmcs)
-		return;
-
-	vcpu_load(vcpu);
-	kvm_mmu_destroy(vcpu);
-	vcpu_put(vcpu);
-	kvm_arch_ops->vcpu_free(vcpu);
-	free_page((unsigned long)vcpu->run);
-	vcpu->run = NULL;
-	free_page((unsigned long)vcpu->pio_data);
-	vcpu->pio_data = NULL;
-	free_pio_guest_pages(vcpu);
-}
-
 static void kvm_free_vcpus(struct kvm *kvm)
 {
 	unsigned int i;
@@ -394,14 +371,15 @@ static void kvm_free_vcpus(struct kvm *kvm)
 	 * Unpin any mmu pages first.
 	 */
 	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
-	for (i = 0; i < KVM_MAX_VCPUS; ++i)
-		kvm_free_vcpu(&kvm->vcpus[i]);
-}
+		if (kvm->vcpus[i])
+			kvm_unload_vcpu_mmu(kvm->vcpus[i]);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i]) {
+			kvm_x86_ops->vcpu_free(kvm->vcpus[i]);
+			kvm->vcpus[i] = NULL;
+		}
+	}
 
-static int kvm_dev_release(struct inode *inode, struct file *filp)
-{
-	return 0;
 }
 
 static void kvm_destroy_vm(struct kvm *kvm)
@@ -411,6 +389,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_io_bus_destroy(&kvm->pio_bus);
 	kvm_io_bus_destroy(&kvm->mmio_bus);
+	kfree(kvm->vpic);
+	kfree(kvm->vioapic);
 	kvm_free_vcpus(kvm);
 	kvm_free_physmem(kvm);
 	kfree(kvm);
@@ -426,7 +406,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 
 static void inject_gp(struct kvm_vcpu *vcpu)
 {
-	kvm_arch_ops->inject_gp(vcpu, 0);
+	kvm_x86_ops->inject_gp(vcpu, 0);
 }
 
 /*
@@ -437,58 +417,60 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
 	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
 	int i;
-	u64 pdpte;
 	u64 *pdpt;
 	int ret;
 	struct page *page;
+	u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
 
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	page = gfn_to_page(vcpu->kvm, pdpt_gfn);
-	/* FIXME: !page - emulate? 0xff? */
+	if (!page) {
+		ret = 0;
+		goto out;
+	}
+
 	pdpt = kmap_atomic(page, KM_USER0);
+	memcpy(pdpte, pdpt+offset, sizeof(pdpte));
+	kunmap_atomic(pdpt, KM_USER0);
 
-	ret = 1;
-	for (i = 0; i < 4; ++i) {
-		pdpte = pdpt[offset + i];
-		if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) {
+	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
+		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
 			ret = 0;
 			goto out;
 		}
 	}
+	ret = 1;
 
-	for (i = 0; i < 4; ++i)
-		vcpu->pdptrs[i] = pdpt[offset + i];
-
+	memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
 out:
-	kunmap_atomic(pdpt, KM_USER0);
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 
 	return ret;
 }
 
 void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
-	if (cr0 & CR0_RESEVED_BITS) {
+	if (cr0 & CR0_RESERVED_BITS) {
 		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
 		       cr0, vcpu->cr0);
 		inject_gp(vcpu);
 		return;
 	}
 
-	if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) {
+	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
 		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
 		inject_gp(vcpu);
 		return;
 	}
 
-	if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) {
+	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
 		printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
 		       "and a clear PE flag\n");
 		inject_gp(vcpu);
 		return;
 	}
 
-	if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) {
+	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 #ifdef CONFIG_X86_64
 		if ((vcpu->shadow_efer & EFER_LME)) {
 			int cs_db, cs_l;
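Note: load_pdptrs() now snapshots the four PDPTEs out of the atomically mapped page first, validates the copy, and only commits it to vcpu->pdptrs when every entry passes; a present entry with any bit of the 0xfffffff0000001e6 mask set is rejected. The check in isolation (illustrative, compiles stand-alone):

#include <stdint.h>

/* Mask taken from the patch: flag and upper physical-address bits that
 * must be clear in a present PAE page-directory-pointer entry. */
#define PDPTE_RESERVED_MASK	0xfffffff0000001e6ull

static int pdpte_valid(uint64_t pdpte)
{
	if (!(pdpte & 1))		/* not present: contents ignored */
		return 1;
	return (pdpte & PDPTE_RESERVED_MASK) == 0;
}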
@@ -499,7 +481,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 				inject_gp(vcpu);
 				return;
 			}
-			kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 			if (cs_l) {
 				printk(KERN_DEBUG "set_cr0: #GP, start paging "
 				       "in long mode while CS.L == 1\n");
@@ -518,12 +500,12 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	}
 
-	kvm_arch_ops->set_cr0(vcpu, cr0);
+	kvm_x86_ops->set_cr0(vcpu, cr0);
 	vcpu->cr0 = cr0;
 
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	kvm_mmu_reset_context(vcpu);
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 	return;
 }
 EXPORT_SYMBOL_GPL(set_cr0);
@@ -536,62 +518,72 @@ EXPORT_SYMBOL_GPL(lmsw);
 
 void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	if (cr4 & CR4_RESEVED_BITS) {
+	if (cr4 & CR4_RESERVED_BITS) {
 		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
 		inject_gp(vcpu);
 		return;
 	}
 
 	if (is_long_mode(vcpu)) {
-		if (!(cr4 & CR4_PAE_MASK)) {
+		if (!(cr4 & X86_CR4_PAE)) {
 			printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
 			       "in long mode\n");
 			inject_gp(vcpu);
 			return;
 		}
-	} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK)
+	} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
 		   && !load_pdptrs(vcpu, vcpu->cr3)) {
 		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
 		inject_gp(vcpu);
+		return;
 	}
 
-	if (cr4 & CR4_VMXE_MASK) {
+	if (cr4 & X86_CR4_VMXE) {
 		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
 		inject_gp(vcpu);
 		return;
 	}
-	kvm_arch_ops->set_cr4(vcpu, cr4);
-	spin_lock(&vcpu->kvm->lock);
+	kvm_x86_ops->set_cr4(vcpu, cr4);
+	vcpu->cr4 = cr4;
+	mutex_lock(&vcpu->kvm->lock);
 	kvm_mmu_reset_context(vcpu);
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 }
 EXPORT_SYMBOL_GPL(set_cr4);
 
 void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	if (is_long_mode(vcpu)) {
-		if (cr3 & CR3_L_MODE_RESEVED_BITS) {
+		if (cr3 & CR3_L_MODE_RESERVED_BITS) {
 			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
 			inject_gp(vcpu);
 			return;
 		}
 	} else {
-		if (cr3 & CR3_RESEVED_BITS) {
-			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
-			inject_gp(vcpu);
-			return;
-		}
-		if (is_paging(vcpu) && is_pae(vcpu) &&
-		    !load_pdptrs(vcpu, cr3)) {
-			printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
-			       "reserved bits\n");
-			inject_gp(vcpu);
-			return;
+		if (is_pae(vcpu)) {
+			if (cr3 & CR3_PAE_RESERVED_BITS) {
+				printk(KERN_DEBUG
+				       "set_cr3: #GP, reserved bits\n");
+				inject_gp(vcpu);
+				return;
+			}
+			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
+				printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
				       "reserved bits\n");
+				inject_gp(vcpu);
+				return;
+			}
+		} else {
+			if (cr3 & CR3_NONPAE_RESERVED_BITS) {
+				printk(KERN_DEBUG
+				       "set_cr3: #GP, reserved bits\n");
+				inject_gp(vcpu);
+				return;
+			}
 		}
 	}
 
-	vcpu->cr3 = cr3;
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	/*
 	 * Does the new cr3 value map to physical memory? (Note, we
 	 * catch an invalid cr3 even in real-mode, because it would
@@ -603,46 +595,73 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	 */
 	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
 		inject_gp(vcpu);
-	else
+	else {
+		vcpu->cr3 = cr3;
 		vcpu->mmu.new_cr3(vcpu);
-	spin_unlock(&vcpu->kvm->lock);
+	}
+	mutex_unlock(&vcpu->kvm->lock);
 }
 EXPORT_SYMBOL_GPL(set_cr3);
 
 void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
-	if ( cr8 & CR8_RESEVED_BITS) {
+	if (cr8 & CR8_RESERVED_BITS) {
 		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
 		inject_gp(vcpu);
 		return;
 	}
-	vcpu->cr8 = cr8;
+	if (irqchip_in_kernel(vcpu->kvm))
+		kvm_lapic_set_tpr(vcpu, cr8);
+	else
+		vcpu->cr8 = cr8;
 }
 EXPORT_SYMBOL_GPL(set_cr8);
 
-void fx_init(struct kvm_vcpu *vcpu)
+unsigned long get_cr8(struct kvm_vcpu *vcpu)
+{
+	if (irqchip_in_kernel(vcpu->kvm))
+		return kvm_lapic_get_cr8(vcpu);
+	else
+		return vcpu->cr8;
+}
+EXPORT_SYMBOL_GPL(get_cr8);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 {
-	struct __attribute__ ((__packed__)) fx_image_s {
-		u16 control; //fcw
-		u16 status; //fsw
-		u16 tag; // ftw
-		u16 opcode; //fop
-		u64 ip; // fpu ip
-		u64 operand;// fpu dp
-		u32 mxcsr;
-		u32 mxcsr_mask;
+	if (irqchip_in_kernel(vcpu->kvm))
+		return vcpu->apic_base;
+	else
+		return vcpu->apic_base;
+}
+EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
-	} *fx_image;
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
+{
+	/* TODO: reserve bits check */
+	if (irqchip_in_kernel(vcpu->kvm))
+		kvm_lapic_set_base(vcpu, data);
+	else
+		vcpu->apic_base = data;
+}
+EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+
+void fx_init(struct kvm_vcpu *vcpu)
+{
+	unsigned after_mxcsr_mask;
 
-	fx_save(vcpu->host_fx_image);
+	/* Initialize guest FPU by resetting ours and saving into guest's */
+	preempt_disable();
+	fx_save(&vcpu->host_fx_image);
 	fpu_init();
-	fx_save(vcpu->guest_fx_image);
-	fx_restore(vcpu->host_fx_image);
+	fx_save(&vcpu->guest_fx_image);
+	fx_restore(&vcpu->host_fx_image);
+	preempt_enable();
 
-	fx_image = (struct fx_image_s *)vcpu->guest_fx_image;
-	fx_image->mxcsr = 0x1f80;
-	memset(vcpu->guest_fx_image + sizeof(struct fx_image_s),
-	       0, FX_IMAGE_SIZE - sizeof(struct fx_image_s));
+	vcpu->cr0 |= X86_CR0_ET;
+	after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
+	vcpu->guest_fx_image.mxcsr = 0x1f80;
+	memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
+	       0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
 }
 EXPORT_SYMBOL_GPL(fx_init);
 
@@ -661,7 +680,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
-	int memory_config_version;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -681,10 +699,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 	if (!npages)
 		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 
-raced:
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 
-	memory_config_version = kvm->memory_config_version;
 	new = old = *memslot;
 
 	new.base_gfn = base_gfn;
@@ -707,11 +723,6 @@ raced:
 	    (base_gfn >= s->base_gfn + s->npages)))
 		goto out_unlock;
 	}
-	/*
-	 * Do memory allocations outside lock.  memory_config_version will
-	 * detect any races.
-	 */
-	spin_unlock(&kvm->lock);
 
 	/* Deallocate if slot is being removed */
 	if (!npages)
@@ -728,14 +739,14 @@ raced:
 		new.phys_mem = vmalloc(npages * sizeof(struct page *));
 
 		if (!new.phys_mem)
-			goto out_free;
+			goto out_unlock;
 
 		memset(new.phys_mem, 0, npages * sizeof(struct page *));
 		for (i = 0; i < npages; ++i) {
 			new.phys_mem[i] = alloc_page(GFP_HIGHUSER
 						     | __GFP_ZERO);
 			if (!new.phys_mem[i])
-				goto out_free;
+				goto out_unlock;
 			set_page_private(new.phys_mem[i],0);
 		}
 	}
@@ -746,39 +757,25 @@ raced:
 
 		new.dirty_bitmap = vmalloc(dirty_bytes);
 		if (!new.dirty_bitmap)
-			goto out_free;
+			goto out_unlock;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
 	}
 
-	spin_lock(&kvm->lock);
-
-	if (memory_config_version != kvm->memory_config_version) {
-		spin_unlock(&kvm->lock);
-		kvm_free_physmem_slot(&new, &old);
-		goto raced;
-	}
-
-	r = -EAGAIN;
-	if (kvm->busy)
-		goto out_unlock;
-
 	if (mem->slot >= kvm->nmemslots)
 		kvm->nmemslots = mem->slot + 1;
 
 	*memslot = new;
-	++kvm->memory_config_version;
 
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	kvm_flush_remote_tlbs(kvm);
 
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 
 	kvm_free_physmem_slot(&old, &new);
 	return 0;
 
 out_unlock:
-	spin_unlock(&kvm->lock);
-out_free:
+	mutex_unlock(&kvm->lock);
 	kvm_free_physmem_slot(&new, &old);
 out:
 	return r;
@@ -795,14 +792,8 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	int n;
 	unsigned long any = 0;
 
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 
-	/*
-	 * Prevent changes to guest memory configuration even while the lock
-	 * is not taken.
-	 */
-	++kvm->busy;
-	spin_unlock(&kvm->lock);
 	r = -EINVAL;
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
@@ -821,18 +812,17 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
 		goto out;
 
-	spin_lock(&kvm->lock);
-	kvm_mmu_slot_remove_write_access(kvm, log->slot);
-	kvm_flush_remote_tlbs(kvm);
-	memset(memslot->dirty_bitmap, 0, n);
-	spin_unlock(&kvm->lock);
+	/* If nothing is dirty, don't bother messing with page tables. */
+	if (any) {
+		kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		kvm_flush_remote_tlbs(kvm);
+		memset(memslot->dirty_bitmap, 0, n);
+	}
 
 	r = 0;
 
 out:
-	spin_lock(&kvm->lock);
-	--kvm->busy;
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 	return r;
 }
 
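Note: KVM_GET_DIRTY_LOG hands userspace a bitmap with one bit per page of the slot; the rewrite above also skips the write-protect and TLB-flush work entirely when no bit was set. A rough userspace sketch of how the ioctl is consumed, assuming an already-open VM fd and a slot of npages pages (illustrative only):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns the ioctl result; fills 'bitmap' (npages/8 bytes, caller-
 * allocated) with one bit per page dirtied since the last call. */
static int fetch_dirty_log(int vm_fd, int slot, void *bitmap)
{
	struct kvm_dirty_log log;

	memset(&log, 0, sizeof(log));
	log.slot = slot;
	log.dirty_bitmap = bitmap;
	return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}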
@@ -862,7 +852,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 	    < alias->target_phys_addr)
 		goto out;
 
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 
 	p = &kvm->aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -876,7 +866,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 
 	kvm_mmu_zap_all(kvm);
 
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 
 	return 0;
 
@@ -884,6 +874,63 @@ out:
 	return r;
 }
 
+static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_PIC_MASTER:
+		memcpy (&chip->chip.pic,
+			&pic_irqchip(kvm)->pics[0],
+			sizeof(struct kvm_pic_state));
+		break;
+	case KVM_IRQCHIP_PIC_SLAVE:
+		memcpy (&chip->chip.pic,
+			&pic_irqchip(kvm)->pics[1],
+			sizeof(struct kvm_pic_state));
+		break;
+	case KVM_IRQCHIP_IOAPIC:
+		memcpy (&chip->chip.ioapic,
+			ioapic_irqchip(kvm),
+			sizeof(struct kvm_ioapic_state));
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_PIC_MASTER:
+		memcpy (&pic_irqchip(kvm)->pics[0],
+			&chip->chip.pic,
+			sizeof(struct kvm_pic_state));
+		break;
+	case KVM_IRQCHIP_PIC_SLAVE:
+		memcpy (&pic_irqchip(kvm)->pics[1],
+			&chip->chip.pic,
+			sizeof(struct kvm_pic_state));
+		break;
+	case KVM_IRQCHIP_IOAPIC:
+		memcpy (ioapic_irqchip(kvm),
+			&chip->chip.ioapic,
+			sizeof(struct kvm_ioapic_state));
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	kvm_pic_update_irq(pic_irqchip(kvm));
+	return r;
+}
+
 static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
@@ -930,37 +977,26 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
+/* WARNING: Does not work on aliased pages. */
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 {
-	int i;
 	struct kvm_memory_slot *memslot;
-	unsigned long rel_gfn;
 
-	for (i = 0; i < kvm->nmemslots; ++i) {
-		memslot = &kvm->memslots[i];
-
-		if (gfn >= memslot->base_gfn
-		    && gfn < memslot->base_gfn + memslot->npages) {
+	memslot = __gfn_to_memslot(kvm, gfn);
+	if (memslot && memslot->dirty_bitmap) {
+		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
-			if (!memslot->dirty_bitmap)
-				return;
-
-			rel_gfn = gfn - memslot->base_gfn;
-
-			/* avoid RMW */
-			if (!test_bit(rel_gfn, memslot->dirty_bitmap))
-				set_bit(rel_gfn, memslot->dirty_bitmap);
-			return;
-		}
+		/* avoid RMW */
+		if (!test_bit(rel_gfn, memslot->dirty_bitmap))
+			set_bit(rel_gfn, memslot->dirty_bitmap);
 	}
 }
 
-static int emulator_read_std(unsigned long addr,
+int emulator_read_std(unsigned long addr,
 			     void *val,
 			     unsigned int bytes,
-			     struct x86_emulate_ctxt *ctxt)
+			     struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
 	void *data = val;
 
 	while (bytes) {
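Note: the rewritten mark_page_dirty() resolves the slot once with __gfn_to_memslot() and tests the bit before setting it, so pages that are already dirty cost only a read instead of an atomic read-modify-write that would bounce the bitmap's cacheline between CPUs. The idea in isolation (plain C, illustrative):

#include <limits.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

/* Mark page 'rel_gfn' dirty; skip the atomic write if already set. */
static void mark_dirty(unsigned long *bitmap, unsigned long rel_gfn)
{
	unsigned long *word = &bitmap[rel_gfn / BITS_PER_LONG];
	unsigned long mask = 1UL << (rel_gfn % BITS_PER_LONG);

	if (!(*word & mask))	/* the "avoid RMW" test */
		__atomic_fetch_or(word, mask, __ATOMIC_RELAXED);
}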
@@ -990,26 +1026,42 @@ static int emulator_read_std(unsigned long addr,
 
 	return X86EMUL_CONTINUE;
 }
+EXPORT_SYMBOL_GPL(emulator_read_std);
 
 static int emulator_write_std(unsigned long addr,
 			      const void *val,
 			      unsigned int bytes,
-			      struct x86_emulate_ctxt *ctxt)
+			      struct kvm_vcpu *vcpu)
 {
-	printk(KERN_ERR "emulator_write_std: addr %lx n %d\n",
-	       addr, bytes);
+	pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes);
 	return X86EMUL_UNHANDLEABLE;
 }
 
+/*
+ * Only apic need an MMIO device hook, so shortcut now..
+ */
+static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
+						   gpa_t addr)
+{
+	struct kvm_io_device *dev;
+
+	if (vcpu->apic) {
+		dev = &vcpu->apic->dev;
+		if (dev->in_range(dev, addr))
+			return dev;
+	}
+	return NULL;
+}
+
 static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
 						gpa_t addr)
 {
-	/*
-	 * Note that its important to have this wrapper function because
-	 * in the very near future we will be checking for MMIOs against
-	 * the LAPIC as well as the general MMIO bus
-	 */
-	return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+	struct kvm_io_device *dev;
+
+	dev = vcpu_find_pervcpu_dev(vcpu, addr);
+	if (dev == NULL)
+		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+	return dev;
 }
 
 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -1021,9 +1073,8 @@ static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
 static int emulator_read_emulated(unsigned long addr,
 				  void *val,
 				  unsigned int bytes,
-				  struct x86_emulate_ctxt *ctxt)
+				  struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
 	struct kvm_io_device *mmio_dev;
 	gpa_t gpa;
 
@@ -1031,7 +1082,7 @@ static int emulator_read_emulated(unsigned long addr,
 		memcpy(val, vcpu->mmio_data, bytes);
 		vcpu->mmio_read_completed = 0;
 		return X86EMUL_CONTINUE;
-	} else if (emulator_read_std(addr, val, bytes, ctxt)
+	} else if (emulator_read_std(addr, val, bytes, vcpu)
 		   == X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
@@ -1061,7 +1112,6 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 {
 	struct page *page;
 	void *virt;
-	unsigned offset = offset_in_page(gpa);
 
 	if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
 		return 0;
@@ -1070,7 +1120,7 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 		return 0;
 	mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
 	virt = kmap_atomic(page, KM_USER0);
-	kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes);
+	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
 	memcpy(virt + offset_in_page(gpa), val, bytes);
 	kunmap_atomic(virt, KM_USER0);
 	return 1;
@@ -1079,14 +1129,13 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 static int emulator_write_emulated_onepage(unsigned long addr,
 					   const void *val,
 					   unsigned int bytes,
-					   struct x86_emulate_ctxt *ctxt)
+					   struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
 	struct kvm_io_device *mmio_dev;
 	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
 	if (gpa == UNMAPPED_GVA) {
-		kvm_arch_ops->inject_page_fault(vcpu, addr, 2);
+		kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
 		return X86EMUL_PROPAGATE_FAULT;
 	}
 
@@ -1111,31 +1160,32 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 	return X86EMUL_CONTINUE;
 }
 
-static int emulator_write_emulated(unsigned long addr,
+int emulator_write_emulated(unsigned long addr,
 				   const void *val,
 				   unsigned int bytes,
-				   struct x86_emulate_ctxt *ctxt)
+				   struct kvm_vcpu *vcpu)
 {
 	/* Crossing a page boundary? */
 	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
 		int rc, now;
 
 		now = -addr & ~PAGE_MASK;
-		rc = emulator_write_emulated_onepage(addr, val, now, ctxt);
+		rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		addr += now;
 		val += now;
 		bytes -= now;
 	}
-	return emulator_write_emulated_onepage(addr, val, bytes, ctxt);
+	return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
 }
+EXPORT_SYMBOL_GPL(emulator_write_emulated);
 
 static int emulator_cmpxchg_emulated(unsigned long addr,
 				     const void *old,
 				     const void *new,
 				     unsigned int bytes,
-				     struct x86_emulate_ctxt *ctxt)
+				     struct kvm_vcpu *vcpu)
 {
 	static int reported;
 
@@ -1143,12 +1193,12 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 		reported = 1;
 		printk(KERN_WARNING "kvm: emulating exchange as write\n");
 	}
-	return emulator_write_emulated(addr, new, bytes, ctxt);
+	return emulator_write_emulated(addr, new, bytes, vcpu);
 }
 
 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
-	return kvm_arch_ops->get_segment_base(vcpu, seg);
+	return kvm_x86_ops->get_segment_base(vcpu, seg);
 }
 
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
@@ -1158,10 +1208,8 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-	unsigned long cr0;
-
-	cr0 = vcpu->cr0 & ~CR0_TS_MASK;
-	kvm_arch_ops->set_cr0(vcpu, cr0);
+	vcpu->cr0 &= ~X86_CR0_TS;
+	kvm_x86_ops->set_cr0(vcpu, vcpu->cr0);
 	return X86EMUL_CONTINUE;
 }
 
@@ -1171,11 +1219,10 @@ int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest)
 
 	switch (dr) {
 	case 0 ... 3:
-		*dest = kvm_arch_ops->get_dr(vcpu, dr);
+		*dest = kvm_x86_ops->get_dr(vcpu, dr);
 		return X86EMUL_CONTINUE;
 	default:
-		printk(KERN_DEBUG "%s: unexpected dr %u\n",
-		       __FUNCTION__, dr);
+		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
 		return X86EMUL_UNHANDLEABLE;
 	}
 }
@@ -1185,7 +1232,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
 	int exception;
 
-	kvm_arch_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
+	kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
 	if (exception) {
 		/* FIXME: better handling */
 		return X86EMUL_UNHANDLEABLE;
@@ -1193,25 +1240,25 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 	return X86EMUL_CONTINUE;
 }
 
-static void report_emulation_failure(struct x86_emulate_ctxt *ctxt)
+void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 {
 	static int reported;
 	u8 opcodes[4];
-	unsigned long rip = ctxt->vcpu->rip;
+	unsigned long rip = vcpu->rip;
 	unsigned long rip_linear;
 
-	rip_linear = rip + get_segment_base(ctxt->vcpu, VCPU_SREG_CS);
+	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
 
 	if (reported)
 		return;
 
-	emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt);
+	emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
 
-	printk(KERN_ERR "emulation failed but !mmio_needed?"
-	       " rip %lx %02x %02x %02x %02x\n",
-	       rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
+	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
+	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
 	reported = 1;
 }
+EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
 struct x86_emulate_ops emulate_ops = {
 	.read_std            = emulator_read_std,
@@ -1231,12 +1278,12 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 	int cs_db, cs_l;
 
 	vcpu->mmio_fault_cr2 = cr2;
-	kvm_arch_ops->cache_regs(vcpu);
+	kvm_x86_ops->cache_regs(vcpu);
 
-	kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	emulate_ctxt.vcpu = vcpu;
-	emulate_ctxt.eflags = kvm_arch_ops->get_rflags(vcpu);
+	emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
 	emulate_ctxt.cr2 = cr2;
 	emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
 		? X86EMUL_MODE_REAL : cs_l
@@ -1259,9 +1306,13 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 	emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);
 
 	vcpu->mmio_is_write = 0;
+	vcpu->pio.string = 0;
 	r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
+	if (vcpu->pio.string)
+		return EMULATE_DO_MMIO;
 
 	if ((r || vcpu->mmio_is_write) && run) {
+		run->exit_reason = KVM_EXIT_MMIO;
 		run->mmio.phys_addr = vcpu->mmio_phys_addr;
 		memcpy(run->mmio.data, vcpu->mmio_data, 8);
 		run->mmio.len = vcpu->mmio_size;
@@ -1272,14 +1323,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 		if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
 			return EMULATE_DONE;
 		if (!vcpu->mmio_needed) {
-			report_emulation_failure(&emulate_ctxt);
+			kvm_report_emulation_failure(vcpu, "mmio");
 			return EMULATE_FAIL;
 		}
 		return EMULATE_DO_MMIO;
 	}
 
-	kvm_arch_ops->decache_regs(vcpu);
-	kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags);
+	kvm_x86_ops->decache_regs(vcpu);
+	kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags);
 
 	if (vcpu->mmio_is_write) {
 		vcpu->mmio_needed = 0;
@@ -1290,14 +1341,45 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(emulate_instruction);
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+/*
+ * The vCPU has executed a HLT instruction with in-kernel mode enabled.
+ */
+static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->irq_summary)
-		return 1;
+	DECLARE_WAITQUEUE(wait, current);
 
-	vcpu->run->exit_reason = KVM_EXIT_HLT;
+	add_wait_queue(&vcpu->wq, &wait);
+
+	/*
+	 * We will block until either an interrupt or a signal wakes us up
+	 */
+	while (!kvm_cpu_has_interrupt(vcpu)
+	       && !signal_pending(current)
+	       && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
+	       && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		vcpu_put(vcpu);
+		schedule();
+		vcpu_load(vcpu);
+	}
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&vcpu->wq, &wait);
+}
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
 	++vcpu->stat.halt_exits;
-	return 0;
+	if (irqchip_in_kernel(vcpu->kvm)) {
+		vcpu->mp_state = VCPU_MP_STATE_HALTED;
+		kvm_vcpu_block(vcpu);
+		if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
+			return -EINTR;
+		return 1;
+	} else {
+		vcpu->run->exit_reason = KVM_EXIT_HLT;
+		return 0;
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
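Note: kvm_vcpu_block() is the classic kernel sleep idiom: join a wait queue, loop re-checking the wake-up condition, mark the task TASK_INTERRUPTIBLE before each schedule(), and tear down on exit. A condensed sketch of the pattern outside the KVM context (illustrative):

#include <linux/sched.h>
#include <linux/wait.h>

/* Sleep until 'cond(arg)' becomes true or a signal arrives. */
static void wait_for_condition(wait_queue_head_t *wq,
			       int (*cond)(void *), void *arg)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(wq, &wait);
	while (!cond(arg) && !signal_pending(current)) {
		/* mark ourselves sleeping; the waker's wake_up() on 'wq'
		 * flips us back to runnable and schedule() returns */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(wq, &wait);
}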
@@ -1305,7 +1387,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
 
-	kvm_arch_ops->cache_regs(vcpu);
+	kvm_x86_ops->cache_regs(vcpu);
 	ret = -KVM_EINVAL;
 #ifdef CONFIG_X86_64
 	if (is_long_mode(vcpu)) {
@@ -1329,6 +1411,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	}
 	switch (nr) {
 	default:
+		run->hypercall.nr = nr;
 		run->hypercall.args[0] = a0;
 		run->hypercall.args[1] = a1;
 		run->hypercall.args[2] = a2;
@@ -1337,11 +1420,11 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		run->hypercall.args[5] = a5;
 		run->hypercall.ret = ret;
 		run->hypercall.longmode = is_long_mode(vcpu);
-		kvm_arch_ops->decache_regs(vcpu);
+		kvm_x86_ops->decache_regs(vcpu);
 		return 0;
 	}
 	vcpu->regs[VCPU_REGS_RAX] = ret;
-	kvm_arch_ops->decache_regs(vcpu);
+	kvm_x86_ops->decache_regs(vcpu);
 	return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_hypercall);
@@ -1355,26 +1438,26 @@ void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
 {
 	struct descriptor_table dt = { limit, base };
 
-	kvm_arch_ops->set_gdt(vcpu, &dt);
+	kvm_x86_ops->set_gdt(vcpu, &dt);
 }
 
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
 {
 	struct descriptor_table dt = { limit, base };
 
-	kvm_arch_ops->set_idt(vcpu, &dt);
+	kvm_x86_ops->set_idt(vcpu, &dt);
 }
 
 void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
 		   unsigned long *rflags)
 {
 	lmsw(vcpu, msw);
-	*rflags = kvm_arch_ops->get_rflags(vcpu);
+	*rflags = kvm_x86_ops->get_rflags(vcpu);
 }
 
 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 {
-	kvm_arch_ops->decache_cr4_guest_bits(vcpu);
+	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
 	switch (cr) {
 	case 0:
 		return vcpu->cr0;
@@ -1396,7 +1479,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 	switch (cr) {
 	case 0:
 		set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
-		*rflags = kvm_arch_ops->get_rflags(vcpu);
+		*rflags = kvm_x86_ops->get_rflags(vcpu);
 		break;
 	case 2:
 		vcpu->cr2 = val;
@@ -1439,7 +1522,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
 
 	mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
 	para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
-	para_state = kmap_atomic(para_state_page, KM_USER0);
+	para_state = kmap(para_state_page);
 
 	printk(KERN_DEBUG "....  guest version: %d\n", para_state->guest_version);
 	printk(KERN_DEBUG "....           size: %d\n", para_state->size);
@@ -1470,12 +1553,12 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
 	mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
 	hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
 				KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
-	kvm_arch_ops->patch_hypercall(vcpu, hypercall);
+	kvm_x86_ops->patch_hypercall(vcpu, hypercall);
 	kunmap_atomic(hypercall, KM_USER1);
 
 	para_state->ret = 0;
 err_kunmap_skip:
-	kunmap_atomic(para_state, KM_USER0);
+	kunmap(para_state_page);
 	return 0;
 err_gp:
 	return 1;
@@ -1511,7 +1594,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		data = 3;
 		break;
 	case MSR_IA32_APICBASE:
-		data = vcpu->apic_base;
+		data = kvm_get_apic_base(vcpu);
 		break;
 	case MSR_IA32_MISC_ENABLE:
 		data = vcpu->ia32_misc_enable_msr;
@@ -1522,7 +1605,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		break;
 #endif
 	default:
-		printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", msr);
+		pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
 		return 1;
 	}
 	*pdata = data;
@@ -1537,7 +1620,7 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common);
 */
 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
-	return kvm_arch_ops->get_msr(vcpu, msr_index, pdata);
+	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
 }
 
 #ifdef CONFIG_X86_64
@@ -1558,7 +1641,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 		return;
 	}
 
-	kvm_arch_ops->set_efer(vcpu, efer);
+	kvm_x86_ops->set_efer(vcpu, efer);
 
 	efer &= ~EFER_LMA;
 	efer |= vcpu->shadow_efer & EFER_LMA;
@@ -1577,11 +1660,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		break;
 #endif
 	case MSR_IA32_MC0_STATUS:
-		printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
+		pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
 			__FUNCTION__, data);
 		break;
 	case MSR_IA32_MCG_STATUS:
-		printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
+		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
 			__FUNCTION__, data);
 		break;
 	case MSR_IA32_UCODE_REV:
@@ -1589,7 +1672,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	case 0x200 ... 0x2ff: /* MTRRs */
 		break;
 	case MSR_IA32_APICBASE:
-		vcpu->apic_base = data;
+		kvm_set_apic_base(vcpu, data);
 		break;
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->ia32_misc_enable_msr = data;
@@ -1601,7 +1684,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		return vcpu_register_para(vcpu, data);
 
 	default:
-		printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr);
+		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
 		return 1;
 	}
 	return 0;
@@ -1615,44 +1698,24 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common);
 */
 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
-	return kvm_arch_ops->set_msr(vcpu, msr_index, data);
+	return kvm_x86_ops->set_msr(vcpu, msr_index, data);
 }
 
 void kvm_resched(struct kvm_vcpu *vcpu)
 {
 	if (!need_resched())
 		return;
-	vcpu_put(vcpu);
 	cond_resched();
-	vcpu_load(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
-void load_msrs(struct vmx_msr_entry *e, int n)
-{
-	int i;
-
-	for (i = 0; i < n; ++i)
-		wrmsrl(e[i].index, e[i].data);
-}
-EXPORT_SYMBOL_GPL(load_msrs);
-
-void save_msrs(struct vmx_msr_entry *e, int n)
-{
-	int i;
-
-	for (i = 0; i < n; ++i)
-		rdmsrl(e[i].index, e[i].data);
-}
-EXPORT_SYMBOL_GPL(save_msrs);
-
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	int i;
 	u32 function;
 	struct kvm_cpuid_entry *e, *best;
 
-	kvm_arch_ops->cache_regs(vcpu);
+	kvm_x86_ops->cache_regs(vcpu);
 	function = vcpu->regs[VCPU_REGS_RAX];
 	vcpu->regs[VCPU_REGS_RAX] = 0;
 	vcpu->regs[VCPU_REGS_RBX] = 0;
@@ -1678,8 +1741,8 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 		vcpu->regs[VCPU_REGS_RCX] = best->ecx;
 		vcpu->regs[VCPU_REGS_RDX] = best->edx;
 	}
-	kvm_arch_ops->decache_regs(vcpu);
-	kvm_arch_ops->skip_emulated_instruction(vcpu);
+	kvm_x86_ops->decache_regs(vcpu);
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
@@ -1690,11 +1753,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
 	unsigned bytes;
 	int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
 
-	kvm_arch_ops->vcpu_put(vcpu);
 	q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
 		 PAGE_KERNEL);
 	if (!q) {
-		kvm_arch_ops->vcpu_load(vcpu);
 		free_pio_guest_pages(vcpu);
 		return -ENOMEM;
 	}
@@ -1706,7 +1767,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
 		memcpy(p, q, bytes);
 	q -= vcpu->pio.guest_page_offset;
 	vunmap(q);
-	kvm_arch_ops->vcpu_load(vcpu);
 	free_pio_guest_pages(vcpu);
 	return 0;
 }
@@ -1717,7 +1777,7 @@ static int complete_pio(struct kvm_vcpu *vcpu)
 	long delta;
 	int r;
 
-	kvm_arch_ops->cache_regs(vcpu);
+	kvm_x86_ops->cache_regs(vcpu);
 
 	if (!io->string) {
 		if (io->in)
@@ -1727,7 +1787,7 @@ static int complete_pio(struct kvm_vcpu *vcpu)
 		if (io->in) {
 			r = pio_copy_data(vcpu);
 			if (r) {
-				kvm_arch_ops->cache_regs(vcpu);
+				kvm_x86_ops->cache_regs(vcpu);
 				return r;
 			}
 		}
@@ -1750,79 +1810,109 @@ static int complete_pio(struct kvm_vcpu *vcpu)
 			vcpu->regs[VCPU_REGS_RSI] += delta;
 	}
 
-	kvm_arch_ops->decache_regs(vcpu);
+	kvm_x86_ops->decache_regs(vcpu);
 
 	io->count -= io->cur_count;
 	io->cur_count = 0;
 
-	if (!io->count)
-		kvm_arch_ops->skip_emulated_instruction(vcpu);
 	return 0;
 }
 
-void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu)
+static void kernel_pio(struct kvm_io_device *pio_dev,
+		       struct kvm_vcpu *vcpu,
+		       void *pd)
 {
 	/* TODO: String I/O for in kernel device */
 
+	mutex_lock(&vcpu->kvm->lock);
 	if (vcpu->pio.in)
 		kvm_iodevice_read(pio_dev, vcpu->pio.port,
 				  vcpu->pio.size,
-				  vcpu->pio_data);
+				  pd);
 	else
 		kvm_iodevice_write(pio_dev, vcpu->pio.port,
 				   vcpu->pio.size,
-				   vcpu->pio_data);
+				   pd);
+	mutex_unlock(&vcpu->kvm->lock);
+}
+
+static void pio_string_write(struct kvm_io_device *pio_dev,
+			     struct kvm_vcpu *vcpu)
+{
+	struct kvm_pio_request *io = &vcpu->pio;
+	void *pd = vcpu->pio_data;
+	int i;
+
+	mutex_lock(&vcpu->kvm->lock);
+	for (i = 0; i < io->cur_count; i++) {
+		kvm_iodevice_write(pio_dev, io->port,
+				   io->size,
+				   pd);
+		pd += io->size;
+	}
+	mutex_unlock(&vcpu->kvm->lock);
 }
 
1777int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 1856int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1778 int size, unsigned long count, int string, int down, 1857 int size, unsigned port)
1858{
1859 struct kvm_io_device *pio_dev;
1860
1861 vcpu->run->exit_reason = KVM_EXIT_IO;
1862 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1863 vcpu->run->io.size = vcpu->pio.size = size;
1864 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1865 vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
1866 vcpu->run->io.port = vcpu->pio.port = port;
1867 vcpu->pio.in = in;
1868 vcpu->pio.string = 0;
1869 vcpu->pio.down = 0;
1870 vcpu->pio.guest_page_offset = 0;
1871 vcpu->pio.rep = 0;
1872
1873 kvm_x86_ops->cache_regs(vcpu);
1874 memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1875 kvm_x86_ops->decache_regs(vcpu);
1876
1877 kvm_x86_ops->skip_emulated_instruction(vcpu);
1878
1879 pio_dev = vcpu_find_pio_dev(vcpu, port);
1880 if (pio_dev) {
1881 kernel_pio(pio_dev, vcpu, vcpu->pio_data);
1882 complete_pio(vcpu);
1883 return 1;
1884 }
1885 return 0;
1886}
1887EXPORT_SYMBOL_GPL(kvm_emulate_pio);
1888
1889int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1890 int size, unsigned long count, int down,
1779 gva_t address, int rep, unsigned port) 1891 gva_t address, int rep, unsigned port)
1780{ 1892{
1781 unsigned now, in_page; 1893 unsigned now, in_page;
1782 int i; 1894 int i, ret = 0;
1783 int nr_pages = 1; 1895 int nr_pages = 1;
1784 struct page *page; 1896 struct page *page;
1785 struct kvm_io_device *pio_dev; 1897 struct kvm_io_device *pio_dev;
1786 1898
1787 vcpu->run->exit_reason = KVM_EXIT_IO; 1899 vcpu->run->exit_reason = KVM_EXIT_IO;
1788 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 1900 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1789 vcpu->run->io.size = size; 1901 vcpu->run->io.size = vcpu->pio.size = size;
1790 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 1902 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1791 vcpu->run->io.count = count; 1903 vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
1792 vcpu->run->io.port = port; 1904 vcpu->run->io.port = vcpu->pio.port = port;
1793 vcpu->pio.count = count;
1794 vcpu->pio.cur_count = count;
1795 vcpu->pio.size = size;
1796 vcpu->pio.in = in; 1905 vcpu->pio.in = in;
1797 vcpu->pio.port = port; 1906 vcpu->pio.string = 1;
1798 vcpu->pio.string = string;
1799 vcpu->pio.down = down; 1907 vcpu->pio.down = down;
1800 vcpu->pio.guest_page_offset = offset_in_page(address); 1908 vcpu->pio.guest_page_offset = offset_in_page(address);
1801 vcpu->pio.rep = rep; 1909 vcpu->pio.rep = rep;
1802 1910
1803 pio_dev = vcpu_find_pio_dev(vcpu, port);
1804 if (!string) {
1805 kvm_arch_ops->cache_regs(vcpu);
1806 memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1807 kvm_arch_ops->decache_regs(vcpu);
1808 if (pio_dev) {
1809 kernel_pio(pio_dev, vcpu);
1810 complete_pio(vcpu);
1811 return 1;
1812 }
1813 return 0;
1814 }
1815 /* TODO: String I/O for in kernel device */
1816 if (pio_dev)
1817 printk(KERN_ERR "kvm_setup_pio: no string io support\n");
1818
1819 if (!count) { 1911 if (!count) {
1820 kvm_arch_ops->skip_emulated_instruction(vcpu); 1912 kvm_x86_ops->skip_emulated_instruction(vcpu);
1821 return 1; 1913 return 1;
1822 } 1914 }
1823 1915
1824 now = min(count, PAGE_SIZE / size);
1825
1826 if (!down) 1916 if (!down)
1827 in_page = PAGE_SIZE - offset_in_page(address); 1917 in_page = PAGE_SIZE - offset_in_page(address);
1828 else 1918 else
@@ -1841,20 +1931,23 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1841 /* 1931 /*
1842 * String I/O in reverse. Yuck. Kill the guest, fix later. 1932 * String I/O in reverse. Yuck. Kill the guest, fix later.
1843 */ 1933 */
1844 printk(KERN_ERR "kvm: guest string pio down\n"); 1934 pr_unimpl(vcpu, "guest string pio down\n");
1845 inject_gp(vcpu); 1935 inject_gp(vcpu);
1846 return 1; 1936 return 1;
1847 } 1937 }
1848 vcpu->run->io.count = now; 1938 vcpu->run->io.count = now;
1849 vcpu->pio.cur_count = now; 1939 vcpu->pio.cur_count = now;
1850 1940
1941 if (vcpu->pio.cur_count == vcpu->pio.count)
1942 kvm_x86_ops->skip_emulated_instruction(vcpu);
1943
1851 for (i = 0; i < nr_pages; ++i) { 1944 for (i = 0; i < nr_pages; ++i) {
1852 spin_lock(&vcpu->kvm->lock); 1945 mutex_lock(&vcpu->kvm->lock);
1853 page = gva_to_page(vcpu, address + i * PAGE_SIZE); 1946 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
1854 if (page) 1947 if (page)
1855 get_page(page); 1948 get_page(page);
1856 vcpu->pio.guest_pages[i] = page; 1949 vcpu->pio.guest_pages[i] = page;
1857 spin_unlock(&vcpu->kvm->lock); 1950 mutex_unlock(&vcpu->kvm->lock);
1858 if (!page) { 1951 if (!page) {
1859 inject_gp(vcpu); 1952 inject_gp(vcpu);
1860 free_pio_guest_pages(vcpu); 1953 free_pio_guest_pages(vcpu);
@@ -1862,11 +1955,145 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1862 } 1955 }
1863 } 1956 }
1864 1957
1865 if (!vcpu->pio.in) 1958 pio_dev = vcpu_find_pio_dev(vcpu, port);
1866 return pio_copy_data(vcpu); 1959 if (!vcpu->pio.in) {
1867 return 0; 1960 /* string PIO write */
1961 ret = pio_copy_data(vcpu);
1962 if (ret >= 0 && pio_dev) {
1963 pio_string_write(pio_dev, vcpu);
1964 complete_pio(vcpu);
1965 if (vcpu->pio.count == 0)
1966 ret = 1;
1967 }
1968 } else if (pio_dev)
1969 pr_unimpl(vcpu, "no string pio read support yet, "
1970 "port %x size %d count %ld\n",
1971 port, size, count);
1972
1973 return ret;
1974}
1975EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
1976
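When neither kvm_emulate_pio() nor kvm_emulate_pio_string() finds an in-kernel device (return value 0), the vcpu exits to userspace with exit_reason KVM_EXIT_IO and the payload sitting in the mmap'ed run area at io.data_offset, as set up above. A minimal userspace sketch of the consumer side; handle_out is a made-up device-model hook:

#include <linux/kvm.h>
#include <stdint.h>

static void handle_pio_exit(struct kvm_run *run,
                            void (*handle_out)(uint16_t port, const void *data,
                                               int size, uint32_t count))
{
        /* the kernel placed the PIO data page at KVM_PIO_PAGE_OFFSET,
         * reachable through the vcpu mmap at run->io.data_offset */
        uint8_t *data = (uint8_t *)run + run->io.data_offset;

        if (run->io.direction == KVM_EXIT_IO_OUT)
                handle_out(run->io.port, data, run->io.size, run->io.count);
        /* for KVM_EXIT_IO_IN, userspace writes the reply into `data` before
         * the next KVM_RUN; complete_pio() then copies it into the guest */
}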
1977/*
1978 * Check if userspace requested an interrupt window, and that the
1979 * interrupt window is open.
1980 *
1981 * No need to exit to userspace if we already have an interrupt queued.
1982 */
1983static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1984 struct kvm_run *kvm_run)
1985{
1986 return (!vcpu->irq_summary &&
1987 kvm_run->request_interrupt_window &&
1988 vcpu->interrupt_window_open &&
1989 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
1990}
1991
1992static void post_kvm_run_save(struct kvm_vcpu *vcpu,
1993 struct kvm_run *kvm_run)
1994{
1995 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
1996 kvm_run->cr8 = get_cr8(vcpu);
1997 kvm_run->apic_base = kvm_get_apic_base(vcpu);
1998 if (irqchip_in_kernel(vcpu->kvm))
1999 kvm_run->ready_for_interrupt_injection = 1;
2000 else
2001 kvm_run->ready_for_interrupt_injection =
2002 (vcpu->interrupt_window_open &&
2003 vcpu->irq_summary == 0);
2004}
2005
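dm_request_for_irq_injection() and post_kvm_run_save() together implement the userspace-irqchip injection protocol: userspace sets request_interrupt_window, KVM exits once the guest can take an interrupt, and userspace injects with KVM_INTERRUPT. A hedged sketch of that dance; the pending vector and surrounding setup are assumed:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static void maybe_inject(int vcpu_fd, struct kvm_run *run, int vector)
{
        run->request_interrupt_window = 1;   /* ask to exit when injectable */
        if (run->ready_for_interrupt_injection && run->if_flag) {
                struct kvm_interrupt intr = { .irq = vector };

                ioctl(vcpu_fd, KVM_INTERRUPT, &intr);
                run->request_interrupt_window = 0;
        }
}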
2006static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2007{
2008 int r;
2009
2010 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
2011 printk("vcpu %d received sipi with vector # %x\n",
2012 vcpu->vcpu_id, vcpu->sipi_vector);
2013 kvm_lapic_reset(vcpu);
2014 kvm_x86_ops->vcpu_reset(vcpu);
2015 vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
2016 }
2017
2018preempted:
2019 if (vcpu->guest_debug.enabled)
2020 kvm_x86_ops->guest_debug_pre(vcpu);
2021
2022again:
2023 r = kvm_mmu_reload(vcpu);
2024 if (unlikely(r))
2025 goto out;
2026
2027 preempt_disable();
2028
2029 kvm_x86_ops->prepare_guest_switch(vcpu);
2030 kvm_load_guest_fpu(vcpu);
2031
2032 local_irq_disable();
2033
2034 if (signal_pending(current)) {
2035 local_irq_enable();
2036 preempt_enable();
2037 r = -EINTR;
2038 kvm_run->exit_reason = KVM_EXIT_INTR;
2039 ++vcpu->stat.signal_exits;
2040 goto out;
2041 }
2042
2043 if (irqchip_in_kernel(vcpu->kvm))
2044 kvm_x86_ops->inject_pending_irq(vcpu);
2045 else if (!vcpu->mmio_read_completed)
2046 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
2047
2048 vcpu->guest_mode = 1;
2049
2050 if (vcpu->requests)
2051 if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
2052 kvm_x86_ops->tlb_flush(vcpu);
2053
2054 kvm_x86_ops->run(vcpu, kvm_run);
2055
2056 vcpu->guest_mode = 0;
2057 local_irq_enable();
2058
2059 ++vcpu->stat.exits;
2060
2061 preempt_enable();
2062
2063 /*
2064 * Profile KVM exit RIPs:
2065 */
2066 if (unlikely(prof_on == KVM_PROFILING)) {
2067 kvm_x86_ops->cache_regs(vcpu);
2068 profile_hit(KVM_PROFILING, (void *)vcpu->rip);
2069 }
2070
2071 r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
2072
2073 if (r > 0) {
2074 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
2075 r = -EINTR;
2076 kvm_run->exit_reason = KVM_EXIT_INTR;
2077 ++vcpu->stat.request_irq_exits;
2078 goto out;
2079 }
2080 if (!need_resched()) {
2081 ++vcpu->stat.light_exits;
2082 goto again;
2083 }
2084 }
2085
2086out:
2087 if (r > 0) {
2088 kvm_resched(vcpu);
2089 goto preempted;
2090 }
2091
2092 post_kvm_run_save(vcpu, kvm_run);
2093
2094 return r;
1868} 2095}
1869EXPORT_SYMBOL_GPL(kvm_setup_pio); 2096
1870 2097
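With __vcpu_run() looping in the kernel on light exits, userspace only sees KVM_RUN return for signals (-EINTR with KVM_EXIT_INTR), interrupt-window requests, or exits it must service itself. A minimal run loop, assuming vcpu_fd and the mmap'ed run came from KVM_CREATE_VCPU; the PIO handling refers to the handle_pio_exit() sketch above:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <stdio.h>

static int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
        for (;;) {
                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                        if (errno == EINTR)     /* signal; just re-enter */
                                continue;
                        return -1;
                }
                switch (run->exit_reason) {
                case KVM_EXIT_IO:
                        /* service PIO, e.g. via handle_pio_exit() */
                        break;
                case KVM_EXIT_INTR:
                        continue;               /* interrupted, retry */
                case KVM_EXIT_HLT:
                        return 0;               /* guest halted */
                default:
                        fprintf(stderr, "unhandled exit %u\n",
                                run->exit_reason);
                        return -1;
                }
        }
}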
1871static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2098static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1872{ 2099{
@@ -1875,11 +2102,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1875 2102
1876 vcpu_load(vcpu); 2103 vcpu_load(vcpu);
1877 2104
2105 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
2106 kvm_vcpu_block(vcpu);
2107 vcpu_put(vcpu);
2108 return -EAGAIN;
2109 }
2110
1878 if (vcpu->sigset_active) 2111 if (vcpu->sigset_active)
1879 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 2112 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1880 2113
1881 /* re-sync apic's tpr */ 2114 /* re-sync apic's tpr */
1882 vcpu->cr8 = kvm_run->cr8; 2115 if (!irqchip_in_kernel(vcpu->kvm))
2116 set_cr8(vcpu, kvm_run->cr8);
1883 2117
1884 if (vcpu->pio.cur_count) { 2118 if (vcpu->pio.cur_count) {
1885 r = complete_pio(vcpu); 2119 r = complete_pio(vcpu);
@@ -1897,19 +2131,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1897 /* 2131 /*
1898 * Read-modify-write. Back to userspace. 2132 * Read-modify-write. Back to userspace.
1899 */ 2133 */
1900 kvm_run->exit_reason = KVM_EXIT_MMIO;
1901 r = 0; 2134 r = 0;
1902 goto out; 2135 goto out;
1903 } 2136 }
1904 } 2137 }
1905 2138
1906 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { 2139 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
1907 kvm_arch_ops->cache_regs(vcpu); 2140 kvm_x86_ops->cache_regs(vcpu);
1908 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; 2141 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
1909 kvm_arch_ops->decache_regs(vcpu); 2142 kvm_x86_ops->decache_regs(vcpu);
1910 } 2143 }
1911 2144
1912 r = kvm_arch_ops->run(vcpu, kvm_run); 2145 r = __vcpu_run(vcpu, kvm_run);
1913 2146
1914out: 2147out:
1915 if (vcpu->sigset_active) 2148 if (vcpu->sigset_active)
@@ -1924,7 +2157,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
1924{ 2157{
1925 vcpu_load(vcpu); 2158 vcpu_load(vcpu);
1926 2159
1927 kvm_arch_ops->cache_regs(vcpu); 2160 kvm_x86_ops->cache_regs(vcpu);
1928 2161
1929 regs->rax = vcpu->regs[VCPU_REGS_RAX]; 2162 regs->rax = vcpu->regs[VCPU_REGS_RAX];
1930 regs->rbx = vcpu->regs[VCPU_REGS_RBX]; 2163 regs->rbx = vcpu->regs[VCPU_REGS_RBX];
@@ -1946,7 +2179,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
1946#endif 2179#endif
1947 2180
1948 regs->rip = vcpu->rip; 2181 regs->rip = vcpu->rip;
1949 regs->rflags = kvm_arch_ops->get_rflags(vcpu); 2182 regs->rflags = kvm_x86_ops->get_rflags(vcpu);
1950 2183
1951 /* 2184 /*
1952 * Don't leak debug flags in case they were set for guest debugging 2185 * Don't leak debug flags in case they were set for guest debugging
@@ -1984,9 +2217,9 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
1984#endif 2217#endif
1985 2218
1986 vcpu->rip = regs->rip; 2219 vcpu->rip = regs->rip;
1987 kvm_arch_ops->set_rflags(vcpu, regs->rflags); 2220 kvm_x86_ops->set_rflags(vcpu, regs->rflags);
1988 2221
1989 kvm_arch_ops->decache_regs(vcpu); 2222 kvm_x86_ops->decache_regs(vcpu);
1990 2223
1991 vcpu_put(vcpu); 2224 vcpu_put(vcpu);
1992 2225
@@ -1996,13 +2229,14 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
1996static void get_segment(struct kvm_vcpu *vcpu, 2229static void get_segment(struct kvm_vcpu *vcpu,
1997 struct kvm_segment *var, int seg) 2230 struct kvm_segment *var, int seg)
1998{ 2231{
1999 return kvm_arch_ops->get_segment(vcpu, var, seg); 2232 return kvm_x86_ops->get_segment(vcpu, var, seg);
2000} 2233}
2001 2234
2002static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 2235static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2003 struct kvm_sregs *sregs) 2236 struct kvm_sregs *sregs)
2004{ 2237{
2005 struct descriptor_table dt; 2238 struct descriptor_table dt;
2239 int pending_vec;
2006 2240
2007 vcpu_load(vcpu); 2241 vcpu_load(vcpu);
2008 2242
@@ -2016,24 +2250,31 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2016 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 2250 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2017 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 2251 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2018 2252
2019 kvm_arch_ops->get_idt(vcpu, &dt); 2253 kvm_x86_ops->get_idt(vcpu, &dt);
2020 sregs->idt.limit = dt.limit; 2254 sregs->idt.limit = dt.limit;
2021 sregs->idt.base = dt.base; 2255 sregs->idt.base = dt.base;
2022 kvm_arch_ops->get_gdt(vcpu, &dt); 2256 kvm_x86_ops->get_gdt(vcpu, &dt);
2023 sregs->gdt.limit = dt.limit; 2257 sregs->gdt.limit = dt.limit;
2024 sregs->gdt.base = dt.base; 2258 sregs->gdt.base = dt.base;
2025 2259
2026 kvm_arch_ops->decache_cr4_guest_bits(vcpu); 2260 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2027 sregs->cr0 = vcpu->cr0; 2261 sregs->cr0 = vcpu->cr0;
2028 sregs->cr2 = vcpu->cr2; 2262 sregs->cr2 = vcpu->cr2;
2029 sregs->cr3 = vcpu->cr3; 2263 sregs->cr3 = vcpu->cr3;
2030 sregs->cr4 = vcpu->cr4; 2264 sregs->cr4 = vcpu->cr4;
2031 sregs->cr8 = vcpu->cr8; 2265 sregs->cr8 = get_cr8(vcpu);
2032 sregs->efer = vcpu->shadow_efer; 2266 sregs->efer = vcpu->shadow_efer;
2033 sregs->apic_base = vcpu->apic_base; 2267 sregs->apic_base = kvm_get_apic_base(vcpu);
2034 2268
2035 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, 2269 if (irqchip_in_kernel(vcpu->kvm)) {
2036 sizeof sregs->interrupt_bitmap); 2270 memset(sregs->interrupt_bitmap, 0,
2271 sizeof sregs->interrupt_bitmap);
2272 pending_vec = kvm_x86_ops->get_irq(vcpu);
2273 if (pending_vec >= 0)
2274 set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap);
2275 } else
2276 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
2277 sizeof sregs->interrupt_bitmap);
2037 2278
2038 vcpu_put(vcpu); 2279 vcpu_put(vcpu);
2039 2280
@@ -2043,56 +2284,69 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2043static void set_segment(struct kvm_vcpu *vcpu, 2284static void set_segment(struct kvm_vcpu *vcpu,
2044 struct kvm_segment *var, int seg) 2285 struct kvm_segment *var, int seg)
2045{ 2286{
2046 return kvm_arch_ops->set_segment(vcpu, var, seg); 2287 return kvm_x86_ops->set_segment(vcpu, var, seg);
2047} 2288}
2048 2289
2049static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 2290static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2050 struct kvm_sregs *sregs) 2291 struct kvm_sregs *sregs)
2051{ 2292{
2052 int mmu_reset_needed = 0; 2293 int mmu_reset_needed = 0;
2053 int i; 2294 int i, pending_vec, max_bits;
2054 struct descriptor_table dt; 2295 struct descriptor_table dt;
2055 2296
2056 vcpu_load(vcpu); 2297 vcpu_load(vcpu);
2057 2298
2058 dt.limit = sregs->idt.limit; 2299 dt.limit = sregs->idt.limit;
2059 dt.base = sregs->idt.base; 2300 dt.base = sregs->idt.base;
2060 kvm_arch_ops->set_idt(vcpu, &dt); 2301 kvm_x86_ops->set_idt(vcpu, &dt);
2061 dt.limit = sregs->gdt.limit; 2302 dt.limit = sregs->gdt.limit;
2062 dt.base = sregs->gdt.base; 2303 dt.base = sregs->gdt.base;
2063 kvm_arch_ops->set_gdt(vcpu, &dt); 2304 kvm_x86_ops->set_gdt(vcpu, &dt);
2064 2305
2065 vcpu->cr2 = sregs->cr2; 2306 vcpu->cr2 = sregs->cr2;
2066 mmu_reset_needed |= vcpu->cr3 != sregs->cr3; 2307 mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
2067 vcpu->cr3 = sregs->cr3; 2308 vcpu->cr3 = sregs->cr3;
2068 2309
2069 vcpu->cr8 = sregs->cr8; 2310 set_cr8(vcpu, sregs->cr8);
2070 2311
2071 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; 2312 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
2072#ifdef CONFIG_X86_64 2313#ifdef CONFIG_X86_64
2073 kvm_arch_ops->set_efer(vcpu, sregs->efer); 2314 kvm_x86_ops->set_efer(vcpu, sregs->efer);
2074#endif 2315#endif
2075 vcpu->apic_base = sregs->apic_base; 2316 kvm_set_apic_base(vcpu, sregs->apic_base);
2076 2317
2077 kvm_arch_ops->decache_cr4_guest_bits(vcpu); 2318 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2078 2319
2079 mmu_reset_needed |= vcpu->cr0 != sregs->cr0; 2320 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
2080 kvm_arch_ops->set_cr0(vcpu, sregs->cr0); 2321 vcpu->cr0 = sregs->cr0;
2322 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
2081 2323
2082 mmu_reset_needed |= vcpu->cr4 != sregs->cr4; 2324 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
2083 kvm_arch_ops->set_cr4(vcpu, sregs->cr4); 2325 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
2084 if (!is_long_mode(vcpu) && is_pae(vcpu)) 2326 if (!is_long_mode(vcpu) && is_pae(vcpu))
2085 load_pdptrs(vcpu, vcpu->cr3); 2327 load_pdptrs(vcpu, vcpu->cr3);
2086 2328
2087 if (mmu_reset_needed) 2329 if (mmu_reset_needed)
2088 kvm_mmu_reset_context(vcpu); 2330 kvm_mmu_reset_context(vcpu);
2089 2331
2090 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, 2332 if (!irqchip_in_kernel(vcpu->kvm)) {
2091 sizeof vcpu->irq_pending); 2333 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
2092 vcpu->irq_summary = 0; 2334 sizeof vcpu->irq_pending);
2093 for (i = 0; i < NR_IRQ_WORDS; ++i) 2335 vcpu->irq_summary = 0;
2094 if (vcpu->irq_pending[i]) 2336 for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
2095 __set_bit(i, &vcpu->irq_summary); 2337 if (vcpu->irq_pending[i])
2338 __set_bit(i, &vcpu->irq_summary);
2339 } else {
2340 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
2341 pending_vec = find_first_bit(
2342 (const unsigned long *)sregs->interrupt_bitmap,
2343 max_bits);
2344 /* Only pending external irq is handled here */
2345 if (pending_vec < max_bits) {
2346 kvm_x86_ops->set_irq(vcpu, pending_vec);
2347 printk("Set back pending irq %d\n", pending_vec);
2348 }
2349 }
2096 2350
2097 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 2351 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2098 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 2352 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
@@ -2109,6 +2363,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2109 return 0; 2363 return 0;
2110} 2364}
2111 2365
2366void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
2367{
2368 struct kvm_segment cs;
2369
2370 get_segment(vcpu, &cs, VCPU_SREG_CS);
2371 *db = cs.db;
2372 *l = cs.l;
2373}
2374EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
2375
2112/* 2376/*
2113 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 2377 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
2114 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 2378 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -2236,13 +2500,13 @@ static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2236 gpa_t gpa; 2500 gpa_t gpa;
2237 2501
2238 vcpu_load(vcpu); 2502 vcpu_load(vcpu);
2239 spin_lock(&vcpu->kvm->lock); 2503 mutex_lock(&vcpu->kvm->lock);
2240 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); 2504 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
2241 tr->physical_address = gpa; 2505 tr->physical_address = gpa;
2242 tr->valid = gpa != UNMAPPED_GVA; 2506 tr->valid = gpa != UNMAPPED_GVA;
2243 tr->writeable = 1; 2507 tr->writeable = 1;
2244 tr->usermode = 0; 2508 tr->usermode = 0;
2245 spin_unlock(&vcpu->kvm->lock); 2509 mutex_unlock(&vcpu->kvm->lock);
2246 vcpu_put(vcpu); 2510 vcpu_put(vcpu);
2247 2511
2248 return 0; 2512 return 0;
@@ -2253,6 +2517,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2253{ 2517{
2254 if (irq->irq < 0 || irq->irq >= 256) 2518 if (irq->irq < 0 || irq->irq >= 256)
2255 return -EINVAL; 2519 return -EINVAL;
2520 if (irqchip_in_kernel(vcpu->kvm))
2521 return -ENXIO;
2256 vcpu_load(vcpu); 2522 vcpu_load(vcpu);
2257 2523
2258 set_bit(irq->irq, vcpu->irq_pending); 2524 set_bit(irq->irq, vcpu->irq_pending);
@@ -2270,7 +2536,7 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
2270 2536
2271 vcpu_load(vcpu); 2537 vcpu_load(vcpu);
2272 2538
2273 r = kvm_arch_ops->set_guest_debug(vcpu, dbg); 2539 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
2274 2540
2275 vcpu_put(vcpu); 2541 vcpu_put(vcpu);
2276 2542
@@ -2285,7 +2551,6 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
2285 unsigned long pgoff; 2551 unsigned long pgoff;
2286 struct page *page; 2552 struct page *page;
2287 2553
2288 *type = VM_FAULT_MINOR;
2289 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 2554 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
2290 if (pgoff == 0) 2555 if (pgoff == 0)
2291 page = virt_to_page(vcpu->run); 2556 page = virt_to_page(vcpu->run);
@@ -2294,6 +2559,9 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
2294 else 2559 else
2295 return NOPAGE_SIGBUS; 2560 return NOPAGE_SIGBUS;
2296 get_page(page); 2561 get_page(page);
2562 if (type != NULL)
2563 *type = VM_FAULT_MINOR;
2564
2297 return page; 2565 return page;
2298} 2566}
2299 2567
@@ -2346,74 +2614,52 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
2346{ 2614{
2347 int r; 2615 int r;
2348 struct kvm_vcpu *vcpu; 2616 struct kvm_vcpu *vcpu;
2349 struct page *page;
2350 2617
2351 r = -EINVAL;
2352 if (!valid_vcpu(n)) 2618 if (!valid_vcpu(n))
2353 goto out; 2619 return -EINVAL;
2354
2355 vcpu = &kvm->vcpus[n];
2356
2357 mutex_lock(&vcpu->mutex);
2358
2359 if (vcpu->vmcs) {
2360 mutex_unlock(&vcpu->mutex);
2361 return -EEXIST;
2362 }
2363
2364 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2365 r = -ENOMEM;
2366 if (!page)
2367 goto out_unlock;
2368 vcpu->run = page_address(page);
2369
2370 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2371 r = -ENOMEM;
2372 if (!page)
2373 goto out_free_run;
2374 vcpu->pio_data = page_address(page);
2375 2620
2376 vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, 2621 vcpu = kvm_x86_ops->vcpu_create(kvm, n);
2377 FX_IMAGE_ALIGN); 2622 if (IS_ERR(vcpu))
2378 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; 2623 return PTR_ERR(vcpu);
2379 vcpu->cr0 = 0x10;
2380 2624
2381 r = kvm_arch_ops->vcpu_create(vcpu); 2625 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
2382 if (r < 0)
2383 goto out_free_vcpus;
2384 2626
2385 r = kvm_mmu_create(vcpu); 2627 /* We do fxsave: this must be aligned. */
2386 if (r < 0) 2628 BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
2387 goto out_free_vcpus;
2388 2629
2389 kvm_arch_ops->vcpu_load(vcpu); 2630 vcpu_load(vcpu);
2390 r = kvm_mmu_setup(vcpu); 2631 r = kvm_mmu_setup(vcpu);
2391 if (r >= 0)
2392 r = kvm_arch_ops->vcpu_setup(vcpu);
2393 vcpu_put(vcpu); 2632 vcpu_put(vcpu);
2394
2395 if (r < 0) 2633 if (r < 0)
2396 goto out_free_vcpus; 2634 goto free_vcpu;
2397 2635
2636 mutex_lock(&kvm->lock);
2637 if (kvm->vcpus[n]) {
2638 r = -EEXIST;
2639 mutex_unlock(&kvm->lock);
2640 goto mmu_unload;
2641 }
2642 kvm->vcpus[n] = vcpu;
2643 mutex_unlock(&kvm->lock);
2644
2645 /* Now it's all set up, let userspace reach it */
2398 r = create_vcpu_fd(vcpu); 2646 r = create_vcpu_fd(vcpu);
2399 if (r < 0) 2647 if (r < 0)
2400 goto out_free_vcpus; 2648 goto unlink;
2649 return r;
2401 2650
2402 spin_lock(&kvm_lock); 2651unlink:
2403 if (n >= kvm->nvcpus) 2652 mutex_lock(&kvm->lock);
2404 kvm->nvcpus = n + 1; 2653 kvm->vcpus[n] = NULL;
2405 spin_unlock(&kvm_lock); 2654 mutex_unlock(&kvm->lock);
2406 2655
2407 return r; 2656mmu_unload:
2657 vcpu_load(vcpu);
2658 kvm_mmu_unload(vcpu);
2659 vcpu_put(vcpu);
2408 2660
2409out_free_vcpus: 2661free_vcpu:
2410 kvm_free_vcpu(vcpu); 2662 kvm_x86_ops->vcpu_free(vcpu);
2411out_free_run:
2412 free_page((unsigned long)vcpu->run);
2413 vcpu->run = NULL;
2414out_unlock:
2415 mutex_unlock(&vcpu->mutex);
2416out:
2417 return r; 2663 return r;
2418} 2664}
2419 2665
@@ -2493,7 +2739,7 @@ struct fxsave {
2493 2739
2494static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 2740static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2495{ 2741{
2496 struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; 2742 struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
2497 2743
2498 vcpu_load(vcpu); 2744 vcpu_load(vcpu);
2499 2745
@@ -2513,7 +2759,7 @@ static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2513 2759
2514static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 2760static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2515{ 2761{
2516 struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; 2762 struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
2517 2763
2518 vcpu_load(vcpu); 2764 vcpu_load(vcpu);
2519 2765
@@ -2531,6 +2777,27 @@ static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2531 return 0; 2777 return 0;
2532} 2778}
2533 2779
2780static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2781 struct kvm_lapic_state *s)
2782{
2783 vcpu_load(vcpu);
2784 memcpy(s->regs, vcpu->apic->regs, sizeof *s);
2785 vcpu_put(vcpu);
2786
2787 return 0;
2788}
2789
2790static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2791 struct kvm_lapic_state *s)
2792{
2793 vcpu_load(vcpu);
2794 memcpy(vcpu->apic->regs, s->regs, sizeof *s);
2795 kvm_apic_post_state_restore(vcpu);
2796 vcpu_put(vcpu);
2797
2798 return 0;
2799}
2800
2534static long kvm_vcpu_ioctl(struct file *filp, 2801static long kvm_vcpu_ioctl(struct file *filp,
2535 unsigned int ioctl, unsigned long arg) 2802 unsigned int ioctl, unsigned long arg)
2536{ 2803{
@@ -2700,6 +2967,31 @@ static long kvm_vcpu_ioctl(struct file *filp,
2700 r = 0; 2967 r = 0;
2701 break; 2968 break;
2702 } 2969 }
2970 case KVM_GET_LAPIC: {
2971 struct kvm_lapic_state lapic;
2972
2973 memset(&lapic, 0, sizeof lapic);
2974 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
2975 if (r)
2976 goto out;
2977 r = -EFAULT;
2978 if (copy_to_user(argp, &lapic, sizeof lapic))
2979 goto out;
2980 r = 0;
2981 break;
2982 }
2983 case KVM_SET_LAPIC: {
2984 struct kvm_lapic_state lapic;
2985
2986 r = -EFAULT;
2987 if (copy_from_user(&lapic, argp, sizeof lapic))
2988 goto out;
 2989 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
2990 if (r)
2991 goto out;
2992 r = 0;
2993 break;
2994 }
2703 default: 2995 default:
2704 ; 2996 ;
2705 } 2997 }
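KVM_GET_LAPIC and KVM_SET_LAPIC let userspace snapshot and restore the in-kernel local APIC, e.g. across save/restore or migration. A sketch, assuming vcpu_fd belongs to a VM created with KVM_CREATE_IRQCHIP:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int save_restore_lapic(int vcpu_fd)
{
        struct kvm_lapic_state lapic;

        if (ioctl(vcpu_fd, KVM_GET_LAPIC, &lapic) < 0)
                return -1;
        /* ... serialize lapic.regs, or hand it to the destination ... */
        return ioctl(vcpu_fd, KVM_SET_LAPIC, &lapic);
}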
@@ -2753,6 +3045,75 @@ static long kvm_vm_ioctl(struct file *filp,
2753 goto out; 3045 goto out;
2754 break; 3046 break;
2755 } 3047 }
3048 case KVM_CREATE_IRQCHIP:
3049 r = -ENOMEM;
3050 kvm->vpic = kvm_create_pic(kvm);
3051 if (kvm->vpic) {
3052 r = kvm_ioapic_init(kvm);
3053 if (r) {
3054 kfree(kvm->vpic);
3055 kvm->vpic = NULL;
3056 goto out;
3057 }
3058 }
3059 else
3060 goto out;
3061 break;
3062 case KVM_IRQ_LINE: {
3063 struct kvm_irq_level irq_event;
3064
3065 r = -EFAULT;
3066 if (copy_from_user(&irq_event, argp, sizeof irq_event))
3067 goto out;
3068 if (irqchip_in_kernel(kvm)) {
3069 mutex_lock(&kvm->lock);
3070 if (irq_event.irq < 16)
3071 kvm_pic_set_irq(pic_irqchip(kvm),
3072 irq_event.irq,
3073 irq_event.level);
3074 kvm_ioapic_set_irq(kvm->vioapic,
3075 irq_event.irq,
3076 irq_event.level);
3077 mutex_unlock(&kvm->lock);
3078 r = 0;
3079 }
3080 break;
3081 }
3082 case KVM_GET_IRQCHIP: {
3083 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3084 struct kvm_irqchip chip;
3085
3086 r = -EFAULT;
3087 if (copy_from_user(&chip, argp, sizeof chip))
3088 goto out;
3089 r = -ENXIO;
3090 if (!irqchip_in_kernel(kvm))
3091 goto out;
3092 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
3093 if (r)
3094 goto out;
3095 r = -EFAULT;
3096 if (copy_to_user(argp, &chip, sizeof chip))
3097 goto out;
3098 r = 0;
3099 break;
3100 }
3101 case KVM_SET_IRQCHIP: {
3102 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3103 struct kvm_irqchip chip;
3104
3105 r = -EFAULT;
3106 if (copy_from_user(&chip, argp, sizeof chip))
3107 goto out;
3108 r = -ENXIO;
3109 if (!irqchip_in_kernel(kvm))
3110 goto out;
3111 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
3112 if (r)
3113 goto out;
3114 r = 0;
3115 break;
3116 }
2756 default: 3117 default:
2757 ; 3118 ;
2758 } 3119 }
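From userspace, the new VM ioctls pair up as: create the in-kernel PIC/IOAPIC once, then drive interrupt lines with KVM_IRQ_LINE. An illustrative sketch (the IRQ number is made up, error handling trimmed):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int pulse_irq4(int vm_fd)
{
        struct kvm_irq_level event = { .irq = 4, .level = 1 };

        if (ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0) < 0)
                return -1;
        if (ioctl(vm_fd, KVM_IRQ_LINE, &event) < 0)     /* assert */
                return -1;
        event.level = 0;                                /* deassert */
        return ioctl(vm_fd, KVM_IRQ_LINE, &event);
}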
@@ -2768,12 +3129,14 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
2768 unsigned long pgoff; 3129 unsigned long pgoff;
2769 struct page *page; 3130 struct page *page;
2770 3131
2771 *type = VM_FAULT_MINOR;
2772 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 3132 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
2773 page = gfn_to_page(kvm, pgoff); 3133 page = gfn_to_page(kvm, pgoff);
2774 if (!page) 3134 if (!page)
2775 return NOPAGE_SIGBUS; 3135 return NOPAGE_SIGBUS;
2776 get_page(page); 3136 get_page(page);
3137 if (type != NULL)
3138 *type = VM_FAULT_MINOR;
3139
2777 return page; 3140 return page;
2778} 3141}
2779 3142
@@ -2861,12 +3224,20 @@ static long kvm_dev_ioctl(struct file *filp,
2861 r = 0; 3224 r = 0;
2862 break; 3225 break;
2863 } 3226 }
2864 case KVM_CHECK_EXTENSION: 3227 case KVM_CHECK_EXTENSION: {
2865 /* 3228 int ext = (long)argp;
2866 * No extensions defined at present. 3229
2867 */ 3230 switch (ext) {
2868 r = 0; 3231 case KVM_CAP_IRQCHIP:
3232 case KVM_CAP_HLT:
3233 r = 1;
3234 break;
3235 default:
3236 r = 0;
3237 break;
3238 }
2869 break; 3239 break;
3240 }
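Since KVM_CHECK_EXTENSION previously answered 0 for everything, userspace should now probe a capability before depending on it. A sketch, with kvm_fd being the open /dev/kvm descriptor:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int have_in_kernel_irqchip(int kvm_fd)
{
        /* returns 1 if supported, 0 if not, negative on error */
        return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) > 0;
}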
2870 case KVM_GET_VCPU_MMAP_SIZE: 3241 case KVM_GET_VCPU_MMAP_SIZE:
2871 r = -EINVAL; 3242 r = -EINVAL;
2872 if (arg) 3243 if (arg)
@@ -2881,8 +3252,6 @@ out:
2881} 3252}
2882 3253
2883static struct file_operations kvm_chardev_ops = { 3254static struct file_operations kvm_chardev_ops = {
2884 .open = kvm_dev_open,
2885 .release = kvm_dev_release,
2886 .unlocked_ioctl = kvm_dev_ioctl, 3255 .unlocked_ioctl = kvm_dev_ioctl,
2887 .compat_ioctl = kvm_dev_ioctl, 3256 .compat_ioctl = kvm_dev_ioctl,
2888}; 3257};
@@ -2893,25 +3262,6 @@ static struct miscdevice kvm_dev = {
2893 &kvm_chardev_ops, 3262 &kvm_chardev_ops,
2894}; 3263};
2895 3264
2896static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
2897 void *v)
2898{
2899 if (val == SYS_RESTART) {
2900 /*
2901 * Some (well, at least mine) BIOSes hang on reboot if
2902 * in vmx root mode.
2903 */
2904 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
2905 on_each_cpu(hardware_disable, NULL, 0, 1);
2906 }
2907 return NOTIFY_OK;
2908}
2909
2910static struct notifier_block kvm_reboot_notifier = {
2911 .notifier_call = kvm_reboot,
2912 .priority = 0,
2913};
2914
2915/* 3265/*
2916 * Make sure that a cpu that is being hot-unplugged does not have any vcpus 3266 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
2917 * cached on it. 3267 * cached on it.
@@ -2925,7 +3275,9 @@ static void decache_vcpus_on_cpu(int cpu)
2925 spin_lock(&kvm_lock); 3275 spin_lock(&kvm_lock);
2926 list_for_each_entry(vm, &vm_list, vm_list) 3276 list_for_each_entry(vm, &vm_list, vm_list)
2927 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 3277 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
2928 vcpu = &vm->vcpus[i]; 3278 vcpu = vm->vcpus[i];
3279 if (!vcpu)
3280 continue;
2929 /* 3281 /*
2930 * If the vcpu is locked, then it is running on some 3282 * If the vcpu is locked, then it is running on some
2931 * other cpu and therefore it is not cached on the 3283 * other cpu and therefore it is not cached on the
@@ -2936,7 +3288,7 @@ static void decache_vcpus_on_cpu(int cpu)
2936 */ 3288 */
2937 if (mutex_trylock(&vcpu->mutex)) { 3289 if (mutex_trylock(&vcpu->mutex)) {
2938 if (vcpu->cpu == cpu) { 3290 if (vcpu->cpu == cpu) {
2939 kvm_arch_ops->vcpu_decache(vcpu); 3291 kvm_x86_ops->vcpu_decache(vcpu);
2940 vcpu->cpu = -1; 3292 vcpu->cpu = -1;
2941 } 3293 }
2942 mutex_unlock(&vcpu->mutex); 3294 mutex_unlock(&vcpu->mutex);
@@ -2952,7 +3304,7 @@ static void hardware_enable(void *junk)
2952 if (cpu_isset(cpu, cpus_hardware_enabled)) 3304 if (cpu_isset(cpu, cpus_hardware_enabled))
2953 return; 3305 return;
2954 cpu_set(cpu, cpus_hardware_enabled); 3306 cpu_set(cpu, cpus_hardware_enabled);
2955 kvm_arch_ops->hardware_enable(NULL); 3307 kvm_x86_ops->hardware_enable(NULL);
2956} 3308}
2957 3309
2958static void hardware_disable(void *junk) 3310static void hardware_disable(void *junk)
@@ -2963,7 +3315,7 @@ static void hardware_disable(void *junk)
2963 return; 3315 return;
2964 cpu_clear(cpu, cpus_hardware_enabled); 3316 cpu_clear(cpu, cpus_hardware_enabled);
2965 decache_vcpus_on_cpu(cpu); 3317 decache_vcpus_on_cpu(cpu);
2966 kvm_arch_ops->hardware_disable(NULL); 3318 kvm_x86_ops->hardware_disable(NULL);
2967} 3319}
2968 3320
2969static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, 3321static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
@@ -2994,6 +3346,25 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
2994 return NOTIFY_OK; 3346 return NOTIFY_OK;
2995} 3347}
2996 3348
3349static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
3350 void *v)
3351{
3352 if (val == SYS_RESTART) {
3353 /*
3354 * Some (well, at least mine) BIOSes hang on reboot if
3355 * in vmx root mode.
3356 */
3357 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
3358 on_each_cpu(hardware_disable, NULL, 0, 1);
3359 }
3360 return NOTIFY_OK;
3361}
3362
3363static struct notifier_block kvm_reboot_notifier = {
3364 .notifier_call = kvm_reboot,
3365 .priority = 0,
3366};
3367
2997void kvm_io_bus_init(struct kvm_io_bus *bus) 3368void kvm_io_bus_init(struct kvm_io_bus *bus)
2998{ 3369{
2999 memset(bus, 0, sizeof(*bus)); 3370 memset(bus, 0, sizeof(*bus));
@@ -3047,18 +3418,15 @@ static u64 stat_get(void *_offset)
3047 spin_lock(&kvm_lock); 3418 spin_lock(&kvm_lock);
3048 list_for_each_entry(kvm, &vm_list, vm_list) 3419 list_for_each_entry(kvm, &vm_list, vm_list)
3049 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 3420 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
3050 vcpu = &kvm->vcpus[i]; 3421 vcpu = kvm->vcpus[i];
3051 total += *(u32 *)((void *)vcpu + offset); 3422 if (vcpu)
3423 total += *(u32 *)((void *)vcpu + offset);
3052 } 3424 }
3053 spin_unlock(&kvm_lock); 3425 spin_unlock(&kvm_lock);
3054 return total; 3426 return total;
3055} 3427}
3056 3428
3057static void stat_set(void *offset, u64 val) 3429DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n");
3058{
3059}
3060
3061DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n");
3062 3430
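Passing NULL as the setter to DEFINE_SIMPLE_ATTRIBUTE makes the debugfs files genuinely read-only, where the old empty stat_set() silently swallowed writes. The same pattern in isolation, a sketch using the getter signature this kernel era expects (the names are illustrative):

#include <linux/debugfs.h>

static u64 demo_value;

static u64 demo_get(void *data)
{
        return *(u64 *)data;
}
DEFINE_SIMPLE_ATTRIBUTE(demo_fops, demo_get, NULL, "%llu\n");

/* debugfs_create_file("demo", 0444, parent_dir, &demo_value, &demo_fops); */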
3063static __init void kvm_init_debug(void) 3431static __init void kvm_init_debug(void)
3064{ 3432{
@@ -3105,11 +3473,34 @@ static struct sys_device kvm_sysdev = {
3105 3473
3106hpa_t bad_page_address; 3474hpa_t bad_page_address;
3107 3475
3108int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) 3476static inline
3477struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
3478{
3479 return container_of(pn, struct kvm_vcpu, preempt_notifier);
3480}
3481
3482static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
3483{
3484 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3485
3486 kvm_x86_ops->vcpu_load(vcpu, cpu);
3487}
3488
3489static void kvm_sched_out(struct preempt_notifier *pn,
3490 struct task_struct *next)
3491{
3492 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3493
3494 kvm_x86_ops->vcpu_put(vcpu);
3495}
3496
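These hooks are only half of the preempt-notifier scheme; the other half is registering each vcpu's notifier for the current task while the vcpu is loaded, which is also why kvm_resched() above no longer wraps cond_resched() in vcpu_put()/vcpu_load(). A sketch of what the reworked vcpu_load()/vcpu_put() plausibly look like (their bodies fall outside this diff and may differ in detail):

static void vcpu_load(struct kvm_vcpu *vcpu)
{
        int cpu;

        mutex_lock(&vcpu->mutex);
        cpu = get_cpu();
        preempt_notifier_register(&vcpu->preempt_notifier);
        kvm_x86_ops->vcpu_load(vcpu, cpu);   /* same hook kvm_sched_in uses */
        put_cpu();
}

static void vcpu_put(struct kvm_vcpu *vcpu)
{
        preempt_disable();
        kvm_x86_ops->vcpu_put(vcpu);
        preempt_notifier_unregister(&vcpu->preempt_notifier);
        preempt_enable();
        mutex_unlock(&vcpu->mutex);
}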
3497int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
3498 struct module *module)
3109{ 3499{
3110 int r; 3500 int r;
3501 int cpu;
3111 3502
3112 if (kvm_arch_ops) { 3503 if (kvm_x86_ops) {
3113 printk(KERN_ERR "kvm: already loaded the other module\n"); 3504 printk(KERN_ERR "kvm: already loaded the other module\n");
3114 return -EEXIST; 3505 return -EEXIST;
3115 } 3506 }
@@ -3123,12 +3514,20 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3123 return -EOPNOTSUPP; 3514 return -EOPNOTSUPP;
3124 } 3515 }
3125 3516
3126 kvm_arch_ops = ops; 3517 kvm_x86_ops = ops;
3127 3518
3128 r = kvm_arch_ops->hardware_setup(); 3519 r = kvm_x86_ops->hardware_setup();
3129 if (r < 0) 3520 if (r < 0)
3130 goto out; 3521 goto out;
3131 3522
3523 for_each_online_cpu(cpu) {
3524 smp_call_function_single(cpu,
3525 kvm_x86_ops->check_processor_compatibility,
3526 &r, 0, 1);
3527 if (r < 0)
3528 goto out_free_0;
3529 }
3530
3132 on_each_cpu(hardware_enable, NULL, 0, 1); 3531 on_each_cpu(hardware_enable, NULL, 0, 1);
3133 r = register_cpu_notifier(&kvm_cpu_notifier); 3532 r = register_cpu_notifier(&kvm_cpu_notifier);
3134 if (r) 3533 if (r)
@@ -3143,6 +3542,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3143 if (r) 3542 if (r)
3144 goto out_free_3; 3543 goto out_free_3;
3145 3544
3545 /* A kmem cache lets us meet the alignment requirements of fx_save. */
3546 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
3547 __alignof__(struct kvm_vcpu), 0, 0);
3548 if (!kvm_vcpu_cache) {
3549 r = -ENOMEM;
3550 goto out_free_4;
3551 }
3552
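The dedicated slab exists purely for alignment: fxsave/fxrstor fault on a buffer that is not 16-byte aligned, and kmalloc() gives no such guarantee for an arbitrary structure, whereas kmem_cache_create() takes the alignment explicitly. The pattern in isolation, a sketch with made-up names:

#include <linux/slab.h>

struct fpu_holder {
        u8 fx_image[512] __attribute__((aligned(16)));  /* fxsave area */
};

static struct kmem_cache *holder_cache;

static int __init holder_cache_init(void)
{
        holder_cache = kmem_cache_create("fpu_holder",
                                         sizeof(struct fpu_holder),
                                         __alignof__(struct fpu_holder),
                                         0, NULL);
        return holder_cache ? 0 : -ENOMEM;
}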
3146 kvm_chardev_ops.owner = module; 3553 kvm_chardev_ops.owner = module;
3147 3554
3148 r = misc_register(&kvm_dev); 3555 r = misc_register(&kvm_dev);
@@ -3151,9 +3558,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3151 goto out_free; 3558 goto out_free;
3152 } 3559 }
3153 3560
3561 kvm_preempt_ops.sched_in = kvm_sched_in;
3562 kvm_preempt_ops.sched_out = kvm_sched_out;
3563
3154 return r; 3564 return r;
3155 3565
3156out_free: 3566out_free:
3567 kmem_cache_destroy(kvm_vcpu_cache);
3568out_free_4:
3157 sysdev_unregister(&kvm_sysdev); 3569 sysdev_unregister(&kvm_sysdev);
3158out_free_3: 3570out_free_3:
3159 sysdev_class_unregister(&kvm_sysdev_class); 3571 sysdev_class_unregister(&kvm_sysdev_class);
@@ -3162,22 +3574,24 @@ out_free_2:
3162 unregister_cpu_notifier(&kvm_cpu_notifier); 3574 unregister_cpu_notifier(&kvm_cpu_notifier);
3163out_free_1: 3575out_free_1:
3164 on_each_cpu(hardware_disable, NULL, 0, 1); 3576 on_each_cpu(hardware_disable, NULL, 0, 1);
3165 kvm_arch_ops->hardware_unsetup(); 3577out_free_0:
3578 kvm_x86_ops->hardware_unsetup();
3166out: 3579out:
3167 kvm_arch_ops = NULL; 3580 kvm_x86_ops = NULL;
3168 return r; 3581 return r;
3169} 3582}
3170 3583
3171void kvm_exit_arch(void) 3584void kvm_exit_x86(void)
3172{ 3585{
3173 misc_deregister(&kvm_dev); 3586 misc_deregister(&kvm_dev);
3587 kmem_cache_destroy(kvm_vcpu_cache);
3174 sysdev_unregister(&kvm_sysdev); 3588 sysdev_unregister(&kvm_sysdev);
3175 sysdev_class_unregister(&kvm_sysdev_class); 3589 sysdev_class_unregister(&kvm_sysdev_class);
3176 unregister_reboot_notifier(&kvm_reboot_notifier); 3590 unregister_reboot_notifier(&kvm_reboot_notifier);
3177 unregister_cpu_notifier(&kvm_cpu_notifier); 3591 unregister_cpu_notifier(&kvm_cpu_notifier);
3178 on_each_cpu(hardware_disable, NULL, 0, 1); 3592 on_each_cpu(hardware_disable, NULL, 0, 1);
3179 kvm_arch_ops->hardware_unsetup(); 3593 kvm_x86_ops->hardware_unsetup();
3180 kvm_arch_ops = NULL; 3594 kvm_x86_ops = NULL;
3181} 3595}
3182 3596
3183static __init int kvm_init(void) 3597static __init int kvm_init(void)
@@ -3220,5 +3634,5 @@ static __exit void kvm_exit(void)
3220module_init(kvm_init) 3634module_init(kvm_init)
3221module_exit(kvm_exit) 3635module_exit(kvm_exit)
3222 3636
3223EXPORT_SYMBOL_GPL(kvm_init_arch); 3637EXPORT_SYMBOL_GPL(kvm_init_x86);
3224EXPORT_SYMBOL_GPL(kvm_exit_arch); 3638EXPORT_SYMBOL_GPL(kvm_exit_x86);