aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/kvm
diff options
context:
space:
mode:
authorAvi Kivity <avi@qumranet.com>2007-07-11 11:17:21 -0400
committerAvi Kivity <avi@qumranet.com>2007-10-13 04:18:20 -0400
commit15ad71460d75fd7ca41bb248a2310f3f39b302ba (patch)
tree1ea549e5c5629561c121a54def146fb6b706c2d4 /drivers/kvm
parent519ef35341b4f360f072ea74e398b70a5a2fc270 (diff)
KVM: Use the scheduler preemption notifiers to make kvm preemptible
Current kvm disables preemption while the new virtualization registers are in use. This of course is not very good for latency sensitive workloads (one use of virtualization is to offload user interface and other latency insensitive stuff to a container, so that it is easier to analyze the remaining workload). This patch re-enables preemption for kvm; preemption is now only disabled when switching the registers in and out, and during the switch to guest mode and back. Contains fixes from Shaohua Li <shaohua.li@intel.com>. Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers/kvm')
-rw-r--r--drivers/kvm/Kconfig1
-rw-r--r--drivers/kvm/kvm.h4
-rw-r--r--drivers/kvm/kvm_main.c43
-rw-r--r--drivers/kvm/mmu.c2
-rw-r--r--drivers/kvm/svm.c6
-rw-r--r--drivers/kvm/vmx.c22
6 files changed, 56 insertions, 22 deletions
diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
index 0a419a0de603..8749fa4ffcee 100644
--- a/drivers/kvm/Kconfig
+++ b/drivers/kvm/Kconfig
@@ -17,6 +17,7 @@ if VIRTUALIZATION
17config KVM 17config KVM
18 tristate "Kernel-based Virtual Machine (KVM) support" 18 tristate "Kernel-based Virtual Machine (KVM) support"
19 depends on X86 && EXPERIMENTAL 19 depends on X86 && EXPERIMENTAL
20 select PREEMPT_NOTIFIERS
20 select ANON_INODES 21 select ANON_INODES
21 ---help--- 22 ---help---
22 Support hosting fully virtualized guest machines using hardware 23 Support hosting fully virtualized guest machines using hardware
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index e92c84b04c1f..0667183ecbed 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
13#include <linux/signal.h> 13#include <linux/signal.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/preempt.h>
16#include <asm/signal.h> 17#include <asm/signal.h>
17 18
18#include <linux/kvm.h> 19#include <linux/kvm.h>
@@ -301,6 +302,7 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
301 302
302struct kvm_vcpu { 303struct kvm_vcpu {
303 struct kvm *kvm; 304 struct kvm *kvm;
305 struct preempt_notifier preempt_notifier;
304 int vcpu_id; 306 int vcpu_id;
305 struct mutex mutex; 307 struct mutex mutex;
306 int cpu; 308 int cpu;
@@ -429,7 +431,7 @@ struct kvm_arch_ops {
429 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); 431 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
430 void (*vcpu_free)(struct kvm_vcpu *vcpu); 432 void (*vcpu_free)(struct kvm_vcpu *vcpu);
431 433
432 void (*vcpu_load)(struct kvm_vcpu *vcpu); 434 void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
433 void (*vcpu_put)(struct kvm_vcpu *vcpu); 435 void (*vcpu_put)(struct kvm_vcpu *vcpu);
434 void (*vcpu_decache)(struct kvm_vcpu *vcpu); 436 void (*vcpu_decache)(struct kvm_vcpu *vcpu);
435 437
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 20947462f401..6035e6d35417 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -54,6 +54,8 @@ static cpumask_t cpus_hardware_enabled;
54 54
55struct kvm_arch_ops *kvm_arch_ops; 55struct kvm_arch_ops *kvm_arch_ops;
56 56
57static __read_mostly struct preempt_ops kvm_preempt_ops;
58
57#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) 59#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
58 60
59static struct kvm_stats_debugfs_item { 61static struct kvm_stats_debugfs_item {
@@ -239,13 +241,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
239 */ 241 */
240static void vcpu_load(struct kvm_vcpu *vcpu) 242static void vcpu_load(struct kvm_vcpu *vcpu)
241{ 243{
244 int cpu;
245
242 mutex_lock(&vcpu->mutex); 246 mutex_lock(&vcpu->mutex);
243 kvm_arch_ops->vcpu_load(vcpu); 247 cpu = get_cpu();
248 preempt_notifier_register(&vcpu->preempt_notifier);
249 kvm_arch_ops->vcpu_load(vcpu, cpu);
250 put_cpu();
244} 251}
245 252
246static void vcpu_put(struct kvm_vcpu *vcpu) 253static void vcpu_put(struct kvm_vcpu *vcpu)
247{ 254{
255 preempt_disable();
248 kvm_arch_ops->vcpu_put(vcpu); 256 kvm_arch_ops->vcpu_put(vcpu);
257 preempt_notifier_unregister(&vcpu->preempt_notifier);
258 preempt_enable();
249 mutex_unlock(&vcpu->mutex); 259 mutex_unlock(&vcpu->mutex);
250} 260}
251 261
@@ -1672,9 +1682,7 @@ void kvm_resched(struct kvm_vcpu *vcpu)
1672{ 1682{
1673 if (!need_resched()) 1683 if (!need_resched())
1674 return; 1684 return;
1675 vcpu_put(vcpu);
1676 cond_resched(); 1685 cond_resched();
1677 vcpu_load(vcpu);
1678} 1686}
1679EXPORT_SYMBOL_GPL(kvm_resched); 1687EXPORT_SYMBOL_GPL(kvm_resched);
1680 1688
@@ -1722,11 +1730,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
1722 unsigned bytes; 1730 unsigned bytes;
1723 int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; 1731 int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
1724 1732
1725 kvm_arch_ops->vcpu_put(vcpu);
1726 q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, 1733 q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
1727 PAGE_KERNEL); 1734 PAGE_KERNEL);
1728 if (!q) { 1735 if (!q) {
1729 kvm_arch_ops->vcpu_load(vcpu);
1730 free_pio_guest_pages(vcpu); 1736 free_pio_guest_pages(vcpu);
1731 return -ENOMEM; 1737 return -ENOMEM;
1732 } 1738 }
@@ -1738,7 +1744,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
1738 memcpy(p, q, bytes); 1744 memcpy(p, q, bytes);
1739 q -= vcpu->pio.guest_page_offset; 1745 q -= vcpu->pio.guest_page_offset;
1740 vunmap(q); 1746 vunmap(q);
1741 kvm_arch_ops->vcpu_load(vcpu);
1742 free_pio_guest_pages(vcpu); 1747 free_pio_guest_pages(vcpu);
1743 return 0; 1748 return 0;
1744} 1749}
@@ -2413,6 +2418,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
2413 if (IS_ERR(vcpu)) 2418 if (IS_ERR(vcpu))
2414 return PTR_ERR(vcpu); 2419 return PTR_ERR(vcpu);
2415 2420
2421 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
2422
2416 vcpu_load(vcpu); 2423 vcpu_load(vcpu);
2417 r = kvm_mmu_setup(vcpu); 2424 r = kvm_mmu_setup(vcpu);
2418 vcpu_put(vcpu); 2425 vcpu_put(vcpu);
@@ -3145,6 +3152,27 @@ static struct sys_device kvm_sysdev = {
3145 3152
3146hpa_t bad_page_address; 3153hpa_t bad_page_address;
3147 3154
3155static inline
3156struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
3157{
3158 return container_of(pn, struct kvm_vcpu, preempt_notifier);
3159}
3160
3161static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
3162{
3163 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3164
3165 kvm_arch_ops->vcpu_load(vcpu, cpu);
3166}
3167
3168static void kvm_sched_out(struct preempt_notifier *pn,
3169 struct task_struct *next)
3170{
3171 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3172
3173 kvm_arch_ops->vcpu_put(vcpu);
3174}
3175
3148int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) 3176int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3149{ 3177{
3150 int r; 3178 int r;
@@ -3191,6 +3219,9 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3191 goto out_free; 3219 goto out_free;
3192 } 3220 }
3193 3221
3222 kvm_preempt_ops.sched_in = kvm_sched_in;
3223 kvm_preempt_ops.sched_out = kvm_sched_out;
3224
3194 return r; 3225 return r;
3195 3226
3196out_free: 3227out_free:
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 5437de2aa2d8..396c736e546b 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -276,9 +276,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
276 kvm_mmu_free_some_pages(vcpu); 276 kvm_mmu_free_some_pages(vcpu);
277 if (r < 0) { 277 if (r < 0) {
278 spin_unlock(&vcpu->kvm->lock); 278 spin_unlock(&vcpu->kvm->lock);
279 kvm_arch_ops->vcpu_put(vcpu);
280 r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); 279 r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
281 kvm_arch_ops->vcpu_load(vcpu);
282 spin_lock(&vcpu->kvm->lock); 280 spin_lock(&vcpu->kvm->lock);
283 kvm_mmu_free_some_pages(vcpu); 281 kvm_mmu_free_some_pages(vcpu);
284 } 282 }
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 0feec8558599..3997bbd78fb7 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -625,12 +625,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
625 kfree(svm); 625 kfree(svm);
626} 626}
627 627
628static void svm_vcpu_load(struct kvm_vcpu *vcpu) 628static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
629{ 629{
630 struct vcpu_svm *svm = to_svm(vcpu); 630 struct vcpu_svm *svm = to_svm(vcpu);
631 int cpu, i; 631 int i;
632 632
633 cpu = get_cpu();
634 if (unlikely(cpu != vcpu->cpu)) { 633 if (unlikely(cpu != vcpu->cpu)) {
635 u64 tsc_this, delta; 634 u64 tsc_this, delta;
636 635
@@ -657,7 +656,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
657 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); 656 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
658 657
659 rdtscll(vcpu->host_tsc); 658 rdtscll(vcpu->host_tsc);
660 put_cpu();
661} 659}
662 660
663static void svm_vcpu_decache(struct kvm_vcpu *vcpu) 661static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 18f9b0b3fb1f..8c87d20f8e39 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -396,6 +396,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
396static void vmx_load_host_state(struct kvm_vcpu *vcpu) 396static void vmx_load_host_state(struct kvm_vcpu *vcpu)
397{ 397{
398 struct vcpu_vmx *vmx = to_vmx(vcpu); 398 struct vcpu_vmx *vmx = to_vmx(vcpu);
399 unsigned long flags;
399 400
400 if (!vmx->host_state.loaded) 401 if (!vmx->host_state.loaded)
401 return; 402 return;
@@ -408,12 +409,12 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
408 * If we have to reload gs, we must take care to 409 * If we have to reload gs, we must take care to
409 * preserve our gs base. 410 * preserve our gs base.
410 */ 411 */
411 local_irq_disable(); 412 local_irq_save(flags);
412 load_gs(vmx->host_state.gs_sel); 413 load_gs(vmx->host_state.gs_sel);
413#ifdef CONFIG_X86_64 414#ifdef CONFIG_X86_64
414 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); 415 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
415#endif 416#endif
416 local_irq_enable(); 417 local_irq_restore(flags);
417 418
418 reload_tss(); 419 reload_tss();
419 } 420 }
@@ -427,15 +428,12 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
427 * Switches to specified vcpu, until a matching vcpu_put(), but assumes 428 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
428 * vcpu mutex is already taken. 429 * vcpu mutex is already taken.
429 */ 430 */
430static void vmx_vcpu_load(struct kvm_vcpu *vcpu) 431static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
431{ 432{
432 struct vcpu_vmx *vmx = to_vmx(vcpu); 433 struct vcpu_vmx *vmx = to_vmx(vcpu);
433 u64 phys_addr = __pa(vmx->vmcs); 434 u64 phys_addr = __pa(vmx->vmcs);
434 int cpu;
435 u64 tsc_this, delta; 435 u64 tsc_this, delta;
436 436
437 cpu = get_cpu();
438
439 if (vcpu->cpu != cpu) 437 if (vcpu->cpu != cpu)
440 vcpu_clear(vcpu); 438 vcpu_clear(vcpu);
441 439
@@ -480,7 +478,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
480{ 478{
481 vmx_load_host_state(vcpu); 479 vmx_load_host_state(vcpu);
482 kvm_put_guest_fpu(vcpu); 480 kvm_put_guest_fpu(vcpu);
483 put_cpu();
484} 481}
485 482
486static void vmx_fpu_activate(struct kvm_vcpu *vcpu) 483static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -2127,6 +2124,8 @@ again:
2127 if (unlikely(r)) 2124 if (unlikely(r))
2128 goto out; 2125 goto out;
2129 2126
2127 preempt_disable();
2128
2130 if (!vcpu->mmio_read_completed) 2129 if (!vcpu->mmio_read_completed)
2131 do_interrupt_requests(vcpu, kvm_run); 2130 do_interrupt_requests(vcpu, kvm_run);
2132 2131
@@ -2269,6 +2268,9 @@ again:
2269 vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; 2268 vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
2270 2269
2271 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 2270 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
2271 vmx->launched = 1;
2272
2273 preempt_enable();
2272 2274
2273 if (unlikely(fail)) { 2275 if (unlikely(fail)) {
2274 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2276 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2283,7 +2285,6 @@ again:
2283 if (unlikely(prof_on == KVM_PROFILING)) 2285 if (unlikely(prof_on == KVM_PROFILING))
2284 profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); 2286 profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
2285 2287
2286 vmx->launched = 1;
2287 r = kvm_handle_exit(kvm_run, vcpu); 2288 r = kvm_handle_exit(kvm_run, vcpu);
2288 if (r > 0) { 2289 if (r > 0) {
2289 /* Give scheduler a change to reschedule. */ 2290 /* Give scheduler a change to reschedule. */
@@ -2372,6 +2373,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2372{ 2373{
2373 int err; 2374 int err;
2374 struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); 2375 struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL);
2376 int cpu;
2375 2377
2376 if (!vmx) 2378 if (!vmx)
2377 return ERR_PTR(-ENOMEM); 2379 return ERR_PTR(-ENOMEM);
@@ -2396,9 +2398,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2396 2398
2397 vmcs_clear(vmx->vmcs); 2399 vmcs_clear(vmx->vmcs);
2398 2400
2399 vmx_vcpu_load(&vmx->vcpu); 2401 cpu = get_cpu();
2402 vmx_vcpu_load(&vmx->vcpu, cpu);
2400 err = vmx_vcpu_setup(&vmx->vcpu); 2403 err = vmx_vcpu_setup(&vmx->vcpu);
2401 vmx_vcpu_put(&vmx->vcpu); 2404 vmx_vcpu_put(&vmx->vcpu);
2405 put_cpu();
2402 if (err) 2406 if (err)
2403 goto free_vmcs; 2407 goto free_vmcs;
2404 2408