author    Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>  2013-08-06 05:25:41 -0400
committer Ingo Molnar <mingo@kernel.org>                 2013-08-14 07:12:35 -0400
commit    92b75202e5e8790905f9441ccaea2456cc4621a5
tree      c683864840122de1b5490a70bcffd29bfd6af683
parent    1e20eb8557cdabf76473b09572be8aa8a2bb9bc0
kvm: Paravirtual ticketlocks support for linux guests running on KVM hypervisor
During smp_boot_cpus, a paravirtualized KVM guest detects whether the
hypervisor has the required feature (KVM_FEATURE_PV_UNHALT) to support
pv-ticketlocks. If so, support for pv-ticketlocks is registered via
pv_lock_ops. The KVM_HC_KICK_CPU hypercall is used to wake up a
waiting/halted vcpu.

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20130810193849.GA25260@linux.vnet.ibm.com
Signed-off-by: Suzuki Poulose <suzuki@in.ibm.com>
[Raghu: check_zero race fix, enum for kvm_contention_stat, jump-label
related changes, addition of safe_halt for the irq-enabled case, bail
out of spinning in the NMI case (Gleb)]
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Acked-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
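In outline, the guest-side flow described above has two pieces: a one-time
boot probe for the host feature bit, and a two-argument hypercall that wakes
a halted vcpu by its APIC ID. A minimal sketch of both, condensed from
kvm_spinlock_init() and kvm_kick_cpu() in the diff below (pv_unhalt_usable()
and kick_vcpu() are illustrative names, not part of the patch, which inlines
these checks directly):

    #include <asm/kvm_para.h>  /* kvm_para_*(), kvm_hypercall2(), KVM_HC_KICK_CPU */

    /* Boot-time probe: only hook pv_lock_ops if the host can unhalt vcpus. */
    static bool pv_unhalt_usable(void)
    {
            return kvm_para_available() &&
                   kvm_para_has_feature(KVM_FEATURE_PV_UNHALT);
    }

    /* Wakeup: the first argument (flags) is reserved and must be 0 for now;
     * the second names the halted vcpu by APIC ID. */
    static void kick_vcpu(int apicid)
    {
            kvm_hypercall2(KVM_HC_KICK_CPU, 0, apicid);
    }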
 arch/x86/include/asm/kvm_para.h |  14 +-
 arch/x86/kernel/kvm.c           | 262 +++++++++++++++++++++++++++++++++++-
 2 files changed, 274 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 695399f2d5eb..427afcbf3d55 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -118,10 +118,20 @@ void kvm_async_pf_task_wait(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-#else
-#define kvm_guest_init() do { } while (0)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init kvm_spinlock_init(void);
+#else /* !CONFIG_PARAVIRT_SPINLOCKS */
+static inline void kvm_spinlock_init(void)
+{
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_KVM_GUEST */
+#define kvm_guest_init() do {} while (0)
 #define kvm_async_pf_task_wait(T) do {} while(0)
 #define kvm_async_pf_task_wake(T) do {} while(0)
+
 static inline u32 kvm_read_and_reset_pf_reason(void)
 {
 	return 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a96d32cc55b8..b8ef6305cf35 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/kprobes.h>
+#include <linux/debugfs.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 	WARN_ON(kvm_register_clock("primary cpu clock"));
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
+	kvm_spinlock_init();
 }
 
 static void kvm_guest_cpu_online(void *dummy)
@@ -523,3 +525,263 @@ static __init int activate_jump_labels(void)
 	return 0;
 }
 arch_initcall(activate_jump_labels);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
+void kvm_kick_cpu(int cpu)
+{
+	int apicid;
+	unsigned long flags = 0;
+
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
+}
+
+enum kvm_contention_stat {
+	TAKEN_SLOW,
+	TAKEN_SLOW_PICKUP,
+	RELEASED_SLOW,
+	RELEASED_SLOW_KICKED,
+	NR_CONTENTION_STATS
+};
+
+#ifdef CONFIG_KVM_DEBUG_FS
+#define HISTO_BUCKETS	30
+
+static struct kvm_spinlock_stats
+{
+	u32 contention_stats[NR_CONTENTION_STATS];
+	u32 histo_spin_blocked[HISTO_BUCKETS+1];
+	u64 time_blocked;
+} spinlock_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+	u8 ret;
+	u8 old;
+
+	old = ACCESS_ONCE(zero_stats);
+	if (unlikely(old)) {
+		ret = cmpxchg(&zero_stats, old, 0);
+		/* This ensures only one fellow resets the stat */
+		if (ret == old)
+			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
+	}
+}
+
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+	check_zero();
+	spinlock_stats.contention_stats[var] += val;
+}
+
+
+static inline u64 spin_time_start(void)
+{
+	return sched_clock();
+}
+
+static void __spin_time_accum(u64 delta, u32 *array)
+{
+	unsigned index;
+
+	index = ilog2(delta);
+	check_zero();
+
+	if (index < HISTO_BUCKETS)
+		array[index]++;
+	else
+		array[HISTO_BUCKETS]++;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+	u32 delta;
+
+	delta = sched_clock() - start;
+	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
+	spinlock_stats.time_blocked += delta;
+}
+
+static struct dentry *d_spin_debug;
+static struct dentry *d_kvm_debug;
+
+struct dentry *kvm_init_debugfs(void)
+{
+	d_kvm_debug = debugfs_create_dir("kvm", NULL);
+	if (!d_kvm_debug)
+		printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
+
+	return d_kvm_debug;
+}
+
+static int __init kvm_spinlock_debugfs(void)
+{
+	struct dentry *d_kvm;
+
+	d_kvm = kvm_init_debugfs();
+	if (d_kvm == NULL)
+		return -ENOMEM;
+
+	d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
+
+	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
+
+	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
+			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
+	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
+
+	debugfs_create_u32("released_slow", 0444, d_spin_debug,
+			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
+	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
+			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
+
+	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
+			   &spinlock_stats.time_blocked);
+
+	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+				 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+
+	return 0;
+}
+fs_initcall(kvm_spinlock_debugfs);
+#else  /* !CONFIG_KVM_DEBUG_FS */
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+}
+
+static inline u64 spin_time_start(void)
+{
+	return 0;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+}
+#endif  /* CONFIG_KVM_DEBUG_FS */
+
+struct kvm_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
+};
+
+/* cpus 'waiting' on a spinlock to become available */
+static cpumask_t waiting_cpus;
+
+/* Track spinlock on which a cpu is waiting */
+static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
+
+static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+{
+	struct kvm_lock_waiting *w;
+	int cpu;
+	u64 start;
+	unsigned long flags;
+
+	if (in_nmi())
+		return;
+
+	w = &__get_cpu_var(klock_waiting);
+	cpu = smp_processor_id();
+	start = spin_time_start();
+
+	/*
+	 * Make sure an interrupt handler can't upset things in a
+	 * partially setup state.
+	 */
+	local_irq_save(flags);
+
+	/*
+	 * The ordering protocol on this is that the "lock" pointer
+	 * may only be set non-NULL if the "want" ticket is correct.
+	 * If we're updating "want", we must first clear "lock".
+	 */
+	w->lock = NULL;
+	smp_wmb();
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;
+
+	add_stats(TAKEN_SLOW, 1);
+
+	/*
+	 * This uses set_bit, which is atomic but we should not rely on its
+	 * reordering guarantees. So a barrier is needed after this call.
+	 */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+
+	barrier();
+
+	/*
+	 * Mark entry to slowpath before doing the pickup test to make
+	 * sure we don't deadlock with an unlocker.
+	 */
+	__ticket_enter_slowpath(lock);
+
+	/*
+	 * Check again to make sure the lock didn't become free while
+	 * we weren't looking.
+	 */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
+
+	/*
+	 * Halt until it's our turn and we get kicked. Note that we do a safe
+	 * halt for the irq-enabled case, to avoid a hang when the lock info is
+	 * overwritten in the irq spinlock slowpath and no spurious interrupt occurs.
+	 */
+	if (arch_irqs_disabled_flags(flags))
+		halt();
+	else
+		safe_halt();
+
+out:
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
+	local_irq_restore(flags);
+	spin_time_accum_blocked(start);
+}
+PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
+
+/* Kick the vcpu waiting on @lock->head to reach value @ticket */
+static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
+{
+	int cpu;
+
+	add_stats(RELEASED_SLOW, 1);
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
+		if (ACCESS_ONCE(w->lock) == lock &&
+		    ACCESS_ONCE(w->want) == ticket) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
+			kvm_kick_cpu(cpu);
+			break;
+		}
+	}
+}
+
+/*
+ * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
+ */
+void __init kvm_spinlock_init(void)
+{
+	if (!kvm_para_available())
+		return;
+	/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
+	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+		return;
+
+	printk(KERN_INFO "KVM setup paravirtual spinlock\n");
+
+	static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+	pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
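
The guest half above is inert without the host half: KVM_HC_KICK_CPU is
implemented by the companion hypervisor-side patch in this series, which
teaches KVM's hypercall dispatcher to deliver a wakeup event to the vcpu
whose APIC ID matches the hypercall's second argument. A rough sketch of
that handler, shown here for orientation only (the function name and the
use of remote-read delivery mode follow the companion host patch, not this
one):

    /* Host side: kick the vcpu named by @apicid out of halt. */
    static void kvm_pv_kick_cpu(struct kvm *kvm, unsigned long flags, int apicid)
    {
            struct kvm_lapic_irq lapic_irq;

            lapic_irq.shorthand = 0;
            lapic_irq.dest_mode = 0;
            lapic_irq.dest_id = apicid;
            /* Remote-read delivery mode is repurposed as a pure "kick". */
            lapic_irq.delivery_mode = APIC_DM_REMRD;

            kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
    }

The delivered event makes the target vcpu runnable again, so the halt() in
kvm_lock_spinning() returns and the waiter re-checks its ticket.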