author	Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>	2013-08-06 05:25:41 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-08-14 07:12:35 -0400
commit	92b75202e5e8790905f9441ccaea2456cc4621a5 (patch)
tree	c683864840122de1b5490a70bcffd29bfd6af683
parent	1e20eb8557cdabf76473b09572be8aa8a2bb9bc0 (diff)
kvm: Paravirtual ticketlocks support for linux guests running on KVM hypervisor
During smp_boot_cpus, a paravirtualized KVM guest detects whether the
hypervisor has the feature (KVM_FEATURE_PV_UNHALT) required to support
pv-ticketlocks. If so, support for pv-ticketlocks is registered via
pv_lock_ops.

The KVM_HC_KICK_CPU hypercall is used to wake up a waiting/halted vcpu.
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20130810193849.GA25260@linux.vnet.ibm.com
Signed-off-by: Suzuki Poulose <suzuki@in.ibm.com>
[Raghu: check_zero race fix, enum for kvm_contention_stat, jump-label-related
changes, addition of safe_halt for the irq-enabled case, bail out of spinning
in the NMI case (Gleb)]
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Acked-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
 arch/x86/include/asm/kvm_para.h |  14 ++-
 arch/x86/kernel/kvm.c           | 262 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 274 insertions(+), 2 deletions(-)
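
Before the diff itself, a minimal sketch of the guest-side flow the changelog
describes may help: the guest probes the hypervisor's feature bit and, on the
unlock side, kicks a specific vcpu by APIC id. This is an illustration
distilled from the patch below, not part of it; the function names
pv_unhalt_usable() and kick_vcpu() are hypothetical, while the helpers come
from <asm/kvm_para.h>.

#include <linux/types.h>
#include <asm/kvm_para.h>	/* kvm_para_available(), kvm_para_has_feature(),
				 * kvm_hypercall2() */
#include <asm/smp.h>		/* per-cpu x86_cpu_to_apicid map */

/* Hypothetical probe: is the host advertising KVM_FEATURE_PV_UNHALT? */
static bool pv_unhalt_usable(void)
{
	return kvm_para_available() &&
	       kvm_para_has_feature(KVM_FEATURE_PV_UNHALT);
}

/* Hypothetical kick: ask the host to unhalt the vcpu backing @cpu. */
static void kick_vcpu(int cpu)
{
	/* first hypercall argument (flags) is currently 0;
	 * the second is the target vcpu's APIC id */
	kvm_hypercall2(KVM_HC_KICK_CPU, 0, per_cpu(x86_cpu_to_apicid, cpu));
}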
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 695399f2d5eb..427afcbf3d55 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -118,10 +118,20 @@ void kvm_async_pf_task_wait(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-#else
-#define kvm_guest_init() do { } while (0)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init kvm_spinlock_init(void);
+#else /* !CONFIG_PARAVIRT_SPINLOCKS */
+static inline void kvm_spinlock_init(void)
+{
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_KVM_GUEST */
+#define kvm_guest_init() do {} while (0)
 #define kvm_async_pf_task_wait(T) do {} while(0)
 #define kvm_async_pf_task_wake(T) do {} while(0)
+
 static inline u32 kvm_read_and_reset_pf_reason(void)
 {
 	return 0;
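
One detail worth noting in this header change: the !CONFIG_PARAVIRT_SPINLOCKS
branch supplies an empty static inline kvm_spinlock_init(), so callers need no
#ifdef of their own. A minimal sketch of such a caller (the function name is
illustrative, not from the patch):

#include <asm/kvm_para.h>

static void __init setup_boot_cpu_example(void)	/* hypothetical caller */
{
	/* ... other boot-cpu setup ... */

	/* Compiles either way: a real registration with
	 * CONFIG_PARAVIRT_SPINLOCKS=y, an empty inline stub otherwise. */
	kvm_spinlock_init();
}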
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a96d32cc55b8..b8ef6305cf35 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/kprobes.h>
+#include <linux/debugfs.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 	WARN_ON(kvm_register_clock("primary cpu clock"));
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
+	kvm_spinlock_init();
 }
 
 static void kvm_guest_cpu_online(void *dummy)
@@ -523,3 +525,263 @@ static __init int activate_jump_labels(void)
 	return 0;
 }
 arch_initcall(activate_jump_labels);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
+void kvm_kick_cpu(int cpu)
+{
+	int apicid;
+	unsigned long flags = 0;
+
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
+}
+
+enum kvm_contention_stat {
+	TAKEN_SLOW,
+	TAKEN_SLOW_PICKUP,
+	RELEASED_SLOW,
+	RELEASED_SLOW_KICKED,
+	NR_CONTENTION_STATS
+};
+
+#ifdef CONFIG_KVM_DEBUG_FS
+#define HISTO_BUCKETS	30
+
+static struct kvm_spinlock_stats
+{
+	u32 contention_stats[NR_CONTENTION_STATS];
+	u32 histo_spin_blocked[HISTO_BUCKETS+1];
+	u64 time_blocked;
+} spinlock_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+	u8 ret;
+	u8 old;
+
+	old = ACCESS_ONCE(zero_stats);
+	if (unlikely(old)) {
+		ret = cmpxchg(&zero_stats, old, 0);
+		/* This ensures only one fellow resets the stat */
+		if (ret == old)
+			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
+	}
+}
+
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+	check_zero();
+	spinlock_stats.contention_stats[var] += val;
+}
+
+
+static inline u64 spin_time_start(void)
+{
+	return sched_clock();
+}
+
+static void __spin_time_accum(u64 delta, u32 *array)
+{
+	unsigned index;
+
+	index = ilog2(delta);
+	check_zero();
+
+	if (index < HISTO_BUCKETS)
+		array[index]++;
+	else
+		array[HISTO_BUCKETS]++;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+	u32 delta;
+
+	delta = sched_clock() - start;
+	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
+	spinlock_stats.time_blocked += delta;
+}
+
+static struct dentry *d_spin_debug;
+static struct dentry *d_kvm_debug;
+
+struct dentry *kvm_init_debugfs(void)
+{
+	d_kvm_debug = debugfs_create_dir("kvm", NULL);
+	if (!d_kvm_debug)
+		printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
+
+	return d_kvm_debug;
+}
+
+static int __init kvm_spinlock_debugfs(void)
+{
+	struct dentry *d_kvm;
+
+	d_kvm = kvm_init_debugfs();
+	if (d_kvm == NULL)
+		return -ENOMEM;
+
+	d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
+
+	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
+
+	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[TAKEN_SLOW]);
+	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
+
+	debugfs_create_u32("released_slow", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[RELEASED_SLOW]);
+	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
+
+	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
+		   &spinlock_stats.time_blocked);
+
+	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+		     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+
+	return 0;
+}
+fs_initcall(kvm_spinlock_debugfs);
+#else /* !CONFIG_KVM_DEBUG_FS */
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+}
+
+static inline u64 spin_time_start(void)
+{
+	return 0;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+}
+#endif /* CONFIG_KVM_DEBUG_FS */
+
+struct kvm_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
+};
+
+/* cpus 'waiting' on a spinlock to become available */
+static cpumask_t waiting_cpus;
+
+/* Track spinlock on which a cpu is waiting */
+static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
+
+static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+{
+	struct kvm_lock_waiting *w;
+	int cpu;
+	u64 start;
+	unsigned long flags;
+
+	if (in_nmi())
+		return;
+
+	w = &__get_cpu_var(klock_waiting);
+	cpu = smp_processor_id();
+	start = spin_time_start();
+
+	/*
+	 * Make sure an interrupt handler can't upset things in a
+	 * partially set-up state.
+	 */
+	local_irq_save(flags);
+
+	/*
+	 * The ordering protocol on this is that the "lock" pointer
+	 * may only be set non-NULL if the "want" ticket is correct.
+	 * If we're updating "want", we must first clear "lock".
+	 */
+	w->lock = NULL;
+	smp_wmb();
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;
+
+	add_stats(TAKEN_SLOW, 1);
+
+	/*
+	 * This uses set_bit, which is atomic but we should not rely on its
+	 * reordering guarantees. So a barrier is needed after this call.
+	 */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+
+	barrier();
+
+	/*
+	 * Mark entry to slowpath before doing the pickup test to make
+	 * sure we don't deadlock with an unlocker.
+	 */
+	__ticket_enter_slowpath(lock);
+
+	/*
+	 * Check again to make sure the lock didn't become free while
+	 * we weren't looking.
+	 */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
+
+	/*
+	 * Halt until it's our turn and we are kicked. Note that we do a safe
+	 * halt for the irqs-enabled case, to avoid hanging when the lock info
+	 * is overwritten in the irq slowpath and no spurious interrupt occurs.
+	 */
+	if (arch_irqs_disabled_flags(flags))
+		halt();
+	else
+		safe_halt();
+
+out:
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
+	local_irq_restore(flags);
+	spin_time_accum_blocked(start);
+}
+PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
+
+/* Kick a vcpu waiting on @lock->head to reach value @ticket */
+static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
+{
+	int cpu;
+
+	add_stats(RELEASED_SLOW, 1);
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
+		if (ACCESS_ONCE(w->lock) == lock &&
+		    ACCESS_ONCE(w->want) == ticket) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
+			kvm_kick_cpu(cpu);
+			break;
+		}
+	}
+}
+
+/*
+ * Set up pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
+ */
+void __init kvm_spinlock_init(void)
+{
+	if (!kvm_para_available())
+		return;
+	/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
+	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+		return;
+
+	printk(KERN_INFO "KVM setup paravirtual spinlock\n");
+
+	static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+	pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
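
The ordering comment in kvm_lock_spinning() ("the lock pointer may only be set
non-NULL if the want ticket is correct") is the heart of the waiter/kicker
handshake: kvm_unlock_kick() only kicks a cpu whose (lock, want) pair matches
exactly, so the pair must never be observable torn in the dangerous direction.
Below is a speculative user-space model of that publication order, with C11
release/acquire standing in for the kernel's smp_wmb()/ACCESS_ONCE()
discipline; all names here are illustrative, not kernel code.

/*
 * Speculative user-space model (NOT kernel code) of the (lock, want)
 * publication order in kvm_lock_spinning().
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct waiter {
	_Atomic(void *) lock;	/* non-NULL only while 'want' is valid */
	_Atomic unsigned want;
};

/* Waiter side: never observable with the new lock and a stale ticket. */
static void publish(struct waiter *w, void *lock, unsigned want)
{
	/* 1. Retract the old pair before touching 'want'. */
	atomic_store_explicit(&w->lock, NULL, memory_order_release);
	/* 2. Update the ticket; invisible to a matching kicker while
	 *    lock == NULL. */
	atomic_store_explicit(&w->want, want, memory_order_release);
	/* 3. Republish with the matching ticket in place. */
	atomic_store_explicit(&w->lock, lock, memory_order_release);
}

/* Kicker side: kick only on an exact (lock, ticket) match. */
static bool should_kick(struct waiter *w, void *lock, unsigned ticket)
{
	return atomic_load_explicit(&w->lock, memory_order_acquire) == lock &&
	       atomic_load_explicit(&w->want, memory_order_acquire) == ticket;
}

A missed match in the transient window is tolerable by design: the waiter
re-checks lock->tickets.head before halting, and a spurious kick merely sends
it around the spin loop again.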
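
On the stats side, __spin_time_accum() buckets each blocked interval by
ilog2() of its duration (sched_clock() counts nanoseconds), so bucket n holds
waits in the [2^n, 2^(n+1)) ns range and everything from 2^30 ns (about
1.07 s) upward lands in the final overflow bucket. A small stand-alone model
of the same bucketing, assuming user-space C with __builtin_clzll standing in
for the kernel's ilog2():

/* Stand-alone model of the ilog2() bucketing used by __spin_time_accum(). */
#include <stdio.h>
#include <stdint.h>

#define HISTO_BUCKETS 30

static unsigned bucket(uint64_t delta_ns)
{
	/* ilog2(x) = index of the highest set bit; 63 - clz for 64-bit x.
	 * (kernel ilog2(0) is undefined, so guard the zero case) */
	unsigned index = delta_ns ? 63 - __builtin_clzll(delta_ns) : 0;
	return index < HISTO_BUCKETS ? index : HISTO_BUCKETS;
}

int main(void)
{
	/* e.g. a 1500 ns wait: 2^10 <= 1500 < 2^11, so it lands in bucket 10 */
	printf("1500 ns -> bucket %u\n", bucket(1500));
	/* a 2 s wait exceeds 2^30 ns and folds into overflow bucket 30 */
	printf("2 s     -> bucket %u\n", bucket(2000000000ull));
	return 0;
}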