author	Peter Zijlstra <peterz@infradead.org>	2016-05-18 14:43:02 -0400
committer	Ingo Molnar <mingo@kernel.org>	2016-09-30 04:56:00 -0400
commit	cfd8983f03c7b2f977faab8dfc4ec5f6dbf9c1f3 (patch)
tree	f0343a19a513bfd5727c8133e62dc4d3e42bc5eb
parent	0b429e18c284af3e7a39f8ec44d95116c473fef8 (diff)
x86, locking/spinlocks: Remove ticket (spin)lock implementation
We've unconditionally used the queued spinlock for many releases now.
It's time to remove the old ticket lock code.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <waiman.long@hpe.com>
Cc: Waiman.Long@hpe.com
Cc: david.vrabel@citrix.com
Cc: dhowells@redhat.com
Cc: pbonzini@redhat.com
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20160518184302.GO3193@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	arch/x86/Kconfig	3
-rw-r--r--	arch/x86/include/asm/paravirt.h	18
-rw-r--r--	arch/x86/include/asm/paravirt_types.h	7
-rw-r--r--	arch/x86/include/asm/spinlock.h	174
-rw-r--r--	arch/x86/include/asm/spinlock_types.h	13
-rw-r--r--	arch/x86/kernel/kvm.c	245
-rw-r--r--	arch/x86/kernel/paravirt-spinlocks.c	7
-rw-r--r--	arch/x86/kernel/paravirt_patch_32.c	4
-rw-r--r--	arch/x86/kernel/paravirt_patch_64.c	4
-rw-r--r--	arch/x86/xen/spinlock.c	250
10 files changed, 6 insertions, 719 deletions
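
For orientation before the per-file diffs: after this patch the x86 paravirt lock interface consists solely of the queued-spinlock hooks. The condensed C sketch below is drawn from the arch/x86/include/asm/paravirt_types.h hunk further down; the typedefs and the simplified paravirt_callee_save are illustrative stand-ins, not the kernel's real definitions.

/* Condensed illustration of the interface that remains; the real
 * definitions live in asm/paravirt_types.h and asm/qspinlock.h. */
typedef unsigned char u8;                      /* stand-in for kernel fixed-width types */
typedef unsigned int  u32;

struct qspinlock;
struct paravirt_callee_save { void *func; };   /* simplified stand-in */

struct pv_lock_ops {
	void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
	struct paravirt_callee_save queued_spin_unlock;

	void (*wait)(u8 *ptr, u8 val);         /* pv backend: block until kicked */
	void (*kick)(int cpu);                 /* pv backend: wake a waiting vCPU */
};

KVM fills these hooks with kvm_wait/kvm_kick_cpu and Xen with xen_qlock_wait/xen_qlock_kick, as the kvm.c and xen/spinlock.c hunks below show.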
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2a1f0ce7c59a..0cc8811af4e0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -705,7 +705,6 @@ config PARAVIRT_DEBUG
 config PARAVIRT_SPINLOCKS
 	bool "Paravirtualization layer for spinlocks"
 	depends on PARAVIRT && SMP
-	select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
 	---help---
 	  Paravirtualized spinlocks allow a pvops backend to replace the
 	  spinlock implementation with something virtualization-friendly
@@ -718,7 +717,7 @@ config PARAVIRT_SPINLOCKS
 
 config QUEUED_LOCK_STAT
 	bool "Paravirt queued spinlock statistics"
-	depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS
+	depends on PARAVIRT_SPINLOCKS && DEBUG_FS
 	---help---
 	  Enable the collection of statistical data on the slowpath
 	  behavior of paravirtualized queued spinlocks and report
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2970d22d7766..4cd8db05301f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -661,8 +661,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
 static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
 							u32 val)
 {
@@ -684,22 +682,6 @@ static __always_inline void pv_kick(int cpu)
 	PVOP_VCALL1(pv_lock_ops.kick, cpu);
 }
 
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-
-static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
-							__ticket_t ticket)
-{
-	PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket);
-}
-
-static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
-							__ticket_t ticket)
-{
-	PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
-}
-
-#endif /* CONFIG_QUEUED_SPINLOCKS */
-
 #endif /* SMP && PARAVIRT_SPINLOCKS */
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7fa9e7740ba3..60aac60ba25f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -301,23 +301,16 @@ struct pv_mmu_ops {
 struct arch_spinlock;
 #ifdef CONFIG_SMP
 #include <asm/spinlock_types.h>
-#else
-typedef u16 __ticket_t;
 #endif
 
 struct qspinlock;
 
 struct pv_lock_ops {
-#ifdef CONFIG_QUEUED_SPINLOCKS
 	void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
 	struct paravirt_callee_save queued_spin_unlock;
 
 	void (*wait)(u8 *ptr, u8 val);
 	void (*kick)(int cpu);
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-	struct paravirt_callee_save lock_spinning;
-	void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
 };
 
 /* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index be0a05913b91..921bea7a2708 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -20,187 +20,13 @@
  * (the type definitions are in asm/spinlock_types.h)
  */
 
-#ifdef CONFIG_X86_32
-# define LOCK_PTR_REG "a"
-#else
-# define LOCK_PTR_REG "D"
-#endif
-
-#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE))
-/*
- * On PPro SMP, we use a locked operation to unlock
- * (PPro errata 66, 92)
- */
-# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
-#else
-# define UNLOCK_LOCK_PREFIX
-#endif
-
 /* How long a lock should spin before we consider blocking */
 #define SPIN_THRESHOLD	(1 << 15)
 
 extern struct static_key paravirt_ticketlocks_enabled;
 static __always_inline bool static_key_false(struct static_key *key);
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
 #include <asm/qspinlock.h>
-#else
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-
-static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
-{
-	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
-}
-
-#else /* !CONFIG_PARAVIRT_SPINLOCKS */
-static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock,
-							__ticket_t ticket)
-{
-}
-static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
-						__ticket_t ticket)
-{
-}
-
-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
-static inline int __tickets_equal(__ticket_t one, __ticket_t two)
-{
-	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
-}
-
-static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
-							__ticket_t head)
-{
-	if (head & TICKET_SLOWPATH_FLAG) {
-		arch_spinlock_t old, new;
-
-		old.tickets.head = head;
-		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
-		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
-		new.tickets.tail = old.tickets.tail;
-
-		/* try to clear slowpath flag when there are no contenders */
-		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
-	}
-}
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
-}
-
-/*
- * Ticket locks are conceptually two parts, one indicating the current head of
- * the queue, and the other indicating the current tail. The lock is acquired
- * by atomically noting the tail and incrementing it by one (thus adding
- * ourself to the queue and noting our position), then waiting until the head
- * becomes equal to the the initial value of the tail.
- *
- * We use an xadd covering *both* parts of the lock, to increment the tail and
- * also load the position of the head, which takes care of memory ordering
- * issues and should be optimal for the uncontended case. Note the tail must be
- * in the high part, because a wide xadd increment of the low part would carry
- * up and contaminate the high part.
- */
-static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC };
-
-	inc = xadd(&lock->tickets, inc);
-	if (likely(inc.head == inc.tail))
-		goto out;
-
-	for (;;) {
-		unsigned count = SPIN_THRESHOLD;
-
-		do {
-			inc.head = READ_ONCE(lock->tickets.head);
-			if (__tickets_equal(inc.head, inc.tail))
-				goto clear_slowpath;
-			cpu_relax();
-		} while (--count);
-		__ticket_lock_spinning(lock, inc.tail);
-	}
-clear_slowpath:
-	__ticket_check_and_clear_slowpath(lock, inc.head);
-out:
-	barrier();	/* make sure nothing creeps before the lock is taken */
-}
-
-static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	arch_spinlock_t old, new;
-
-	old.tickets = READ_ONCE(lock->tickets);
-	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
-		return 0;
-
-	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
-	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
-
-	/* cmpxchg is a full barrier, so nothing can move before it */
-	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
-}
-
-static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	if (TICKET_SLOWPATH_FLAG &&
-	    static_key_false(&paravirt_ticketlocks_enabled)) {
-		__ticket_t head;
-
-		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
-
-		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
-			head &= ~TICKET_SLOWPATH_FLAG;
-			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
-		}
-	} else
-		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
-}
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
-
-	return !__tickets_equal(tmp.tail, tmp.head);
-}
-
-static inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
-	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
-
-	tmp.head &= ~TICKET_SLOWPATH_FLAG;
-	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
-static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
-						  unsigned long flags)
-{
-	arch_spin_lock(lock);
-}
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	__ticket_t head = READ_ONCE(lock->tickets.head);
-
-	for (;;) {
-		struct __raw_tickets tmp = READ_ONCE(lock->tickets);
-		/*
-		 * We need to check "unlocked" in a loop, tmp.head == head
-		 * can be false positive because of overflow.
-		 */
-		if (__tickets_equal(tmp.head, tmp.tail) ||
-		    !__tickets_equal(tmp.head, head))
-			break;
-
-		cpu_relax();
-	}
-}
-#endif /* CONFIG_QUEUED_SPINLOCKS */
 
 /*
  * Read-write spinlocks, allowing multiple readers
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 65c3e37f879a..25311ebb446c 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -23,20 +23,7 @@ typedef u32 __ticketpair_t;
 
 #define TICKET_SHIFT	(sizeof(__ticket_t) * 8)
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
 #include <asm-generic/qspinlock_types.h>
-#else
-typedef struct arch_spinlock {
-	union {
-		__ticketpair_t head_tail;
-		struct __raw_tickets {
-			__ticket_t head, tail;
-		} tickets;
-	};
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
-#endif /* CONFIG_QUEUED_SPINLOCKS */
 
 #include <asm-generic/qrwlock_types.h>
 
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1726c4c12336..865058d087ac 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -575,9 +575,6 @@ static void kvm_kick_cpu(int cpu)
 	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
 }
 
-
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
 #include <asm/qspinlock.h>
 
 static void kvm_wait(u8 *ptr, u8 val)
@@ -606,243 +603,6 @@ out:
 	local_irq_restore(flags);
 }
 
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-
-enum kvm_contention_stat {
-	TAKEN_SLOW,
-	TAKEN_SLOW_PICKUP,
-	RELEASED_SLOW,
-	RELEASED_SLOW_KICKED,
-	NR_CONTENTION_STATS
-};
-
-#ifdef CONFIG_KVM_DEBUG_FS
-#define HISTO_BUCKETS	30
-
-static struct kvm_spinlock_stats
-{
-	u32 contention_stats[NR_CONTENTION_STATS];
-	u32 histo_spin_blocked[HISTO_BUCKETS+1];
-	u64 time_blocked;
-} spinlock_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
-	u8 ret;
-	u8 old;
-
-	old = READ_ONCE(zero_stats);
-	if (unlikely(old)) {
-		ret = cmpxchg(&zero_stats, old, 0);
-		/* This ensures only one fellow resets the stat */
-		if (ret == old)
-			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-	}
-}
-
-static inline void add_stats(enum kvm_contention_stat var, u32 val)
-{
-	check_zero();
-	spinlock_stats.contention_stats[var] += val;
-}
-
-
-static inline u64 spin_time_start(void)
-{
-	return sched_clock();
-}
-
-static void __spin_time_accum(u64 delta, u32 *array)
-{
-	unsigned index;
-
-	index = ilog2(delta);
-	check_zero();
-
-	if (index < HISTO_BUCKETS)
-		array[index]++;
-	else
-		array[HISTO_BUCKETS]++;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-	u32 delta;
-
-	delta = sched_clock() - start;
-	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
-	spinlock_stats.time_blocked += delta;
-}
-
-static struct dentry *d_spin_debug;
-static struct dentry *d_kvm_debug;
-
-static struct dentry *kvm_init_debugfs(void)
-{
-	d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
-	if (!d_kvm_debug)
-		printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
-
-	return d_kvm_debug;
-}
-
-static int __init kvm_spinlock_debugfs(void)
-{
-	struct dentry *d_kvm;
-
-	d_kvm = kvm_init_debugfs();
-	if (d_kvm == NULL)
-		return -ENOMEM;
-
-	d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
-
-	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
-
-	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-		   &spinlock_stats.contention_stats[TAKEN_SLOW]);
-	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-		   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
-
-	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-		   &spinlock_stats.contention_stats[RELEASED_SLOW]);
-	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-		   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
-
-	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
-		   &spinlock_stats.time_blocked);
-
-	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
-		     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
-
-	return 0;
-}
-fs_initcall(kvm_spinlock_debugfs);
-#else  /* !CONFIG_KVM_DEBUG_FS */
-static inline void add_stats(enum kvm_contention_stat var, u32 val)
-{
-}
-
-static inline u64 spin_time_start(void)
-{
-	return 0;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-}
-#endif  /* CONFIG_KVM_DEBUG_FS */
-
-struct kvm_lock_waiting {
-	struct arch_spinlock *lock;
-	__ticket_t want;
-};
-
-/* cpus 'waiting' on a spinlock to become available */
-static cpumask_t waiting_cpus;
-
-/* Track spinlock on which a cpu is waiting */
-static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
-
-__visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
-{
-	struct kvm_lock_waiting *w;
-	int cpu;
-	u64 start;
-	unsigned long flags;
-	__ticket_t head;
-
-	if (in_nmi())
-		return;
-
-	w = this_cpu_ptr(&klock_waiting);
-	cpu = smp_processor_id();
-	start = spin_time_start();
-
-	/*
-	 * Make sure an interrupt handler can't upset things in a
-	 * partially setup state.
-	 */
-	local_irq_save(flags);
-
-	/*
-	 * The ordering protocol on this is that the "lock" pointer
-	 * may only be set non-NULL if the "want" ticket is correct.
-	 * If we're updating "want", we must first clear "lock".
-	 */
-	w->lock = NULL;
-	smp_wmb();
-	w->want = want;
-	smp_wmb();
-	w->lock = lock;
-
-	add_stats(TAKEN_SLOW, 1);
-
-	/*
-	 * This uses set_bit, which is atomic but we should not rely on its
-	 * reordering gurantees. So barrier is needed after this call.
-	 */
-	cpumask_set_cpu(cpu, &waiting_cpus);
-
-	barrier();
-
-	/*
-	 * Mark entry to slowpath before doing the pickup test to make
-	 * sure we don't deadlock with an unlocker.
-	 */
-	__ticket_enter_slowpath(lock);
-
-	/* make sure enter_slowpath, which is atomic does not cross the read */
-	smp_mb__after_atomic();
-
-	/*
-	 * check again make sure it didn't become free while
-	 * we weren't looking.
-	 */
-	head = READ_ONCE(lock->tickets.head);
-	if (__tickets_equal(head, want)) {
-		add_stats(TAKEN_SLOW_PICKUP, 1);
-		goto out;
-	}
-
-	/*
-	 * halt until it's our turn and kicked. Note that we do safe halt
-	 * for irq enabled case to avoid hang when lock info is overwritten
-	 * in irq spinlock slowpath and no spurious interrupt occur to save us.
-	 */
-	if (arch_irqs_disabled_flags(flags))
-		halt();
-	else
-		safe_halt();
-
-out:
-	cpumask_clear_cpu(cpu, &waiting_cpus);
-	w->lock = NULL;
-	local_irq_restore(flags);
-	spin_time_accum_blocked(start);
-}
-PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
-
-/* Kick vcpu waiting on @lock->head to reach value @ticket */
-static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
-{
-	int cpu;
-
-	add_stats(RELEASED_SLOW, 1);
-	for_each_cpu(cpu, &waiting_cpus) {
-		const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
-		if (READ_ONCE(w->lock) == lock &&
-		    READ_ONCE(w->want) == ticket) {
-			add_stats(RELEASED_SLOW_KICKED, 1);
-			kvm_kick_cpu(cpu);
-			break;
-		}
-	}
-}
-
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
-
 /*
  * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
  */
@@ -854,16 +614,11 @@ void __init kvm_spinlock_init(void)
 	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
 		return;
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
 	__pv_init_lock_hash();
 	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
 	pv_lock_ops.wait = kvm_wait;
 	pv_lock_ops.kick = kvm_kick_cpu;
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
-	pv_lock_ops.unlock_kick = kvm_unlock_kick;
-#endif
 }
 
 static __init int kvm_spinlock_init_jump(void)
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 1939a0269377..2c55a003b793 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,7 +8,6 @@
 
 #include <asm/paravirt.h>
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
 __visible void __native_queued_spin_unlock(struct qspinlock *lock)
 {
 	native_queued_spin_unlock(lock);
@@ -21,19 +20,13 @@ bool pv_is_native_spin_unlock(void)
 	return pv_lock_ops.queued_spin_unlock.func ==
 		__raw_callee_save___native_queued_spin_unlock;
 }
-#endif
 
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
-#ifdef CONFIG_QUEUED_SPINLOCKS
 	.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
 	.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
 	.wait = paravirt_nop,
 	.kick = paravirt_nop,
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-	.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
-	.unlock_kick = paravirt_nop,
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
 #endif /* SMP */
 };
 EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 158dc0650d5d..920c6ae08592 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -10,7 +10,7 @@ DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
 #endif
 
@@ -49,7 +49,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
 		PATCH_SITE(pv_cpu_ops, clts);
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
 	case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
 		if (pv_is_native_spin_unlock()) {
 			start = start_pv_lock_ops_queued_spin_unlock;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index e70087a04cc8..bb3840cedb4f 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -19,7 +19,7 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 DEF_NATIVE(, mov32, "mov %edi, %eax");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
 #endif
 
@@ -61,7 +61,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_cpu_ops, clts);
 		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
 		PATCH_SITE(pv_cpu_ops, wbinvd);
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
 	case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
 		if (pv_is_native_spin_unlock()) {
 			start = start_pv_lock_ops_queued_spin_unlock;
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index f42e78de1e10..3d6e0064cbfc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -21,8 +21,6 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(char *, irq_name);
 static bool xen_pvspin = true;
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
 #include <asm/qspinlock.h>
 
 static void xen_qlock_kick(int cpu)
@@ -71,207 +69,6 @@ static void xen_qlock_wait(u8 *byte, u8 val)
 	xen_poll_irq(irq);
 }
 
-#else /* CONFIG_QUEUED_SPINLOCKS */
-
-enum xen_contention_stat {
-	TAKEN_SLOW,
-	TAKEN_SLOW_PICKUP,
-	TAKEN_SLOW_SPURIOUS,
-	RELEASED_SLOW,
-	RELEASED_SLOW_KICKED,
-	NR_CONTENTION_STATS
-};
-
-
-#ifdef CONFIG_XEN_DEBUG_FS
-#define HISTO_BUCKETS	30
-static struct xen_spinlock_stats
-{
-	u32 contention_stats[NR_CONTENTION_STATS];
-	u32 histo_spin_blocked[HISTO_BUCKETS+1];
-	u64 time_blocked;
-} spinlock_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
-	u8 ret;
-	u8 old = READ_ONCE(zero_stats);
-	if (unlikely(old)) {
-		ret = cmpxchg(&zero_stats, old, 0);
-		/* This ensures only one fellow resets the stat */
-		if (ret == old)
-			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-	}
-}
-
-static inline void add_stats(enum xen_contention_stat var, u32 val)
-{
-	check_zero();
-	spinlock_stats.contention_stats[var] += val;
-}
-
-static inline u64 spin_time_start(void)
-{
-	return xen_clocksource_read();
-}
-
-static void __spin_time_accum(u64 delta, u32 *array)
-{
-	unsigned index = ilog2(delta);
-
-	check_zero();
-
-	if (index < HISTO_BUCKETS)
-		array[index]++;
-	else
-		array[HISTO_BUCKETS]++;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
-	spinlock_stats.time_blocked += delta;
-}
-#else  /* !CONFIG_XEN_DEBUG_FS */
-static inline void add_stats(enum xen_contention_stat var, u32 val)
-{
-}
-
-static inline u64 spin_time_start(void)
-{
-	return 0;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-}
-#endif  /* CONFIG_XEN_DEBUG_FS */
-
-struct xen_lock_waiting {
-	struct arch_spinlock *lock;
-	__ticket_t want;
-};
-
-static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
-static cpumask_t waiting_cpus;
-
-__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
-{
-	int irq = __this_cpu_read(lock_kicker_irq);
-	struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
-	int cpu = smp_processor_id();
-	u64 start;
-	__ticket_t head;
-	unsigned long flags;
-
-	/* If kicker interrupts not initialized yet, just spin */
-	if (irq == -1)
-		return;
-
-	start = spin_time_start();
-
-	/*
-	 * Make sure an interrupt handler can't upset things in a
-	 * partially setup state.
-	 */
-	local_irq_save(flags);
-	/*
-	 * We don't really care if we're overwriting some other
-	 * (lock,want) pair, as that would mean that we're currently
-	 * in an interrupt context, and the outer context had
-	 * interrupts enabled.  That has already kicked the VCPU out
-	 * of xen_poll_irq(), so it will just return spuriously and
-	 * retry with newly setup (lock,want).
-	 *
-	 * The ordering protocol on this is that the "lock" pointer
-	 * may only be set non-NULL if the "want" ticket is correct.
-	 * If we're updating "want", we must first clear "lock".
-	 */
-	w->lock = NULL;
-	smp_wmb();
-	w->want = want;
-	smp_wmb();
-	w->lock = lock;
-
-	/* This uses set_bit, which atomic and therefore a barrier */
-	cpumask_set_cpu(cpu, &waiting_cpus);
-	add_stats(TAKEN_SLOW, 1);
-
-	/* clear pending */
-	xen_clear_irq_pending(irq);
-
-	/* Only check lock once pending cleared */
-	barrier();
-
-	/*
-	 * Mark entry to slowpath before doing the pickup test to make
-	 * sure we don't deadlock with an unlocker.
-	 */
-	__ticket_enter_slowpath(lock);
-
-	/* make sure enter_slowpath, which is atomic does not cross the read */
-	smp_mb__after_atomic();
-
-	/*
-	 * check again make sure it didn't become free while
-	 * we weren't looking
-	 */
-	head = READ_ONCE(lock->tickets.head);
-	if (__tickets_equal(head, want)) {
-		add_stats(TAKEN_SLOW_PICKUP, 1);
-		goto out;
-	}
-
-	/* Allow interrupts while blocked */
-	local_irq_restore(flags);
-
-	/*
-	 * If an interrupt happens here, it will leave the wakeup irq
-	 * pending, which will cause xen_poll_irq() to return
-	 * immediately.
-	 */
-
-	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
-	xen_poll_irq(irq);
-	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
-
-	local_irq_save(flags);
-
-	kstat_incr_irq_this_cpu(irq);
-out:
-	cpumask_clear_cpu(cpu, &waiting_cpus);
-	w->lock = NULL;
-
-	local_irq_restore(flags);
-
-	spin_time_accum_blocked(start);
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
-
-static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
-{
-	int cpu;
-
-	add_stats(RELEASED_SLOW, 1);
-
-	for_each_cpu(cpu, &waiting_cpus) {
-		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
-
-		/* Make sure we read lock before want */
-		if (READ_ONCE(w->lock) == lock &&
-		    READ_ONCE(w->want) == next) {
-			add_stats(RELEASED_SLOW_KICKED, 1);
-			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
-			break;
-		}
-	}
-}
-#endif /* CONFIG_QUEUED_SPINLOCKS */
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -334,16 +131,12 @@ void __init xen_init_spinlocks(void)
 		return;
 	}
 	printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
-#ifdef CONFIG_QUEUED_SPINLOCKS
+
 	__pv_init_lock_hash();
 	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
 	pv_lock_ops.wait = xen_qlock_wait;
 	pv_lock_ops.kick = xen_qlock_kick;
-#else
-	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
-	pv_lock_ops.unlock_kick = xen_unlock_kick;
-#endif
 }
 
 /*
@@ -372,44 +165,3 @@ static __init int xen_parse_nopvspin(char *arg)
 }
 early_param("xen_nopvspin", xen_parse_nopvspin);
 
-#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
-
-static struct dentry *d_spin_debug;
-
-static int __init xen_spinlock_debugfs(void)
-{
-	struct dentry *d_xen = xen_init_debugfs();
-
-	if (d_xen == NULL)
-		return -ENOMEM;
-
-	if (!xen_pvspin)
-		return 0;
-
-	d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
-
-	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
-
-	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
-	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
-	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
-
-	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
-	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
-
-	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
-			   &spinlock_stats.time_blocked);
-
-	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
-				 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
-
-	return 0;
-}
-fs_initcall(xen_spinlock_debugfs);
-
-#endif	/* CONFIG_XEN_DEBUG_FS */