path: root/arch/x86
author     Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>  2013-09-09 12:01:15 -0400
committer  Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>  2013-09-09 12:01:15 -0400
commit     c3f31f6a6f68bcb51689c90733282ec263602a9d (patch)
tree       07c2c7ae966b07d5adabe78215d9c76fa4ec531a /arch/x86
parent     e1a9c16b303725ac900fee2a3ec4dbe2c2f846ab (diff)
parent     36bd621337c91a1ecda588e5bbbae8dd9698bae7 (diff)
Merge branch 'x86/spinlocks' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into stable/for-linus-3.12
* 'x86/spinlocks' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/kvm/guest: Fix sparse warning: "symbol 'klock_waiting' was not declared as static"
  kvm: Paravirtual ticketlocks support for linux guests running on KVM hypervisor
  kvm guest: Add configuration support to enable debug information for KVM Guests
  kvm uapi: Add KICK_CPU and PV_UNHALT definition to uapi
  xen, pvticketlock: Allow interrupts to be enabled while blocking
  x86, ticketlock: Add slowpath logic
  jump_label: Split jumplabel ratelimit
  x86, pvticketlock: When paravirtualizing ticket locks, increment by 2
  x86, pvticketlock: Use callee-save for lock_spinning
  xen, pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks
  xen, pvticketlock: Xen implementation for PV ticket locks
  xen: Defer spinlock setup until boot CPU setup
  x86, ticketlock: Collapse a layer of functions
  x86, ticketlock: Don't inline _spin_unlock when using paravirt spinlocks
  x86, spinlock: Replace pv spinlocks with pv ticketlocks
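The series converts the x86 ticket spinlock into a paravirt-aware ticket lock: the ticket counter is bumped by TICKET_LOCK_INC (2) so bit 0 of the tail can be borrowed as a "slowpath" flag, and a hypervisor backend only has to supply "block this waiter" and "kick that waiter" hooks. A minimal user-space sketch of that ticket layout, assuming plain C11 and illustrative names (not the kernel code):

/*
 * Minimal user-space sketch of the ticket layout used by this series:
 * the tail is bumped by 2 (TICKET_LOCK_INC) so bit 0 never carries real
 * ticket data and can serve as a slowpath flag.
 * Build: cc -std=c11 ticket_sketch.c
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef uint8_t ticket_t;                 /* like __ticket_t for small NR_CPUS */
#define TICKET_LOCK_INC      ((ticket_t)2)
#define TICKET_SLOWPATH_FLAG ((ticket_t)1)

struct ticket_lock {
	_Atomic ticket_t head;            /* next ticket to be served      */
	_Atomic ticket_t tail;            /* next ticket to be handed out  */
};

int main(void)
{
	struct ticket_lock lock = { 0, 0 };

	/* lock: take a ticket; mask off the flag bit before comparing */
	ticket_t mine = atomic_fetch_add(&lock.tail, TICKET_LOCK_INC);
	mine &= (ticket_t)~TICKET_SLOWPATH_FLAG;

	printf("got ticket %u, head is %u -> %s\n", (unsigned)mine,
	       (unsigned)atomic_load(&lock.head),
	       mine == atomic_load(&lock.head) ? "lock acquired" : "must wait");

	/* unlock: advance head by the same increment */
	atomic_fetch_add(&lock.head, TICKET_LOCK_INC);
	return 0;
}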
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                       |  10
-rw-r--r--  arch/x86/include/asm/kvm_para.h        |  14
-rw-r--r--  arch/x86/include/asm/paravirt.h        |  32
-rw-r--r--  arch/x86/include/asm/paravirt_types.h  |  14
-rw-r--r--  arch/x86/include/asm/spinlock.h        | 128
-rw-r--r--  arch/x86/include/asm/spinlock_types.h  |  16
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h   |   1
-rw-r--r--  arch/x86/kernel/kvm.c                  | 262
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c   |  18
-rw-r--r--  arch/x86/xen/smp.c                     |   2
-rw-r--r--  arch/x86/xen/spinlock.c                | 387
11 files changed, 531 insertions(+), 353 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf92b0ce..b1fb846e6dac 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -632,6 +632,7 @@ config PARAVIRT_DEBUG
 config PARAVIRT_SPINLOCKS
 	bool "Paravirtualization layer for spinlocks"
 	depends on PARAVIRT && SMP
+	select UNINLINE_SPIN_UNLOCK
 	---help---
 	  Paravirtualized spinlocks allow a pvops backend to replace the
 	  spinlock implementation with something virtualization-friendly
@@ -656,6 +657,15 @@ config KVM_GUEST
 	  underlying device model, the host provides the guest with
 	  timing infrastructure such as time of day, and system time
 
+config KVM_DEBUG_FS
+	bool "Enable debug information for KVM Guests in debugfs"
+	depends on KVM_GUEST && DEBUG_FS
+	default n
+	---help---
+	  This option enables collection of various statistics for KVM guest.
+	  Statistics are displayed in debugfs filesystem. Enabling this option
+	  may incur significant overhead.
+
 source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT_TIME_ACCOUNTING
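KVM_DEBUG_FS only toggles the contention statistics exported later in this merge (arch/x86/kernel/kvm.c). A hedged sketch of the same debugfs pattern as a stand-alone module, with hypothetical directory and counter names, assuming the 3.x-era debugfs helpers the patch itself uses:

/*
 * Sketch (not from the patch): exporting a KVM_DEBUG_FS-style counter
 * via debugfs. Names are hypothetical; the helpers match kvm.c above.
 */
#include <linux/module.h>
#include <linux/debugfs.h>

static u32 taken_slow_demo;          /* hypothetical contention counter */
static struct dentry *demo_dir;

static int __init demo_init(void)
{
	demo_dir = debugfs_create_dir("pvlock-demo", NULL);
	if (!demo_dir)
		return -ENOMEM;
	/* read-only u32: cat /sys/kernel/debug/pvlock-demo/taken_slow */
	debugfs_create_u32("taken_slow", 0444, demo_dir, &taken_slow_demo);
	return 0;
}

static void __exit demo_exit(void)
{
	debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");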
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 695399f2d5eb..427afcbf3d55 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -118,10 +118,20 @@ void kvm_async_pf_task_wait(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-#else
-#define kvm_guest_init() do { } while (0)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init kvm_spinlock_init(void);
+#else /* !CONFIG_PARAVIRT_SPINLOCKS */
+static inline void kvm_spinlock_init(void)
+{
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_KVM_GUEST */
+#define kvm_guest_init() do {} while (0)
 #define kvm_async_pf_task_wait(T) do {} while(0)
 #define kvm_async_pf_task_wake(T) do {} while(0)
+
 static inline u32 kvm_read_and_reset_pf_reason(void)
 {
 	return 0;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cfdc9ee4c900..401f350ef71b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
-static inline int arch_spin_is_locked(struct arch_spinlock *lock)
+static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
+							__ticket_t ticket)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+	PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket);
 }
 
-static inline int arch_spin_is_contended(struct arch_spinlock *lock)
+static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
+							__ticket_t ticket)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
-static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
-{
-	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
-}
-
-static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
-						  unsigned long flags)
-{
-	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
-}
-
-static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
-{
-	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
-}
-
-static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
-{
-	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+	PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
 }
 
 #endif
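With this change the spinlock pvops shrink from six full operations to two hooks that only run on the slow path: lock_spinning (callee-save) and unlock_kick. A user-space sketch of that narrowed interface, with illustrative names and a no-op default standing in for paravirt_nop (not kernel code):

/*
 * User-space sketch: a backend now supplies only "I am about to block on
 * ticket T" and "kick the waiter of ticket T". Names are illustrative.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint8_t ticket_t;
struct spinlock;                           /* opaque, like arch_spinlock */

struct pv_lock_ops_sketch {
	void (*lock_spinning)(struct spinlock *lock, ticket_t ticket);
	void (*unlock_kick)(struct spinlock *lock, ticket_t ticket);
};

static void nop_spinning(struct spinlock *lock, ticket_t ticket)
{
	(void)lock; (void)ticket;          /* bare metal: nothing to do */
}
static void nop_kick(struct spinlock *lock, ticket_t ticket)
{
	(void)lock; (void)ticket;
}

/* Default: both hooks are no-ops, exactly like paravirt_nop in the patch. */
static struct pv_lock_ops_sketch pv_lock_ops_sketch = {
	.lock_spinning = nop_spinning,
	.unlock_kick   = nop_kick,
};

int main(void)
{
	/* A hypervisor backend would overwrite the two pointers at boot. */
	pv_lock_ops_sketch.lock_spinning(NULL, 2);
	pv_lock_ops_sketch.unlock_kick(NULL, 2);
	puts("both hooks default to no-ops");
	return 0;
}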
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0db1fcac668c..04ac40e192eb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -327,13 +327,15 @@ struct pv_mmu_ops {
 };
 
 struct arch_spinlock;
+#ifdef CONFIG_SMP
+#include <asm/spinlock_types.h>
+#else
+typedef u16 __ticket_t;
+#endif
+
 struct pv_lock_ops {
-	int (*spin_is_locked)(struct arch_spinlock *lock);
-	int (*spin_is_contended)(struct arch_spinlock *lock);
-	void (*spin_lock)(struct arch_spinlock *lock);
-	void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
-	int (*spin_trylock)(struct arch_spinlock *lock);
-	void (*spin_unlock)(struct arch_spinlock *lock);
+	struct paravirt_callee_save lock_spinning;
+	void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
 };
 
 /* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 33692eaabab5..d68883dd133c 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -1,11 +1,14 @@
1#ifndef _ASM_X86_SPINLOCK_H 1#ifndef _ASM_X86_SPINLOCK_H
2#define _ASM_X86_SPINLOCK_H 2#define _ASM_X86_SPINLOCK_H
3 3
4#include <linux/jump_label.h>
4#include <linux/atomic.h> 5#include <linux/atomic.h>
5#include <asm/page.h> 6#include <asm/page.h>
6#include <asm/processor.h> 7#include <asm/processor.h>
7#include <linux/compiler.h> 8#include <linux/compiler.h>
8#include <asm/paravirt.h> 9#include <asm/paravirt.h>
10#include <asm/bitops.h>
11
9/* 12/*
10 * Your basic SMP spinlocks, allowing only a single CPU anywhere 13 * Your basic SMP spinlocks, allowing only a single CPU anywhere
11 * 14 *
@@ -34,6 +37,31 @@
34# define UNLOCK_LOCK_PREFIX 37# define UNLOCK_LOCK_PREFIX
35#endif 38#endif
36 39
40/* How long a lock should spin before we consider blocking */
41#define SPIN_THRESHOLD (1 << 15)
42
43extern struct static_key paravirt_ticketlocks_enabled;
44static __always_inline bool static_key_false(struct static_key *key);
45
46#ifdef CONFIG_PARAVIRT_SPINLOCKS
47
48static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
49{
50 set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
51}
52
53#else /* !CONFIG_PARAVIRT_SPINLOCKS */
54static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock,
55 __ticket_t ticket)
56{
57}
58static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
59 __ticket_t ticket)
60{
61}
62
63#endif /* CONFIG_PARAVIRT_SPINLOCKS */
64
37/* 65/*
38 * Ticket locks are conceptually two parts, one indicating the current head of 66 * Ticket locks are conceptually two parts, one indicating the current head of
39 * the queue, and the other indicating the current tail. The lock is acquired 67 * the queue, and the other indicating the current tail. The lock is acquired
@@ -47,81 +75,101 @@
47 * in the high part, because a wide xadd increment of the low part would carry 75 * in the high part, because a wide xadd increment of the low part would carry
48 * up and contaminate the high part. 76 * up and contaminate the high part.
49 */ 77 */
50static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) 78static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
51{ 79{
52 register struct __raw_tickets inc = { .tail = 1 }; 80 register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC };
53 81
54 inc = xadd(&lock->tickets, inc); 82 inc = xadd(&lock->tickets, inc);
83 if (likely(inc.head == inc.tail))
84 goto out;
55 85
86 inc.tail &= ~TICKET_SLOWPATH_FLAG;
56 for (;;) { 87 for (;;) {
57 if (inc.head == inc.tail) 88 unsigned count = SPIN_THRESHOLD;
58 break; 89
59 cpu_relax(); 90 do {
60 inc.head = ACCESS_ONCE(lock->tickets.head); 91 if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
92 goto out;
93 cpu_relax();
94 } while (--count);
95 __ticket_lock_spinning(lock, inc.tail);
61 } 96 }
62 barrier(); /* make sure nothing creeps before the lock is taken */ 97out: barrier(); /* make sure nothing creeps before the lock is taken */
63} 98}
64 99
65static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) 100static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
66{ 101{
67 arch_spinlock_t old, new; 102 arch_spinlock_t old, new;
68 103
69 old.tickets = ACCESS_ONCE(lock->tickets); 104 old.tickets = ACCESS_ONCE(lock->tickets);
70 if (old.tickets.head != old.tickets.tail) 105 if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
71 return 0; 106 return 0;
72 107
73 new.head_tail = old.head_tail + (1 << TICKET_SHIFT); 108 new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
74 109
75 /* cmpxchg is a full barrier, so nothing can move before it */ 110 /* cmpxchg is a full barrier, so nothing can move before it */
76 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; 111 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
77} 112}
78 113
79static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) 114static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
115 arch_spinlock_t old)
80{ 116{
81 __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); 117 arch_spinlock_t new;
118
119 BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
120
121 /* Perform the unlock on the "before" copy */
122 old.tickets.head += TICKET_LOCK_INC;
123
124 /* Clear the slowpath flag */
125 new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
126
127 /*
128 * If the lock is uncontended, clear the flag - use cmpxchg in
129 * case it changes behind our back though.
130 */
131 if (new.tickets.head != new.tickets.tail ||
132 cmpxchg(&lock->head_tail, old.head_tail,
133 new.head_tail) != old.head_tail) {
134 /*
135 * Lock still has someone queued for it, so wake up an
136 * appropriate waiter.
137 */
138 __ticket_unlock_kick(lock, old.tickets.head);
139 }
82} 140}
83 141
84static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) 142static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
85{ 143{
86 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); 144 if (TICKET_SLOWPATH_FLAG &&
145 static_key_false(&paravirt_ticketlocks_enabled)) {
146 arch_spinlock_t prev;
87 147
88 return tmp.tail != tmp.head; 148 prev = *lock;
89} 149 add_smp(&lock->tickets.head, TICKET_LOCK_INC);
90 150
91static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) 151 /* add_smp() is a full mb() */
92{
93 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
94 152
95 return (__ticket_t)(tmp.tail - tmp.head) > 1; 153 if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
154 __ticket_unlock_slowpath(lock, prev);
155 } else
156 __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
96} 157}
97 158
98#ifndef CONFIG_PARAVIRT_SPINLOCKS
99
100static inline int arch_spin_is_locked(arch_spinlock_t *lock) 159static inline int arch_spin_is_locked(arch_spinlock_t *lock)
101{ 160{
102 return __ticket_spin_is_locked(lock); 161 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
103}
104
105static inline int arch_spin_is_contended(arch_spinlock_t *lock)
106{
107 return __ticket_spin_is_contended(lock);
108}
109#define arch_spin_is_contended arch_spin_is_contended
110 162
111static __always_inline void arch_spin_lock(arch_spinlock_t *lock) 163 return tmp.tail != tmp.head;
112{
113 __ticket_spin_lock(lock);
114} 164}
115 165
116static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) 166static inline int arch_spin_is_contended(arch_spinlock_t *lock)
117{ 167{
118 return __ticket_spin_trylock(lock); 168 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
119}
120 169
121static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) 170 return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
122{
123 __ticket_spin_unlock(lock);
124} 171}
172#define arch_spin_is_contended arch_spin_is_contended
125 173
126static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, 174static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
127 unsigned long flags) 175 unsigned long flags)
@@ -129,8 +177,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
129 arch_spin_lock(lock); 177 arch_spin_lock(lock);
130} 178}
131 179
132#endif /* CONFIG_PARAVIRT_SPINLOCKS */
133
134static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 180static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
135{ 181{
136 while (arch_spin_is_locked(lock)) 182 while (arch_spin_is_locked(lock))
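The spinlock.h hunk above is the heart of the slowpath logic: the unlocker bumps head, and only if the tail's low bit is set does it take __ticket_unlock_slowpath, which replays the unlock on a snapshot, clears the flag, and kicks a waiter if anyone is still queued. A small user-space demonstration of that head/tail arithmetic, assuming the x86 little-endian layout (illustrative only, not kernel code):

/*
 * Sketch of the head/tail packing arch_spin_unlock relies on: head and
 * tail share one word (head_tail), tail lives in the high half
 * (TICKET_SHIFT), and TICKET_SLOWPATH_FLAG is bit 0 of the tail.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint8_t  ticket_t;
typedef uint16_t ticketpair_t;
#define TICKET_SHIFT         (sizeof(ticket_t) * 8)
#define TICKET_LOCK_INC      ((ticket_t)2)
#define TICKET_SLOWPATH_FLAG ((ticket_t)1)

union lock_word {
	ticketpair_t head_tail;
	struct { ticket_t head, tail; } tickets;   /* little-endian layout */
};

int main(void)
{
	/* holder has ticket 2, one waiter holds ticket 4, tail is 6 */
	union lock_word old = { .tickets = { .head = 2, .tail = 6 } };
	union lock_word new;

	/* the blocked waiter marked the lock: set bit 0 of the tail */
	old.tickets.tail |= TICKET_SLOWPATH_FLAG;

	/* __ticket_unlock_slowpath: unlock the "before" copy ... */
	old.tickets.head += TICKET_LOCK_INC;
	/* ... then clear the slowpath flag in the combined word */
	new.head_tail = old.head_tail &
			~(ticketpair_t)(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);

	printf("head=%u tail=%u -> %s\n",
	       (unsigned)new.tickets.head, (unsigned)new.tickets.tail,
	       new.tickets.head == new.tickets.tail ?
	       "uncontended, flag can be dropped" :
	       "still contended, kick the next waiter");
	return 0;
}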
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index ad0ad07fc006..4f1bea19945b 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -1,13 +1,17 @@
 #ifndef _ASM_X86_SPINLOCK_TYPES_H
 #define _ASM_X86_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
 #include <linux/types.h>
 
-#if (CONFIG_NR_CPUS < 256)
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#define __TICKET_LOCK_INC	2
+#define TICKET_SLOWPATH_FLAG	((__ticket_t)1)
+#else
+#define __TICKET_LOCK_INC	1
+#define TICKET_SLOWPATH_FLAG	((__ticket_t)0)
+#endif
+
+#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC))
 typedef u8  __ticket_t;
 typedef u16 __ticketpair_t;
 #else
@@ -15,6 +19,8 @@ typedef u16 __ticket_t;
 typedef u32 __ticketpair_t;
 #endif
 
+#define TICKET_LOCK_INC	((__ticket_t)__TICKET_LOCK_INC)
+
 #define TICKET_SHIFT	(sizeof(__ticket_t) * 8)
 
 typedef struct arch_spinlock {
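The sizing rule above is why the NR_CPUS bound changes: with __TICKET_LOCK_INC == 2 every CPU consumes two ticket values, so a u8 ticket only covers 256 / 2 = 128 CPUs. A small compile-time sketch of that check, with a hypothetical NR_CPUS value:

/*
 * Worked check of the sizing rule (user-space sketch): u8 tickets are
 * only safe while NR_CPUS < 256 / TICKET_LOCK_INC.
 */
#include <stdint.h>
#include <stdio.h>

#define TICKET_LOCK_INC 2
#define NR_CPUS         64            /* hypothetical config value */

typedef uint8_t ticket_t;

_Static_assert(NR_CPUS < (256 / TICKET_LOCK_INC),
	       "u8 tickets would wrap with this many CPUs");

int main(void)
{
	/* worst case: every CPU queued on one lock, each holding a ticket */
	unsigned span = NR_CPUS * TICKET_LOCK_INC;
	printf("%u CPUs consume %u of the %u values a u8 ticket can hold\n",
	       (unsigned)NR_CPUS, span, 1u << (8 * (unsigned)sizeof(ticket_t)));
	return 0;
}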
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 06fdbd987e97..94dc8ca434e0 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -23,6 +23,7 @@
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
 #define KVM_FEATURE_PV_EOI		6
+#define KVM_FEATURE_PV_UNHALT		7
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
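KVM advertises KVM_FEATURE_PV_UNHALT as bit 7 of its feature leaf, and the guest's kvm_spinlock_init() later in this diff checks it with kvm_para_has_feature() before switching pv_lock_ops. A rough user-space probe of the same CPUID bits, assuming GCC/Clang's <cpuid.h> and the usual KVM leaves at 0x40000000/0x40000001; run inside a KVM guest for a meaningful result:

/* Sketch only: raw CPUID probe of KVM_FEATURE_PV_UNHALT (bit 7). */
#include <cpuid.h>
#include <stdio.h>
#include <string.h>

#define KVM_CPUID_SIGNATURE   0x40000000u
#define KVM_CPUID_FEATURES    0x40000001u
#define KVM_FEATURE_PV_UNHALT 7

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[13];

	/* hypervisor leaves: 0x40000000 carries the "KVMKVMKVM" signature */
	__cpuid(KVM_CPUID_SIGNATURE, eax, ebx, ecx, edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';
	if (strcmp(sig, "KVMKVMKVM") != 0) {
		printf("not a KVM guest (signature '%s')\n", sig);
		return 1;
	}

	__cpuid(KVM_CPUID_FEATURES, eax, ebx, ecx, edx);
	printf("KVM_FEATURE_PV_UNHALT %s\n",
	       (eax & (1u << KVM_FEATURE_PV_UNHALT)) ? "advertised" : "absent");
	return 0;
}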
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a96d32cc55b8..56e2fa4a8b13 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
34#include <linux/sched.h> 34#include <linux/sched.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/kprobes.h> 36#include <linux/kprobes.h>
37#include <linux/debugfs.h>
37#include <asm/timer.h> 38#include <asm/timer.h>
38#include <asm/cpu.h> 39#include <asm/cpu.h>
39#include <asm/traps.h> 40#include <asm/traps.h>
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
419 WARN_ON(kvm_register_clock("primary cpu clock")); 420 WARN_ON(kvm_register_clock("primary cpu clock"));
420 kvm_guest_cpu_init(); 421 kvm_guest_cpu_init();
421 native_smp_prepare_boot_cpu(); 422 native_smp_prepare_boot_cpu();
423 kvm_spinlock_init();
422} 424}
423 425
424static void kvm_guest_cpu_online(void *dummy) 426static void kvm_guest_cpu_online(void *dummy)
@@ -523,3 +525,263 @@ static __init int activate_jump_labels(void)
523 return 0; 525 return 0;
524} 526}
525arch_initcall(activate_jump_labels); 527arch_initcall(activate_jump_labels);
528
529#ifdef CONFIG_PARAVIRT_SPINLOCKS
530
531/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
532static void kvm_kick_cpu(int cpu)
533{
534 int apicid;
535 unsigned long flags = 0;
536
537 apicid = per_cpu(x86_cpu_to_apicid, cpu);
538 kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
539}
540
541enum kvm_contention_stat {
542 TAKEN_SLOW,
543 TAKEN_SLOW_PICKUP,
544 RELEASED_SLOW,
545 RELEASED_SLOW_KICKED,
546 NR_CONTENTION_STATS
547};
548
549#ifdef CONFIG_KVM_DEBUG_FS
550#define HISTO_BUCKETS 30
551
552static struct kvm_spinlock_stats
553{
554 u32 contention_stats[NR_CONTENTION_STATS];
555 u32 histo_spin_blocked[HISTO_BUCKETS+1];
556 u64 time_blocked;
557} spinlock_stats;
558
559static u8 zero_stats;
560
561static inline void check_zero(void)
562{
563 u8 ret;
564 u8 old;
565
566 old = ACCESS_ONCE(zero_stats);
567 if (unlikely(old)) {
568 ret = cmpxchg(&zero_stats, old, 0);
569 /* This ensures only one fellow resets the stat */
570 if (ret == old)
571 memset(&spinlock_stats, 0, sizeof(spinlock_stats));
572 }
573}
574
575static inline void add_stats(enum kvm_contention_stat var, u32 val)
576{
577 check_zero();
578 spinlock_stats.contention_stats[var] += val;
579}
580
581
582static inline u64 spin_time_start(void)
583{
584 return sched_clock();
585}
586
587static void __spin_time_accum(u64 delta, u32 *array)
588{
589 unsigned index;
590
591 index = ilog2(delta);
592 check_zero();
593
594 if (index < HISTO_BUCKETS)
595 array[index]++;
596 else
597 array[HISTO_BUCKETS]++;
598}
599
600static inline void spin_time_accum_blocked(u64 start)
601{
602 u32 delta;
603
604 delta = sched_clock() - start;
605 __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
606 spinlock_stats.time_blocked += delta;
607}
608
609static struct dentry *d_spin_debug;
610static struct dentry *d_kvm_debug;
611
612struct dentry *kvm_init_debugfs(void)
613{
614 d_kvm_debug = debugfs_create_dir("kvm", NULL);
615 if (!d_kvm_debug)
616 printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
617
618 return d_kvm_debug;
619}
620
621static int __init kvm_spinlock_debugfs(void)
622{
623 struct dentry *d_kvm;
624
625 d_kvm = kvm_init_debugfs();
626 if (d_kvm == NULL)
627 return -ENOMEM;
628
629 d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
630
631 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
632
633 debugfs_create_u32("taken_slow", 0444, d_spin_debug,
634 &spinlock_stats.contention_stats[TAKEN_SLOW]);
635 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
636 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
637
638 debugfs_create_u32("released_slow", 0444, d_spin_debug,
639 &spinlock_stats.contention_stats[RELEASED_SLOW]);
640 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
641 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
642
643 debugfs_create_u64("time_blocked", 0444, d_spin_debug,
644 &spinlock_stats.time_blocked);
645
646 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
647 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
648
649 return 0;
650}
651fs_initcall(kvm_spinlock_debugfs);
652#else /* !CONFIG_KVM_DEBUG_FS */
653static inline void add_stats(enum kvm_contention_stat var, u32 val)
654{
655}
656
657static inline u64 spin_time_start(void)
658{
659 return 0;
660}
661
662static inline void spin_time_accum_blocked(u64 start)
663{
664}
665#endif /* CONFIG_KVM_DEBUG_FS */
666
667struct kvm_lock_waiting {
668 struct arch_spinlock *lock;
669 __ticket_t want;
670};
671
672/* cpus 'waiting' on a spinlock to become available */
673static cpumask_t waiting_cpus;
674
675/* Track spinlock on which a cpu is waiting */
676static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
677
678static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
679{
680 struct kvm_lock_waiting *w;
681 int cpu;
682 u64 start;
683 unsigned long flags;
684
685 if (in_nmi())
686 return;
687
688 w = &__get_cpu_var(klock_waiting);
689 cpu = smp_processor_id();
690 start = spin_time_start();
691
692 /*
693 * Make sure an interrupt handler can't upset things in a
694 * partially setup state.
695 */
696 local_irq_save(flags);
697
698 /*
699 * The ordering protocol on this is that the "lock" pointer
700 * may only be set non-NULL if the "want" ticket is correct.
701 * If we're updating "want", we must first clear "lock".
702 */
703 w->lock = NULL;
704 smp_wmb();
705 w->want = want;
706 smp_wmb();
707 w->lock = lock;
708
709 add_stats(TAKEN_SLOW, 1);
710
711 /*
712 * This uses set_bit, which is atomic but we should not rely on its
713 * reordering gurantees. So barrier is needed after this call.
714 */
715 cpumask_set_cpu(cpu, &waiting_cpus);
716
717 barrier();
718
719 /*
720 * Mark entry to slowpath before doing the pickup test to make
721 * sure we don't deadlock with an unlocker.
722 */
723 __ticket_enter_slowpath(lock);
724
725 /*
726 * check again make sure it didn't become free while
727 * we weren't looking.
728 */
729 if (ACCESS_ONCE(lock->tickets.head) == want) {
730 add_stats(TAKEN_SLOW_PICKUP, 1);
731 goto out;
732 }
733
734 /*
735 * halt until it's our turn and kicked. Note that we do safe halt
736 * for irq enabled case to avoid hang when lock info is overwritten
737 * in irq spinlock slowpath and no spurious interrupt occur to save us.
738 */
739 if (arch_irqs_disabled_flags(flags))
740 halt();
741 else
742 safe_halt();
743
744out:
745 cpumask_clear_cpu(cpu, &waiting_cpus);
746 w->lock = NULL;
747 local_irq_restore(flags);
748 spin_time_accum_blocked(start);
749}
750PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
751
752/* Kick vcpu waiting on @lock->head to reach value @ticket */
753static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
754{
755 int cpu;
756
757 add_stats(RELEASED_SLOW, 1);
758 for_each_cpu(cpu, &waiting_cpus) {
759 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
760 if (ACCESS_ONCE(w->lock) == lock &&
761 ACCESS_ONCE(w->want) == ticket) {
762 add_stats(RELEASED_SLOW_KICKED, 1);
763 kvm_kick_cpu(cpu);
764 break;
765 }
766 }
767}
768
769/*
770 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
771 */
772void __init kvm_spinlock_init(void)
773{
774 if (!kvm_para_available())
775 return;
776 /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
777 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
778 return;
779
780 printk(KERN_INFO "KVM setup paravirtual spinlock\n");
781
782 static_key_slow_inc(&paravirt_ticketlocks_enabled);
783
784 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
785 pv_lock_ops.unlock_kick = kvm_unlock_kick;
786}
787#endif /* CONFIG_PARAVIRT_SPINLOCKS */
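kvm_lock_spinning() publishes a per-cpu (lock, want) record so kvm_unlock_kick() can find the vCPU to wake, and the comments spell out the ordering rule: "lock" may only be non-NULL while "want" is valid, so "lock" is cleared before "want" is updated. A user-space sketch of just that publication protocol, with C11 release/acquire standing in for the kernel's smp_wmb() (illustrative only):

/*
 * Sketch of the waiter-record protocol: the kicker may only trust "want"
 * when "lock" is non-NULL, so the waiter unpublishes, updates the ticket,
 * then republishes.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct waiter {
	_Atomic(void *)  lock;   /* which lock we are blocked on, or NULL */
	_Atomic uint8_t  want;   /* the ticket we are waiting for         */
};

static struct waiter w;
static int some_lock;            /* stands in for an arch_spinlock       */

static void publish(void *lock, uint8_t want)
{
	atomic_store_explicit(&w.lock, NULL, memory_order_relaxed);  /* 1: unpublish  */
	atomic_store_explicit(&w.want, want, memory_order_release);  /* 2: new ticket */
	atomic_store_explicit(&w.lock, lock, memory_order_release);  /* 3: republish  */
}

int main(void)
{
	publish(&some_lock, 4);

	/* the "kicker" side: only act if (lock, want) match what it unlocked */
	if (atomic_load_explicit(&w.lock, memory_order_acquire) == (void *)&some_lock &&
	    atomic_load(&w.want) == 4)
		puts("would kick this vCPU");
	return 0;
}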
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c77a976..bbb6c7316341 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -4,25 +4,17 @@
  */
 #include <linux/spinlock.h>
 #include <linux/module.h>
+#include <linux/jump_label.h>
 
 #include <asm/paravirt.h>
 
-static inline void
-default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	arch_spin_lock(lock);
-}
-
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
-	.spin_is_locked = __ticket_spin_is_locked,
-	.spin_is_contended = __ticket_spin_is_contended,
-
-	.spin_lock = __ticket_spin_lock,
-	.spin_lock_flags = default_spin_lock_flags,
-	.spin_trylock = __ticket_spin_trylock,
-	.spin_unlock = __ticket_spin_unlock,
+	.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
+	.unlock_kick = paravirt_nop,
 #endif
 };
 EXPORT_SYMBOL(pv_lock_ops);
 
+struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL(paravirt_ticketlocks_enabled);
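paravirt_ticketlocks_enabled is a static key that stays false on bare metal, so arch_spin_unlock() keeps its single add; KVM and Xen flip it once at boot with static_key_slow_inc(). A plain-C sketch of that enable-once, check-on-fast-path flow, where an ordinary boolean stands in for the patched jump label (illustrative only):

/*
 * Sketch of the static-key pattern: flipped once at init, read on every
 * unlock. The real static_key_false() patches the branch out of the
 * instruction stream rather than loading a variable.
 */
#include <stdbool.h>
#include <stdio.h>

static bool pv_ticketlocks_enabled;          /* STATIC_KEY_INIT_FALSE */

static void hypervisor_backend_init(void)    /* kvm_spinlock_init()/xen_init_spinlocks() */
{
	pv_ticketlocks_enabled = true;        /* static_key_slow_inc() */
}

static void unlock_fast_path(void)
{
	if (pv_ticketlocks_enabled)           /* static_key_false(&...) */
		puts("take the pv-aware unlock, may kick a waiter");
	else
		puts("plain ticket unlock, no extra work");
}

int main(void)
{
	unlock_fast_path();                   /* bare metal: key never enabled */
	hypervisor_backend_init();            /* running under KVM or Xen      */
	unlock_fast_path();
	return 0;
}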
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 22759c6d309f..368c290929fe 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
 
 	xen_filter_cpu_maps();
 	xen_setup_vcpu_info_placement();
+	xen_init_spinlocks();
 }
 
 static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
@@ -686,7 +687,6 @@ void __init xen_smp_init(void)
 {
 	smp_ops = xen_smp_ops;
 	xen_fill_possible_map();
-	xen_init_spinlocks();
 }
 
 static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cf3caee356b3..0438b9324a72 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
17#include "xen-ops.h" 17#include "xen-ops.h"
18#include "debugfs.h" 18#include "debugfs.h"
19 19
20#ifdef CONFIG_XEN_DEBUG_FS 20enum xen_contention_stat {
21static struct xen_spinlock_stats 21 TAKEN_SLOW,
22{ 22 TAKEN_SLOW_PICKUP,
23 u64 taken; 23 TAKEN_SLOW_SPURIOUS,
24 u32 taken_slow; 24 RELEASED_SLOW,
25 u32 taken_slow_nested; 25 RELEASED_SLOW_KICKED,
26 u32 taken_slow_pickup; 26 NR_CONTENTION_STATS
27 u32 taken_slow_spurious; 27};
28 u32 taken_slow_irqenable;
29 28
30 u64 released;
31 u32 released_slow;
32 u32 released_slow_kicked;
33 29
30#ifdef CONFIG_XEN_DEBUG_FS
34#define HISTO_BUCKETS 30 31#define HISTO_BUCKETS 30
35 u32 histo_spin_total[HISTO_BUCKETS+1]; 32static struct xen_spinlock_stats
36 u32 histo_spin_spinning[HISTO_BUCKETS+1]; 33{
34 u32 contention_stats[NR_CONTENTION_STATS];
37 u32 histo_spin_blocked[HISTO_BUCKETS+1]; 35 u32 histo_spin_blocked[HISTO_BUCKETS+1];
38
39 u64 time_total;
40 u64 time_spinning;
41 u64 time_blocked; 36 u64 time_blocked;
42} spinlock_stats; 37} spinlock_stats;
43 38
44static u8 zero_stats; 39static u8 zero_stats;
45 40
46static unsigned lock_timeout = 1 << 10;
47#define TIMEOUT lock_timeout
48
49static inline void check_zero(void) 41static inline void check_zero(void)
50{ 42{
51 if (unlikely(zero_stats)) { 43 u8 ret;
52 memset(&spinlock_stats, 0, sizeof(spinlock_stats)); 44 u8 old = ACCESS_ONCE(zero_stats);
53 zero_stats = 0; 45 if (unlikely(old)) {
46 ret = cmpxchg(&zero_stats, old, 0);
47 /* This ensures only one fellow resets the stat */
48 if (ret == old)
49 memset(&spinlock_stats, 0, sizeof(spinlock_stats));
54 } 50 }
55} 51}
56 52
57#define ADD_STATS(elem, val) \ 53static inline void add_stats(enum xen_contention_stat var, u32 val)
58 do { check_zero(); spinlock_stats.elem += (val); } while(0) 54{
55 check_zero();
56 spinlock_stats.contention_stats[var] += val;
57}
59 58
60static inline u64 spin_time_start(void) 59static inline u64 spin_time_start(void)
61{ 60{
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
74 array[HISTO_BUCKETS]++; 73 array[HISTO_BUCKETS]++;
75} 74}
76 75
77static inline void spin_time_accum_spinning(u64 start)
78{
79 u32 delta = xen_clocksource_read() - start;
80
81 __spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
82 spinlock_stats.time_spinning += delta;
83}
84
85static inline void spin_time_accum_total(u64 start)
86{
87 u32 delta = xen_clocksource_read() - start;
88
89 __spin_time_accum(delta, spinlock_stats.histo_spin_total);
90 spinlock_stats.time_total += delta;
91}
92
93static inline void spin_time_accum_blocked(u64 start) 76static inline void spin_time_accum_blocked(u64 start)
94{ 77{
95 u32 delta = xen_clocksource_read() - start; 78 u32 delta = xen_clocksource_read() - start;
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start)
99} 82}
100#else /* !CONFIG_XEN_DEBUG_FS */ 83#else /* !CONFIG_XEN_DEBUG_FS */
101#define TIMEOUT (1 << 10) 84#define TIMEOUT (1 << 10)
102#define ADD_STATS(elem, val) do { (void)(val); } while(0) 85static inline void add_stats(enum xen_contention_stat var, u32 val)
86{
87}
103 88
104static inline u64 spin_time_start(void) 89static inline u64 spin_time_start(void)
105{ 90{
106 return 0; 91 return 0;
107} 92}
108 93
109static inline void spin_time_accum_total(u64 start)
110{
111}
112static inline void spin_time_accum_spinning(u64 start)
113{
114}
115static inline void spin_time_accum_blocked(u64 start) 94static inline void spin_time_accum_blocked(u64 start)
116{ 95{
117} 96}
@@ -134,227 +113,123 @@ typedef u16 xen_spinners_t;
134 asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); 113 asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
135#endif 114#endif
136 115
137struct xen_spinlock { 116struct xen_lock_waiting {
138 unsigned char lock; /* 0 -> free; 1 -> locked */ 117 struct arch_spinlock *lock;
139 xen_spinners_t spinners; /* count of waiting cpus */ 118 __ticket_t want;
140}; 119};
141 120
142static int xen_spin_is_locked(struct arch_spinlock *lock)
143{
144 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
145
146 return xl->lock != 0;
147}
148
149static int xen_spin_is_contended(struct arch_spinlock *lock)
150{
151 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
152
153 /* Not strictly true; this is only the count of contended
154 lock-takers entering the slow path. */
155 return xl->spinners != 0;
156}
157
158static int xen_spin_trylock(struct arch_spinlock *lock)
159{
160 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
161 u8 old = 1;
162
163 asm("xchgb %b0,%1"
164 : "+q" (old), "+m" (xl->lock) : : "memory");
165
166 return old == 0;
167}
168
169static DEFINE_PER_CPU(char *, irq_name);
170static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; 121static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
171static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); 122static DEFINE_PER_CPU(char *, irq_name);
172 123static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
173/* 124static cpumask_t waiting_cpus;
174 * Mark a cpu as interested in a lock. Returns the CPU's previous
175 * lock of interest, in case we got preempted by an interrupt.
176 */
177static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
178{
179 struct xen_spinlock *prev;
180
181 prev = __this_cpu_read(lock_spinners);
182 __this_cpu_write(lock_spinners, xl);
183
184 wmb(); /* set lock of interest before count */
185
186 inc_spinners(xl);
187
188 return prev;
189}
190
191/*
192 * Mark a cpu as no longer interested in a lock. Restores previous
193 * lock of interest (NULL for none).
194 */
195static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
196{
197 dec_spinners(xl);
198 wmb(); /* decrement count before restoring lock */
199 __this_cpu_write(lock_spinners, prev);
200}
201 125
202static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) 126static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
203{ 127{
204 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
205 struct xen_spinlock *prev;
206 int irq = __this_cpu_read(lock_kicker_irq); 128 int irq = __this_cpu_read(lock_kicker_irq);
207 int ret; 129 struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
130 int cpu = smp_processor_id();
208 u64 start; 131 u64 start;
132 unsigned long flags;
209 133
210 /* If kicker interrupts not initialized yet, just spin */ 134 /* If kicker interrupts not initialized yet, just spin */
211 if (irq == -1) 135 if (irq == -1)
212 return 0; 136 return;
213 137
214 start = spin_time_start(); 138 start = spin_time_start();
215 139
216 /* announce we're spinning */ 140 /*
217 prev = spinning_lock(xl); 141 * Make sure an interrupt handler can't upset things in a
142 * partially setup state.
143 */
144 local_irq_save(flags);
145 /*
146 * We don't really care if we're overwriting some other
147 * (lock,want) pair, as that would mean that we're currently
148 * in an interrupt context, and the outer context had
149 * interrupts enabled. That has already kicked the VCPU out
150 * of xen_poll_irq(), so it will just return spuriously and
151 * retry with newly setup (lock,want).
152 *
153 * The ordering protocol on this is that the "lock" pointer
154 * may only be set non-NULL if the "want" ticket is correct.
155 * If we're updating "want", we must first clear "lock".
156 */
157 w->lock = NULL;
158 smp_wmb();
159 w->want = want;
160 smp_wmb();
161 w->lock = lock;
218 162
219 ADD_STATS(taken_slow, 1); 163 /* This uses set_bit, which atomic and therefore a barrier */
220 ADD_STATS(taken_slow_nested, prev != NULL); 164 cpumask_set_cpu(cpu, &waiting_cpus);
165 add_stats(TAKEN_SLOW, 1);
221 166
222 do { 167 /* clear pending */
223 unsigned long flags; 168 xen_clear_irq_pending(irq);
224 169
225 /* clear pending */ 170 /* Only check lock once pending cleared */
226 xen_clear_irq_pending(irq); 171 barrier();
227 172
228 /* check again make sure it didn't become free while 173 /*
229 we weren't looking */ 174 * Mark entry to slowpath before doing the pickup test to make
230 ret = xen_spin_trylock(lock); 175 * sure we don't deadlock with an unlocker.
231 if (ret) { 176 */
232 ADD_STATS(taken_slow_pickup, 1); 177 __ticket_enter_slowpath(lock);
233 178
234 /* 179 /*
235 * If we interrupted another spinlock while it 180 * check again make sure it didn't become free while
236 * was blocking, make sure it doesn't block 181 * we weren't looking
237 * without rechecking the lock. 182 */
238 */ 183 if (ACCESS_ONCE(lock->tickets.head) == want) {
239 if (prev != NULL) 184 add_stats(TAKEN_SLOW_PICKUP, 1);
240 xen_set_irq_pending(irq); 185 goto out;
241 goto out; 186 }
242 }
243 187
244 flags = arch_local_save_flags(); 188 /* Allow interrupts while blocked */
245 if (irq_enable) { 189 local_irq_restore(flags);
246 ADD_STATS(taken_slow_irqenable, 1);
247 raw_local_irq_enable();
248 }
249 190
250 /* 191 /*
251 * Block until irq becomes pending. If we're 192 * If an interrupt happens here, it will leave the wakeup irq
252 * interrupted at this point (after the trylock but 193 * pending, which will cause xen_poll_irq() to return
253 * before entering the block), then the nested lock 194 * immediately.
254 * handler guarantees that the irq will be left 195 */
255 * pending if there's any chance the lock became free;
256 * xen_poll_irq() returns immediately if the irq is
257 * pending.
258 */
259 xen_poll_irq(irq);
260 196
261 raw_local_irq_restore(flags); 197 /* Block until irq becomes pending (or perhaps a spurious wakeup) */
198 xen_poll_irq(irq);
199 add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
262 200
263 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); 201 local_irq_save(flags);
264 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
265 202
266 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); 203 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
267
268out: 204out:
269 unspinning_lock(xl, prev); 205 cpumask_clear_cpu(cpu, &waiting_cpus);
270 spin_time_accum_blocked(start); 206 w->lock = NULL;
271
272 return ret;
273}
274
275static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
276{
277 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
278 unsigned timeout;
279 u8 oldval;
280 u64 start_spin;
281
282 ADD_STATS(taken, 1);
283
284 start_spin = spin_time_start();
285
286 do {
287 u64 start_spin_fast = spin_time_start();
288
289 timeout = TIMEOUT;
290
291 asm("1: xchgb %1,%0\n"
292 " testb %1,%1\n"
293 " jz 3f\n"
294 "2: rep;nop\n"
295 " cmpb $0,%0\n"
296 " je 1b\n"
297 " dec %2\n"
298 " jnz 2b\n"
299 "3:\n"
300 : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
301 : "1" (1)
302 : "memory");
303 207
304 spin_time_accum_spinning(start_spin_fast); 208 local_irq_restore(flags);
305 209
306 } while (unlikely(oldval != 0 && 210 spin_time_accum_blocked(start);
307 (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
308
309 spin_time_accum_total(start_spin);
310}
311
312static void xen_spin_lock(struct arch_spinlock *lock)
313{
314 __xen_spin_lock(lock, false);
315}
316
317static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
318{
319 __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
320} 211}
212PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
321 213
322static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) 214static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
323{ 215{
324 int cpu; 216 int cpu;
325 217
326 ADD_STATS(released_slow, 1); 218 add_stats(RELEASED_SLOW, 1);
219
220 for_each_cpu(cpu, &waiting_cpus) {
221 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
327 222
328 for_each_online_cpu(cpu) { 223 /* Make sure we read lock before want */
329 /* XXX should mix up next cpu selection */ 224 if (ACCESS_ONCE(w->lock) == lock &&
330 if (per_cpu(lock_spinners, cpu) == xl) { 225 ACCESS_ONCE(w->want) == next) {
331 ADD_STATS(released_slow_kicked, 1); 226 add_stats(RELEASED_SLOW_KICKED, 1);
332 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 227 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
228 break;
333 } 229 }
334 } 230 }
335} 231}
336 232
337static void xen_spin_unlock(struct arch_spinlock *lock)
338{
339 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
340
341 ADD_STATS(released, 1);
342
343 smp_wmb(); /* make sure no writes get moved after unlock */
344 xl->lock = 0; /* release lock */
345
346 /*
347 * Make sure unlock happens before checking for waiting
348 * spinners. We need a strong barrier to enforce the
349 * write-read ordering to different memory locations, as the
350 * CPU makes no implied guarantees about their ordering.
351 */
352 mb();
353
354 if (unlikely(xl->spinners))
355 xen_spin_unlock_slow(xl);
356}
357
358static irqreturn_t dummy_handler(int irq, void *dev_id) 233static irqreturn_t dummy_handler(int irq, void *dev_id)
359{ 234{
360 BUG(); 235 BUG();
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu)
408 per_cpu(irq_name, cpu) = NULL; 283 per_cpu(irq_name, cpu) = NULL;
409} 284}
410 285
286static bool xen_pvspin __initdata = true;
287
411void __init xen_init_spinlocks(void) 288void __init xen_init_spinlocks(void)
412{ 289{
413 /* 290 /*
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void)
417 if (xen_hvm_domain()) 294 if (xen_hvm_domain())
418 return; 295 return;
419 296
420 BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); 297 if (!xen_pvspin) {
298 printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
299 return;
300 }
421 301
422 pv_lock_ops.spin_is_locked = xen_spin_is_locked; 302 static_key_slow_inc(&paravirt_ticketlocks_enabled);
423 pv_lock_ops.spin_is_contended = xen_spin_is_contended; 303
424 pv_lock_ops.spin_lock = xen_spin_lock; 304 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
425 pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; 305 pv_lock_ops.unlock_kick = xen_unlock_kick;
426 pv_lock_ops.spin_trylock = xen_spin_trylock; 306}
427 pv_lock_ops.spin_unlock = xen_spin_unlock; 307
308static __init int xen_parse_nopvspin(char *arg)
309{
310 xen_pvspin = false;
311 return 0;
428} 312}
313early_param("xen_nopvspin", xen_parse_nopvspin);
429 314
430#ifdef CONFIG_XEN_DEBUG_FS 315#ifdef CONFIG_XEN_DEBUG_FS
431 316
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void)
442 327
443 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); 328 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
444 329
445 debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
446
447 debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
448 debugfs_create_u32("taken_slow", 0444, d_spin_debug, 330 debugfs_create_u32("taken_slow", 0444, d_spin_debug,
449 &spinlock_stats.taken_slow); 331 &spinlock_stats.contention_stats[TAKEN_SLOW]);
450 debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
451 &spinlock_stats.taken_slow_nested);
452 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, 332 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
453 &spinlock_stats.taken_slow_pickup); 333 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
454 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, 334 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
455 &spinlock_stats.taken_slow_spurious); 335 &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
456 debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
457 &spinlock_stats.taken_slow_irqenable);
458 336
459 debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
460 debugfs_create_u32("released_slow", 0444, d_spin_debug, 337 debugfs_create_u32("released_slow", 0444, d_spin_debug,
461 &spinlock_stats.released_slow); 338 &spinlock_stats.contention_stats[RELEASED_SLOW]);
462 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, 339 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
463 &spinlock_stats.released_slow_kicked); 340 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
464 341
465 debugfs_create_u64("time_spinning", 0444, d_spin_debug,
466 &spinlock_stats.time_spinning);
467 debugfs_create_u64("time_blocked", 0444, d_spin_debug, 342 debugfs_create_u64("time_blocked", 0444, d_spin_debug,
468 &spinlock_stats.time_blocked); 343 &spinlock_stats.time_blocked);
469 debugfs_create_u64("time_total", 0444, d_spin_debug,
470 &spinlock_stats.time_total);
471 344
472 debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
473 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
474 debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
475 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
476 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, 345 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
477 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); 346 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
478 347
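Both backends follow the same discipline before blocking: mark the lock's slowpath flag, then re-check the head (the "pickup test"), and only then halt or poll for the kick; otherwise an unlock that raced with the flag update would never send one. A user-space sketch of that ordering, with hypothetical helper names (not the Xen or KVM code itself):

/*
 * Sketch of the "set slowpath flag, re-check head, then block" order used
 * by xen_lock_spinning() and kvm_lock_spinning().
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct tickets {
	_Atomic uint8_t head;
	_Atomic uint8_t tail;
};

#define TICKET_SLOWPATH_FLAG ((uint8_t)1)

static int prepare_to_block(struct tickets *t, uint8_t want)
{
	/* 1. tell future unlockers that someone is (about to be) asleep */
	atomic_fetch_or(&t->tail, TICKET_SLOWPATH_FLAG);

	/* 2. pickup test: the lock may have become ours while we set the flag */
	if (atomic_load(&t->head) == want)
		return 0;                 /* don't block, just take the lock */

	/* 3. only now is it safe to halt/poll and wait for an unlock kick */
	return 1;
}

int main(void)
{
	struct tickets t = { 2, 6 };

	printf("ticket 4: %s\n", prepare_to_block(&t, 4) ? "block" : "picked up");
	atomic_store(&t.head, 4);         /* unlocker hands the lock over */
	printf("ticket 4: %s\n", prepare_to_block(&t, 4) ? "block" : "picked up");
	return 0;
}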