author     Linus Torvalds <torvalds@linux-foundation.org>   2013-09-04 14:55:10 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-09-04 14:55:10 -0400
commit     816434ec4a674fcdb3c2221a6dffdc8f34020550 (patch)
tree       6b8a319171270b20bf1b2e1c98d333f47988553a
parent     f357a82048ff1e5645861475b014570e11ad1911 (diff)
parent     36bd621337c91a1ecda588e5bbbae8dd9698bae7 (diff)
Merge branch 'x86-spinlocks-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 spinlock changes from Ingo Molnar:
 "The biggest change here are paravirtualized ticket spinlocks (PV
  spinlocks), which bring a nice speedup on various benchmarks.

  The KVM host side will come to you via the KVM tree"

* 'x86-spinlocks-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/kvm/guest: Fix sparse warning: "symbol 'klock_waiting' was not declared as static"
  kvm: Paravirtual ticketlocks support for linux guests running on KVM hypervisor
  kvm guest: Add configuration support to enable debug information for KVM Guests
  kvm uapi: Add KICK_CPU and PV_UNHALT definition to uapi
  xen, pvticketlock: Allow interrupts to be enabled while blocking
  x86, ticketlock: Add slowpath logic
  jump_label: Split jumplabel ratelimit
  x86, pvticketlock: When paravirtualizing ticket locks, increment by 2
  x86, pvticketlock: Use callee-save for lock_spinning
  xen, pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks
  xen, pvticketlock: Xen implementation for PV ticket locks
  xen: Defer spinlock setup until boot CPU setup
  x86, ticketlock: Collapse a layer of functions
  x86, ticketlock: Don't inline _spin_unlock when using paravirt spinlocks
  x86, spinlock: Replace pv spinlocks with pv ticketlocks
 arch/x86/Kconfig                      |  10
 arch/x86/include/asm/kvm_para.h       |  14
 arch/x86/include/asm/paravirt.h       |  32
 arch/x86/include/asm/paravirt_types.h |  14
 arch/x86/include/asm/spinlock.h       | 128
 arch/x86/include/asm/spinlock_types.h |  16
 arch/x86/include/uapi/asm/kvm_para.h  |   1
 arch/x86/kernel/kvm.c                 | 262
 arch/x86/kernel/paravirt-spinlocks.c  |  18
 arch/x86/xen/smp.c                    |   2
 arch/x86/xen/spinlock.c               | 387
 include/linux/jump_label.h            |  28
 include/linux/jump_label_ratelimit.h  |  34
 include/linux/perf_event.h            |   1
 include/uapi/linux/kvm_para.h         |   1
 kernel/jump_label.c                   |   1
 16 files changed, 569 insertions(+), 380 deletions(-)
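
Before the per-file diffs, a minimal standalone sketch of the idea the series implements may help orient the reader: a ticket lock spins only for a bounded number of iterations, then asks the hypervisor to block the vCPU, and the unlocker kicks the next ticket holder. This is illustrative C11 code, not the kernel implementation; pv_wait()/pv_kick() are stand-ins for the pv_lock_ops hooks (lock_spinning/unlock_kick) that the KVM and Xen hunks below fill in, and the stubs here do nothing.

/*
 * Illustrative sketch of a paravirtualized ticket lock (not kernel code).
 * The real patches use xadd/cmpxchg, a static key, and a slowpath flag in
 * bit 0 of the tail; this keeps only the overall shape.
 */
#include <stdatomic.h>

#define SPIN_THRESHOLD (1 << 15)        /* spins before blocking, as in the patch */

struct ticket_lock {
	_Atomic unsigned short head;    /* ticket currently being served */
	_Atomic unsigned short tail;    /* next ticket to hand out */
};

/* Hypothetical stand-ins for pv_lock_ops.lock_spinning / .unlock_kick. */
static void pv_wait(struct ticket_lock *lock, unsigned short ticket) { (void)lock; (void)ticket; }
static void pv_kick(struct ticket_lock *lock, unsigned short ticket) { (void)lock; (void)ticket; }

static void ticket_lock(struct ticket_lock *lock)
{
	unsigned short me = atomic_fetch_add(&lock->tail, 1);   /* take a ticket */

	while (atomic_load(&lock->head) != me) {
		/* Spin for a while in the hope the holder releases soon... */
		for (unsigned int count = SPIN_THRESHOLD; count; count--) {
			if (atomic_load(&lock->head) == me)
				return;
		}
		/* ...then let the hypervisor run something more useful. */
		pv_wait(lock, me);
	}
}

static void ticket_unlock(struct ticket_lock *lock)
{
	unsigned short next = atomic_fetch_add(&lock->head, 1) + 1;

	/* The real code only kicks when the slowpath flag says someone blocked. */
	pv_kick(lock, next);
}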
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 30322b0427d1..5c0ed72c02a2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -632,6 +632,7 @@ config PARAVIRT_DEBUG
632config PARAVIRT_SPINLOCKS 632config PARAVIRT_SPINLOCKS
633 bool "Paravirtualization layer for spinlocks" 633 bool "Paravirtualization layer for spinlocks"
634 depends on PARAVIRT && SMP 634 depends on PARAVIRT && SMP
635 select UNINLINE_SPIN_UNLOCK
635 ---help--- 636 ---help---
636 Paravirtualized spinlocks allow a pvops backend to replace the 637 Paravirtualized spinlocks allow a pvops backend to replace the
637 spinlock implementation with something virtualization-friendly 638 spinlock implementation with something virtualization-friendly
@@ -656,6 +657,15 @@ config KVM_GUEST
656 underlying device model, the host provides the guest with 657 underlying device model, the host provides the guest with
657 timing infrastructure such as time of day, and system time 658 timing infrastructure such as time of day, and system time
658 659
660config KVM_DEBUG_FS
661 bool "Enable debug information for KVM Guests in debugfs"
662 depends on KVM_GUEST && DEBUG_FS
663 default n
664 ---help---
665 This option enables collection of various statistics for KVM guest.
666 Statistics are displayed in debugfs filesystem. Enabling this option
667 may incur significant overhead.
668
659source "arch/x86/lguest/Kconfig" 669source "arch/x86/lguest/Kconfig"
660 670
661config PARAVIRT_TIME_ACCOUNTING 671config PARAVIRT_TIME_ACCOUNTING
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 0644129a5333..1df115909758 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -112,10 +112,20 @@ void kvm_async_pf_task_wait(u32 token);
112void kvm_async_pf_task_wake(u32 token); 112void kvm_async_pf_task_wake(u32 token);
113u32 kvm_read_and_reset_pf_reason(void); 113u32 kvm_read_and_reset_pf_reason(void);
114extern void kvm_disable_steal_time(void); 114extern void kvm_disable_steal_time(void);
115#else 115
116#define kvm_guest_init() do { } while (0) 116#ifdef CONFIG_PARAVIRT_SPINLOCKS
117void __init kvm_spinlock_init(void);
118#else /* !CONFIG_PARAVIRT_SPINLOCKS */
119static inline void kvm_spinlock_init(void)
120{
121}
122#endif /* CONFIG_PARAVIRT_SPINLOCKS */
123
124#else /* CONFIG_KVM_GUEST */
125#define kvm_guest_init() do {} while (0)
117#define kvm_async_pf_task_wait(T) do {} while(0) 126#define kvm_async_pf_task_wait(T) do {} while(0)
118#define kvm_async_pf_task_wake(T) do {} while(0) 127#define kvm_async_pf_task_wake(T) do {} while(0)
128
119static inline u32 kvm_read_and_reset_pf_reason(void) 129static inline u32 kvm_read_and_reset_pf_reason(void)
120{ 130{
121 return 0; 131 return 0;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cfdc9ee4c900..401f350ef71b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
712 712
713#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) 713#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
714 714
715static inline int arch_spin_is_locked(struct arch_spinlock *lock) 715static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
716 __ticket_t ticket)
716{ 717{
717 return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); 718 PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket);
718} 719}
719 720
720static inline int arch_spin_is_contended(struct arch_spinlock *lock) 721static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
722 __ticket_t ticket)
721{ 723{
722 return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); 724 PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
723}
724#define arch_spin_is_contended arch_spin_is_contended
725
726static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
727{
728 PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
729}
730
731static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
732 unsigned long flags)
733{
734 PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
735}
736
737static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
738{
739 return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
740}
741
742static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
743{
744 PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
745} 725}
746 726
747#endif 727#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0617ff241e8f..aab8f671b523 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -327,13 +327,15 @@ struct pv_mmu_ops {
327}; 327};
328 328
329struct arch_spinlock; 329struct arch_spinlock;
330#ifdef CONFIG_SMP
331#include <asm/spinlock_types.h>
332#else
333typedef u16 __ticket_t;
334#endif
335
330struct pv_lock_ops { 336struct pv_lock_ops {
331 int (*spin_is_locked)(struct arch_spinlock *lock); 337 struct paravirt_callee_save lock_spinning;
332 int (*spin_is_contended)(struct arch_spinlock *lock); 338 void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
333 void (*spin_lock)(struct arch_spinlock *lock);
334 void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
335 int (*spin_trylock)(struct arch_spinlock *lock);
336 void (*spin_unlock)(struct arch_spinlock *lock);
337}; 339};
338 340
339/* This contains all the paravirt structures: we get a convenient 341/* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e0e668422c75..bf156ded74b5 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -1,11 +1,14 @@
1#ifndef _ASM_X86_SPINLOCK_H 1#ifndef _ASM_X86_SPINLOCK_H
2#define _ASM_X86_SPINLOCK_H 2#define _ASM_X86_SPINLOCK_H
3 3
4#include <linux/jump_label.h>
4#include <linux/atomic.h> 5#include <linux/atomic.h>
5#include <asm/page.h> 6#include <asm/page.h>
6#include <asm/processor.h> 7#include <asm/processor.h>
7#include <linux/compiler.h> 8#include <linux/compiler.h>
8#include <asm/paravirt.h> 9#include <asm/paravirt.h>
10#include <asm/bitops.h>
11
9/* 12/*
10 * Your basic SMP spinlocks, allowing only a single CPU anywhere 13 * Your basic SMP spinlocks, allowing only a single CPU anywhere
11 * 14 *
@@ -34,6 +37,31 @@
34# define UNLOCK_LOCK_PREFIX 37# define UNLOCK_LOCK_PREFIX
35#endif 38#endif
36 39
40/* How long a lock should spin before we consider blocking */
41#define SPIN_THRESHOLD (1 << 15)
42
43extern struct static_key paravirt_ticketlocks_enabled;
44static __always_inline bool static_key_false(struct static_key *key);
45
46#ifdef CONFIG_PARAVIRT_SPINLOCKS
47
48static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
49{
50 set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
51}
52
53#else /* !CONFIG_PARAVIRT_SPINLOCKS */
54static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock,
55 __ticket_t ticket)
56{
57}
58static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
59 __ticket_t ticket)
60{
61}
62
63#endif /* CONFIG_PARAVIRT_SPINLOCKS */
64
37static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) 65static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
38{ 66{
39 return lock.tickets.head == lock.tickets.tail; 67 return lock.tickets.head == lock.tickets.tail;
@@ -52,81 +80,101 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
52 * in the high part, because a wide xadd increment of the low part would carry 80 * in the high part, because a wide xadd increment of the low part would carry
53 * up and contaminate the high part. 81 * up and contaminate the high part.
54 */ 82 */
55static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) 83static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
56{ 84{
57 register struct __raw_tickets inc = { .tail = 1 }; 85 register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC };
58 86
59 inc = xadd(&lock->tickets, inc); 87 inc = xadd(&lock->tickets, inc);
88 if (likely(inc.head == inc.tail))
89 goto out;
60 90
91 inc.tail &= ~TICKET_SLOWPATH_FLAG;
61 for (;;) { 92 for (;;) {
62 if (inc.head == inc.tail) 93 unsigned count = SPIN_THRESHOLD;
63 break; 94
64 cpu_relax(); 95 do {
65 inc.head = ACCESS_ONCE(lock->tickets.head); 96 if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
97 goto out;
98 cpu_relax();
99 } while (--count);
100 __ticket_lock_spinning(lock, inc.tail);
66 } 101 }
67 barrier(); /* make sure nothing creeps before the lock is taken */ 102out: barrier(); /* make sure nothing creeps before the lock is taken */
68} 103}
69 104
70static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) 105static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
71{ 106{
72 arch_spinlock_t old, new; 107 arch_spinlock_t old, new;
73 108
74 old.tickets = ACCESS_ONCE(lock->tickets); 109 old.tickets = ACCESS_ONCE(lock->tickets);
75 if (old.tickets.head != old.tickets.tail) 110 if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
76 return 0; 111 return 0;
77 112
78 new.head_tail = old.head_tail + (1 << TICKET_SHIFT); 113 new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
79 114
80 /* cmpxchg is a full barrier, so nothing can move before it */ 115 /* cmpxchg is a full barrier, so nothing can move before it */
81 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; 116 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
82} 117}
83 118
84static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) 119static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
120 arch_spinlock_t old)
85{ 121{
86 __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); 122 arch_spinlock_t new;
123
124 BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
125
126 /* Perform the unlock on the "before" copy */
127 old.tickets.head += TICKET_LOCK_INC;
128
129 /* Clear the slowpath flag */
130 new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
131
132 /*
133 * If the lock is uncontended, clear the flag - use cmpxchg in
134 * case it changes behind our back though.
135 */
136 if (new.tickets.head != new.tickets.tail ||
137 cmpxchg(&lock->head_tail, old.head_tail,
138 new.head_tail) != old.head_tail) {
139 /*
140 * Lock still has someone queued for it, so wake up an
141 * appropriate waiter.
142 */
143 __ticket_unlock_kick(lock, old.tickets.head);
144 }
87} 145}
88 146
89static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) 147static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
90{ 148{
91 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); 149 if (TICKET_SLOWPATH_FLAG &&
150 static_key_false(&paravirt_ticketlocks_enabled)) {
151 arch_spinlock_t prev;
92 152
93 return tmp.tail != tmp.head; 153 prev = *lock;
94} 154 add_smp(&lock->tickets.head, TICKET_LOCK_INC);
95 155
96static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) 156 /* add_smp() is a full mb() */
97{
98 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
99 157
100 return (__ticket_t)(tmp.tail - tmp.head) > 1; 158 if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
159 __ticket_unlock_slowpath(lock, prev);
160 } else
161 __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
101} 162}
102 163
103#ifndef CONFIG_PARAVIRT_SPINLOCKS
104
105static inline int arch_spin_is_locked(arch_spinlock_t *lock) 164static inline int arch_spin_is_locked(arch_spinlock_t *lock)
106{ 165{
107 return __ticket_spin_is_locked(lock); 166 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
108}
109
110static inline int arch_spin_is_contended(arch_spinlock_t *lock)
111{
112 return __ticket_spin_is_contended(lock);
113}
114#define arch_spin_is_contended arch_spin_is_contended
115 167
116static __always_inline void arch_spin_lock(arch_spinlock_t *lock) 168 return tmp.tail != tmp.head;
117{
118 __ticket_spin_lock(lock);
119} 169}
120 170
121static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) 171static inline int arch_spin_is_contended(arch_spinlock_t *lock)
122{ 172{
123 return __ticket_spin_trylock(lock); 173 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
124}
125 174
126static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) 175 return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
127{
128 __ticket_spin_unlock(lock);
129} 176}
177#define arch_spin_is_contended arch_spin_is_contended
130 178
131static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, 179static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
132 unsigned long flags) 180 unsigned long flags)
@@ -134,8 +182,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
134 arch_spin_lock(lock); 182 arch_spin_lock(lock);
135} 183}
136 184
137#endif /* CONFIG_PARAVIRT_SPINLOCKS */
138
139static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 185static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
140{ 186{
141 while (arch_spin_is_locked(lock)) 187 while (arch_spin_is_locked(lock))
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index ad0ad07fc006..4f1bea19945b 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -1,13 +1,17 @@
1#ifndef _ASM_X86_SPINLOCK_TYPES_H 1#ifndef _ASM_X86_SPINLOCK_TYPES_H
2#define _ASM_X86_SPINLOCK_TYPES_H 2#define _ASM_X86_SPINLOCK_TYPES_H
3 3
4#ifndef __LINUX_SPINLOCK_TYPES_H
5# error "please don't include this file directly"
6#endif
7
8#include <linux/types.h> 4#include <linux/types.h>
9 5
10#if (CONFIG_NR_CPUS < 256) 6#ifdef CONFIG_PARAVIRT_SPINLOCKS
7#define __TICKET_LOCK_INC 2
8#define TICKET_SLOWPATH_FLAG ((__ticket_t)1)
9#else
10#define __TICKET_LOCK_INC 1
11#define TICKET_SLOWPATH_FLAG ((__ticket_t)0)
12#endif
13
14#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC))
11typedef u8 __ticket_t; 15typedef u8 __ticket_t;
12typedef u16 __ticketpair_t; 16typedef u16 __ticketpair_t;
13#else 17#else
@@ -15,6 +19,8 @@ typedef u16 __ticket_t;
15typedef u32 __ticketpair_t; 19typedef u32 __ticketpair_t;
16#endif 20#endif
17 21
22#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC)
23
18#define TICKET_SHIFT (sizeof(__ticket_t) * 8) 24#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
19 25
20typedef struct arch_spinlock { 26typedef struct arch_spinlock {
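
The increment-by-2 scheme above is easiest to see with the packing spelled out. The sketch below is plain C, not patch code; it assumes the little-endian layout used on x86 and borrows TICKET_LOCK_INC and TICKET_SLOWPATH_FLAG from the hunk above to show why advancing tickets by 2 leaves bit 0 of the tail free for the slowpath flag while head and tail still share one word for cmpxchg.

#include <stdint.h>
#include <stdio.h>

#define TICKET_LOCK_INC      2            /* CONFIG_PARAVIRT_SPINLOCKS case */
#define TICKET_SLOWPATH_FLAG ((uint8_t)1) /* reuses the ticket bit freed above */

/* Mirrors arch_spinlock_t's union of head_tail and tickets (LE layout). */
union ticket_pair {
	uint16_t head_tail;               /* single word, handy for cmpxchg */
	struct {
		uint8_t head;             /* ticket being served */
		uint8_t tail;             /* next ticket; bit 0 = slowpath flag */
	} tickets;
};

int main(void)
{
	union ticket_pair lock = { .head_tail = 0 };

	lock.tickets.tail += TICKET_LOCK_INC;       /* a CPU takes ticket 0 */
	lock.tickets.tail |= TICKET_SLOWPATH_FLAG;  /* a waiter went to sleep */

	printf("head=%u tail=%u slowpath=%u\n",
	       (unsigned)lock.tickets.head,
	       (unsigned)(lock.tickets.tail & ~TICKET_SLOWPATH_FLAG),
	       (unsigned)(lock.tickets.tail & TICKET_SLOWPATH_FLAG));

	/* Unlock advances head by TICKET_LOCK_INC, never touching the flag bit. */
	lock.tickets.head += TICKET_LOCK_INC;
	return 0;
}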
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 06fdbd987e97..94dc8ca434e0 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -23,6 +23,7 @@
23#define KVM_FEATURE_ASYNC_PF 4 23#define KVM_FEATURE_ASYNC_PF 4
24#define KVM_FEATURE_STEAL_TIME 5 24#define KVM_FEATURE_STEAL_TIME 5
25#define KVM_FEATURE_PV_EOI 6 25#define KVM_FEATURE_PV_EOI 6
26#define KVM_FEATURE_PV_UNHALT 7
26 27
27/* The last 8 bits are used to indicate how to interpret the flags field 28/* The last 8 bits are used to indicate how to interpret the flags field
28 * in pvclock structure. If no bits are set, all flags are ignored. 29 * in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7817afdac301..697b93af02dd 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
34#include <linux/sched.h> 34#include <linux/sched.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/kprobes.h> 36#include <linux/kprobes.h>
37#include <linux/debugfs.h>
37#include <asm/timer.h> 38#include <asm/timer.h>
38#include <asm/cpu.h> 39#include <asm/cpu.h>
39#include <asm/traps.h> 40#include <asm/traps.h>
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
419 WARN_ON(kvm_register_clock("primary cpu clock")); 420 WARN_ON(kvm_register_clock("primary cpu clock"));
420 kvm_guest_cpu_init(); 421 kvm_guest_cpu_init();
421 native_smp_prepare_boot_cpu(); 422 native_smp_prepare_boot_cpu();
423 kvm_spinlock_init();
422} 424}
423 425
424static void kvm_guest_cpu_online(void *dummy) 426static void kvm_guest_cpu_online(void *dummy)
@@ -521,3 +523,263 @@ static __init int activate_jump_labels(void)
521 return 0; 523 return 0;
522} 524}
523arch_initcall(activate_jump_labels); 525arch_initcall(activate_jump_labels);
526
527#ifdef CONFIG_PARAVIRT_SPINLOCKS
528
529/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
530static void kvm_kick_cpu(int cpu)
531{
532 int apicid;
533 unsigned long flags = 0;
534
535 apicid = per_cpu(x86_cpu_to_apicid, cpu);
536 kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
537}
538
539enum kvm_contention_stat {
540 TAKEN_SLOW,
541 TAKEN_SLOW_PICKUP,
542 RELEASED_SLOW,
543 RELEASED_SLOW_KICKED,
544 NR_CONTENTION_STATS
545};
546
547#ifdef CONFIG_KVM_DEBUG_FS
548#define HISTO_BUCKETS 30
549
550static struct kvm_spinlock_stats
551{
552 u32 contention_stats[NR_CONTENTION_STATS];
553 u32 histo_spin_blocked[HISTO_BUCKETS+1];
554 u64 time_blocked;
555} spinlock_stats;
556
557static u8 zero_stats;
558
559static inline void check_zero(void)
560{
561 u8 ret;
562 u8 old;
563
564 old = ACCESS_ONCE(zero_stats);
565 if (unlikely(old)) {
566 ret = cmpxchg(&zero_stats, old, 0);
567 /* This ensures only one fellow resets the stat */
568 if (ret == old)
569 memset(&spinlock_stats, 0, sizeof(spinlock_stats));
570 }
571}
572
573static inline void add_stats(enum kvm_contention_stat var, u32 val)
574{
575 check_zero();
576 spinlock_stats.contention_stats[var] += val;
577}
578
579
580static inline u64 spin_time_start(void)
581{
582 return sched_clock();
583}
584
585static void __spin_time_accum(u64 delta, u32 *array)
586{
587 unsigned index;
588
589 index = ilog2(delta);
590 check_zero();
591
592 if (index < HISTO_BUCKETS)
593 array[index]++;
594 else
595 array[HISTO_BUCKETS]++;
596}
597
598static inline void spin_time_accum_blocked(u64 start)
599{
600 u32 delta;
601
602 delta = sched_clock() - start;
603 __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
604 spinlock_stats.time_blocked += delta;
605}
606
607static struct dentry *d_spin_debug;
608static struct dentry *d_kvm_debug;
609
610struct dentry *kvm_init_debugfs(void)
611{
612 d_kvm_debug = debugfs_create_dir("kvm", NULL);
613 if (!d_kvm_debug)
614 printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
615
616 return d_kvm_debug;
617}
618
619static int __init kvm_spinlock_debugfs(void)
620{
621 struct dentry *d_kvm;
622
623 d_kvm = kvm_init_debugfs();
624 if (d_kvm == NULL)
625 return -ENOMEM;
626
627 d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
628
629 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
630
631 debugfs_create_u32("taken_slow", 0444, d_spin_debug,
632 &spinlock_stats.contention_stats[TAKEN_SLOW]);
633 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
634 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
635
636 debugfs_create_u32("released_slow", 0444, d_spin_debug,
637 &spinlock_stats.contention_stats[RELEASED_SLOW]);
638 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
639 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
640
641 debugfs_create_u64("time_blocked", 0444, d_spin_debug,
642 &spinlock_stats.time_blocked);
643
644 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
645 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
646
647 return 0;
648}
649fs_initcall(kvm_spinlock_debugfs);
650#else /* !CONFIG_KVM_DEBUG_FS */
651static inline void add_stats(enum kvm_contention_stat var, u32 val)
652{
653}
654
655static inline u64 spin_time_start(void)
656{
657 return 0;
658}
659
660static inline void spin_time_accum_blocked(u64 start)
661{
662}
663#endif /* CONFIG_KVM_DEBUG_FS */
664
665struct kvm_lock_waiting {
666 struct arch_spinlock *lock;
667 __ticket_t want;
668};
669
670/* cpus 'waiting' on a spinlock to become available */
671static cpumask_t waiting_cpus;
672
673/* Track spinlock on which a cpu is waiting */
674static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
675
676static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
677{
678 struct kvm_lock_waiting *w;
679 int cpu;
680 u64 start;
681 unsigned long flags;
682
683 if (in_nmi())
684 return;
685
686 w = &__get_cpu_var(klock_waiting);
687 cpu = smp_processor_id();
688 start = spin_time_start();
689
690 /*
691 * Make sure an interrupt handler can't upset things in a
692 * partially setup state.
693 */
694 local_irq_save(flags);
695
696 /*
697 * The ordering protocol on this is that the "lock" pointer
698 * may only be set non-NULL if the "want" ticket is correct.
699 * If we're updating "want", we must first clear "lock".
700 */
701 w->lock = NULL;
702 smp_wmb();
703 w->want = want;
704 smp_wmb();
705 w->lock = lock;
706
707 add_stats(TAKEN_SLOW, 1);
708
709 /*
710 * This uses set_bit, which is atomic but we should not rely on its
711 * reordering gurantees. So barrier is needed after this call.
712 */
713 cpumask_set_cpu(cpu, &waiting_cpus);
714
715 barrier();
716
717 /*
718 * Mark entry to slowpath before doing the pickup test to make
719 * sure we don't deadlock with an unlocker.
720 */
721 __ticket_enter_slowpath(lock);
722
723 /*
724 * check again make sure it didn't become free while
725 * we weren't looking.
726 */
727 if (ACCESS_ONCE(lock->tickets.head) == want) {
728 add_stats(TAKEN_SLOW_PICKUP, 1);
729 goto out;
730 }
731
732 /*
733 * halt until it's our turn and kicked. Note that we do safe halt
734 * for irq enabled case to avoid hang when lock info is overwritten
735 * in irq spinlock slowpath and no spurious interrupt occur to save us.
736 */
737 if (arch_irqs_disabled_flags(flags))
738 halt();
739 else
740 safe_halt();
741
742out:
743 cpumask_clear_cpu(cpu, &waiting_cpus);
744 w->lock = NULL;
745 local_irq_restore(flags);
746 spin_time_accum_blocked(start);
747}
748PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
749
750/* Kick vcpu waiting on @lock->head to reach value @ticket */
751static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
752{
753 int cpu;
754
755 add_stats(RELEASED_SLOW, 1);
756 for_each_cpu(cpu, &waiting_cpus) {
757 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
758 if (ACCESS_ONCE(w->lock) == lock &&
759 ACCESS_ONCE(w->want) == ticket) {
760 add_stats(RELEASED_SLOW_KICKED, 1);
761 kvm_kick_cpu(cpu);
762 break;
763 }
764 }
765}
766
767/*
768 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
769 */
770void __init kvm_spinlock_init(void)
771{
772 if (!kvm_para_available())
773 return;
774 /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
775 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
776 return;
777
778 printk(KERN_INFO "KVM setup paravirtual spinlock\n");
779
780 static_key_slow_inc(&paravirt_ticketlocks_enabled);
781
782 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
783 pv_lock_ops.unlock_kick = kvm_unlock_kick;
784}
785#endif /* CONFIG_PARAVIRT_SPINLOCKS */
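
The ordering rule the comments above describe ("lock" may only be set non-NULL if "want" is correct) is the heart of the waiter/kicker handshake. A compressed sketch of that protocol follows; it is not the kernel code: halt_cpu() and kick_cpu() are hypothetical stand-ins for safe_halt() and the KVM_HC_KICK_CPU hypercall, an ordinary array stands in for the per-CPU data, and C11 seq_cst atomics stand in for the explicit smp_wmb() barriers.

#include <stdatomic.h>
#include <stddef.h>

#define NR_CPUS 4

struct lock_waiting {
	void *_Atomic lock;            /* non-NULL only while 'want' is valid */
	_Atomic unsigned short want;   /* ticket this CPU is waiting for */
};

static struct lock_waiting waiting[NR_CPUS];   /* per-CPU in the real code */

/* Hypothetical stand-ins for safe_halt()/halt() and KVM_HC_KICK_CPU. */
static void halt_cpu(void) { }
static void kick_cpu(int cpu) { (void)cpu; }

static void lock_spinning(int cpu, void *lock, unsigned short want)
{
	struct lock_waiting *w = &waiting[cpu];

	/* Clear 'lock' first so a kicker never pairs it with a stale ticket. */
	atomic_store(&w->lock, NULL);
	atomic_store(&w->want, want);
	atomic_store(&w->lock, lock);

	halt_cpu();                    /* block until kicked (or a spurious wakeup) */

	atomic_store(&w->lock, NULL);  /* done waiting */
}

static void unlock_kick(void *lock, unsigned short ticket)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		struct lock_waiting *w = &waiting[cpu];

		if (atomic_load(&w->lock) == lock &&
		    atomic_load(&w->want) == ticket) {
			kick_cpu(cpu);   /* wake exactly the matching waiter */
			break;
		}
	}
}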
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c77a976..bbb6c7316341 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -4,25 +4,17 @@
4 */ 4 */
5#include <linux/spinlock.h> 5#include <linux/spinlock.h>
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/jump_label.h>
7 8
8#include <asm/paravirt.h> 9#include <asm/paravirt.h>
9 10
10static inline void
11default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
12{
13 arch_spin_lock(lock);
14}
15
16struct pv_lock_ops pv_lock_ops = { 11struct pv_lock_ops pv_lock_ops = {
17#ifdef CONFIG_SMP 12#ifdef CONFIG_SMP
18 .spin_is_locked = __ticket_spin_is_locked, 13 .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
19 .spin_is_contended = __ticket_spin_is_contended, 14 .unlock_kick = paravirt_nop,
20
21 .spin_lock = __ticket_spin_lock,
22 .spin_lock_flags = default_spin_lock_flags,
23 .spin_trylock = __ticket_spin_trylock,
24 .spin_unlock = __ticket_spin_unlock,
25#endif 15#endif
26}; 16};
27EXPORT_SYMBOL(pv_lock_ops); 17EXPORT_SYMBOL(pv_lock_ops);
28 18
19struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE;
20EXPORT_SYMBOL(paravirt_ticketlocks_enabled);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b81c88e51daa..597655bd72b0 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
279 279
280 xen_filter_cpu_maps(); 280 xen_filter_cpu_maps();
281 xen_setup_vcpu_info_placement(); 281 xen_setup_vcpu_info_placement();
282 xen_init_spinlocks();
282} 283}
283 284
284static void __init xen_smp_prepare_cpus(unsigned int max_cpus) 285static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
@@ -680,7 +681,6 @@ void __init xen_smp_init(void)
680{ 681{
681 smp_ops = xen_smp_ops; 682 smp_ops = xen_smp_ops;
682 xen_fill_possible_map(); 683 xen_fill_possible_map();
683 xen_init_spinlocks();
684} 684}
685 685
686static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) 686static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cf3caee356b3..0438b9324a72 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
17#include "xen-ops.h" 17#include "xen-ops.h"
18#include "debugfs.h" 18#include "debugfs.h"
19 19
20#ifdef CONFIG_XEN_DEBUG_FS 20enum xen_contention_stat {
21static struct xen_spinlock_stats 21 TAKEN_SLOW,
22{ 22 TAKEN_SLOW_PICKUP,
23 u64 taken; 23 TAKEN_SLOW_SPURIOUS,
24 u32 taken_slow; 24 RELEASED_SLOW,
25 u32 taken_slow_nested; 25 RELEASED_SLOW_KICKED,
26 u32 taken_slow_pickup; 26 NR_CONTENTION_STATS
27 u32 taken_slow_spurious; 27};
28 u32 taken_slow_irqenable;
29 28
30 u64 released;
31 u32 released_slow;
32 u32 released_slow_kicked;
33 29
30#ifdef CONFIG_XEN_DEBUG_FS
34#define HISTO_BUCKETS 30 31#define HISTO_BUCKETS 30
35 u32 histo_spin_total[HISTO_BUCKETS+1]; 32static struct xen_spinlock_stats
36 u32 histo_spin_spinning[HISTO_BUCKETS+1]; 33{
34 u32 contention_stats[NR_CONTENTION_STATS];
37 u32 histo_spin_blocked[HISTO_BUCKETS+1]; 35 u32 histo_spin_blocked[HISTO_BUCKETS+1];
38
39 u64 time_total;
40 u64 time_spinning;
41 u64 time_blocked; 36 u64 time_blocked;
42} spinlock_stats; 37} spinlock_stats;
43 38
44static u8 zero_stats; 39static u8 zero_stats;
45 40
46static unsigned lock_timeout = 1 << 10;
47#define TIMEOUT lock_timeout
48
49static inline void check_zero(void) 41static inline void check_zero(void)
50{ 42{
51 if (unlikely(zero_stats)) { 43 u8 ret;
52 memset(&spinlock_stats, 0, sizeof(spinlock_stats)); 44 u8 old = ACCESS_ONCE(zero_stats);
53 zero_stats = 0; 45 if (unlikely(old)) {
46 ret = cmpxchg(&zero_stats, old, 0);
47 /* This ensures only one fellow resets the stat */
48 if (ret == old)
49 memset(&spinlock_stats, 0, sizeof(spinlock_stats));
54 } 50 }
55} 51}
56 52
57#define ADD_STATS(elem, val) \ 53static inline void add_stats(enum xen_contention_stat var, u32 val)
58 do { check_zero(); spinlock_stats.elem += (val); } while(0) 54{
55 check_zero();
56 spinlock_stats.contention_stats[var] += val;
57}
59 58
60static inline u64 spin_time_start(void) 59static inline u64 spin_time_start(void)
61{ 60{
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
74 array[HISTO_BUCKETS]++; 73 array[HISTO_BUCKETS]++;
75} 74}
76 75
77static inline void spin_time_accum_spinning(u64 start)
78{
79 u32 delta = xen_clocksource_read() - start;
80
81 __spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
82 spinlock_stats.time_spinning += delta;
83}
84
85static inline void spin_time_accum_total(u64 start)
86{
87 u32 delta = xen_clocksource_read() - start;
88
89 __spin_time_accum(delta, spinlock_stats.histo_spin_total);
90 spinlock_stats.time_total += delta;
91}
92
93static inline void spin_time_accum_blocked(u64 start) 76static inline void spin_time_accum_blocked(u64 start)
94{ 77{
95 u32 delta = xen_clocksource_read() - start; 78 u32 delta = xen_clocksource_read() - start;
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start)
99} 82}
100#else /* !CONFIG_XEN_DEBUG_FS */ 83#else /* !CONFIG_XEN_DEBUG_FS */
101#define TIMEOUT (1 << 10) 84#define TIMEOUT (1 << 10)
102#define ADD_STATS(elem, val) do { (void)(val); } while(0) 85static inline void add_stats(enum xen_contention_stat var, u32 val)
86{
87}
103 88
104static inline u64 spin_time_start(void) 89static inline u64 spin_time_start(void)
105{ 90{
106 return 0; 91 return 0;
107} 92}
108 93
109static inline void spin_time_accum_total(u64 start)
110{
111}
112static inline void spin_time_accum_spinning(u64 start)
113{
114}
115static inline void spin_time_accum_blocked(u64 start) 94static inline void spin_time_accum_blocked(u64 start)
116{ 95{
117} 96}
@@ -134,227 +113,123 @@ typedef u16 xen_spinners_t;
134 asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); 113 asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
135#endif 114#endif
136 115
137struct xen_spinlock { 116struct xen_lock_waiting {
138 unsigned char lock; /* 0 -> free; 1 -> locked */ 117 struct arch_spinlock *lock;
139 xen_spinners_t spinners; /* count of waiting cpus */ 118 __ticket_t want;
140}; 119};
141 120
142static int xen_spin_is_locked(struct arch_spinlock *lock)
143{
144 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
145
146 return xl->lock != 0;
147}
148
149static int xen_spin_is_contended(struct arch_spinlock *lock)
150{
151 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
152
153 /* Not strictly true; this is only the count of contended
154 lock-takers entering the slow path. */
155 return xl->spinners != 0;
156}
157
158static int xen_spin_trylock(struct arch_spinlock *lock)
159{
160 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
161 u8 old = 1;
162
163 asm("xchgb %b0,%1"
164 : "+q" (old), "+m" (xl->lock) : : "memory");
165
166 return old == 0;
167}
168
169static DEFINE_PER_CPU(char *, irq_name);
170static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; 121static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
171static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); 122static DEFINE_PER_CPU(char *, irq_name);
172 123static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
173/* 124static cpumask_t waiting_cpus;
174 * Mark a cpu as interested in a lock. Returns the CPU's previous
175 * lock of interest, in case we got preempted by an interrupt.
176 */
177static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
178{
179 struct xen_spinlock *prev;
180
181 prev = __this_cpu_read(lock_spinners);
182 __this_cpu_write(lock_spinners, xl);
183
184 wmb(); /* set lock of interest before count */
185
186 inc_spinners(xl);
187
188 return prev;
189}
190
191/*
192 * Mark a cpu as no longer interested in a lock. Restores previous
193 * lock of interest (NULL for none).
194 */
195static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
196{
197 dec_spinners(xl);
198 wmb(); /* decrement count before restoring lock */
199 __this_cpu_write(lock_spinners, prev);
200}
201 125
202static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) 126static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
203{ 127{
204 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
205 struct xen_spinlock *prev;
206 int irq = __this_cpu_read(lock_kicker_irq); 128 int irq = __this_cpu_read(lock_kicker_irq);
207 int ret; 129 struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
130 int cpu = smp_processor_id();
208 u64 start; 131 u64 start;
132 unsigned long flags;
209 133
210 /* If kicker interrupts not initialized yet, just spin */ 134 /* If kicker interrupts not initialized yet, just spin */
211 if (irq == -1) 135 if (irq == -1)
212 return 0; 136 return;
213 137
214 start = spin_time_start(); 138 start = spin_time_start();
215 139
216 /* announce we're spinning */ 140 /*
217 prev = spinning_lock(xl); 141 * Make sure an interrupt handler can't upset things in a
142 * partially setup state.
143 */
144 local_irq_save(flags);
145 /*
146 * We don't really care if we're overwriting some other
147 * (lock,want) pair, as that would mean that we're currently
148 * in an interrupt context, and the outer context had
149 * interrupts enabled. That has already kicked the VCPU out
150 * of xen_poll_irq(), so it will just return spuriously and
151 * retry with newly setup (lock,want).
152 *
153 * The ordering protocol on this is that the "lock" pointer
154 * may only be set non-NULL if the "want" ticket is correct.
155 * If we're updating "want", we must first clear "lock".
156 */
157 w->lock = NULL;
158 smp_wmb();
159 w->want = want;
160 smp_wmb();
161 w->lock = lock;
218 162
219 ADD_STATS(taken_slow, 1); 163 /* This uses set_bit, which atomic and therefore a barrier */
220 ADD_STATS(taken_slow_nested, prev != NULL); 164 cpumask_set_cpu(cpu, &waiting_cpus);
165 add_stats(TAKEN_SLOW, 1);
221 166
222 do { 167 /* clear pending */
223 unsigned long flags; 168 xen_clear_irq_pending(irq);
224 169
225 /* clear pending */ 170 /* Only check lock once pending cleared */
226 xen_clear_irq_pending(irq); 171 barrier();
227 172
228 /* check again make sure it didn't become free while 173 /*
229 we weren't looking */ 174 * Mark entry to slowpath before doing the pickup test to make
230 ret = xen_spin_trylock(lock); 175 * sure we don't deadlock with an unlocker.
231 if (ret) { 176 */
232 ADD_STATS(taken_slow_pickup, 1); 177 __ticket_enter_slowpath(lock);
233 178
234 /* 179 /*
235 * If we interrupted another spinlock while it 180 * check again make sure it didn't become free while
236 * was blocking, make sure it doesn't block 181 * we weren't looking
237 * without rechecking the lock. 182 */
238 */ 183 if (ACCESS_ONCE(lock->tickets.head) == want) {
239 if (prev != NULL) 184 add_stats(TAKEN_SLOW_PICKUP, 1);
240 xen_set_irq_pending(irq); 185 goto out;
241 goto out; 186 }
242 }
243 187
244 flags = arch_local_save_flags(); 188 /* Allow interrupts while blocked */
245 if (irq_enable) { 189 local_irq_restore(flags);
246 ADD_STATS(taken_slow_irqenable, 1);
247 raw_local_irq_enable();
248 }
249 190
250 /* 191 /*
251 * Block until irq becomes pending. If we're 192 * If an interrupt happens here, it will leave the wakeup irq
252 * interrupted at this point (after the trylock but 193 * pending, which will cause xen_poll_irq() to return
253 * before entering the block), then the nested lock 194 * immediately.
254 * handler guarantees that the irq will be left 195 */
255 * pending if there's any chance the lock became free;
256 * xen_poll_irq() returns immediately if the irq is
257 * pending.
258 */
259 xen_poll_irq(irq);
260 196
261 raw_local_irq_restore(flags); 197 /* Block until irq becomes pending (or perhaps a spurious wakeup) */
198 xen_poll_irq(irq);
199 add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
262 200
263 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); 201 local_irq_save(flags);
264 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
265 202
266 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); 203 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
267
268out: 204out:
269 unspinning_lock(xl, prev); 205 cpumask_clear_cpu(cpu, &waiting_cpus);
270 spin_time_accum_blocked(start); 206 w->lock = NULL;
271
272 return ret;
273}
274
275static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
276{
277 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
278 unsigned timeout;
279 u8 oldval;
280 u64 start_spin;
281
282 ADD_STATS(taken, 1);
283
284 start_spin = spin_time_start();
285
286 do {
287 u64 start_spin_fast = spin_time_start();
288
289 timeout = TIMEOUT;
290
291 asm("1: xchgb %1,%0\n"
292 " testb %1,%1\n"
293 " jz 3f\n"
294 "2: rep;nop\n"
295 " cmpb $0,%0\n"
296 " je 1b\n"
297 " dec %2\n"
298 " jnz 2b\n"
299 "3:\n"
300 : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
301 : "1" (1)
302 : "memory");
303 207
304 spin_time_accum_spinning(start_spin_fast); 208 local_irq_restore(flags);
305 209
306 } while (unlikely(oldval != 0 && 210 spin_time_accum_blocked(start);
307 (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
308
309 spin_time_accum_total(start_spin);
310}
311
312static void xen_spin_lock(struct arch_spinlock *lock)
313{
314 __xen_spin_lock(lock, false);
315}
316
317static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
318{
319 __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
320} 211}
212PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
321 213
322static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) 214static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
323{ 215{
324 int cpu; 216 int cpu;
325 217
326 ADD_STATS(released_slow, 1); 218 add_stats(RELEASED_SLOW, 1);
219
220 for_each_cpu(cpu, &waiting_cpus) {
221 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
327 222
328 for_each_online_cpu(cpu) { 223 /* Make sure we read lock before want */
329 /* XXX should mix up next cpu selection */ 224 if (ACCESS_ONCE(w->lock) == lock &&
330 if (per_cpu(lock_spinners, cpu) == xl) { 225 ACCESS_ONCE(w->want) == next) {
331 ADD_STATS(released_slow_kicked, 1); 226 add_stats(RELEASED_SLOW_KICKED, 1);
332 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 227 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
228 break;
333 } 229 }
334 } 230 }
335} 231}
336 232
337static void xen_spin_unlock(struct arch_spinlock *lock)
338{
339 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
340
341 ADD_STATS(released, 1);
342
343 smp_wmb(); /* make sure no writes get moved after unlock */
344 xl->lock = 0; /* release lock */
345
346 /*
347 * Make sure unlock happens before checking for waiting
348 * spinners. We need a strong barrier to enforce the
349 * write-read ordering to different memory locations, as the
350 * CPU makes no implied guarantees about their ordering.
351 */
352 mb();
353
354 if (unlikely(xl->spinners))
355 xen_spin_unlock_slow(xl);
356}
357
358static irqreturn_t dummy_handler(int irq, void *dev_id) 233static irqreturn_t dummy_handler(int irq, void *dev_id)
359{ 234{
360 BUG(); 235 BUG();
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu)
408 per_cpu(irq_name, cpu) = NULL; 283 per_cpu(irq_name, cpu) = NULL;
409} 284}
410 285
286static bool xen_pvspin __initdata = true;
287
411void __init xen_init_spinlocks(void) 288void __init xen_init_spinlocks(void)
412{ 289{
413 /* 290 /*
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void)
417 if (xen_hvm_domain()) 294 if (xen_hvm_domain())
418 return; 295 return;
419 296
420 BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); 297 if (!xen_pvspin) {
298 printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
299 return;
300 }
421 301
422 pv_lock_ops.spin_is_locked = xen_spin_is_locked; 302 static_key_slow_inc(&paravirt_ticketlocks_enabled);
423 pv_lock_ops.spin_is_contended = xen_spin_is_contended; 303
424 pv_lock_ops.spin_lock = xen_spin_lock; 304 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
425 pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; 305 pv_lock_ops.unlock_kick = xen_unlock_kick;
426 pv_lock_ops.spin_trylock = xen_spin_trylock; 306}
427 pv_lock_ops.spin_unlock = xen_spin_unlock; 307
308static __init int xen_parse_nopvspin(char *arg)
309{
310 xen_pvspin = false;
311 return 0;
428} 312}
313early_param("xen_nopvspin", xen_parse_nopvspin);
429 314
430#ifdef CONFIG_XEN_DEBUG_FS 315#ifdef CONFIG_XEN_DEBUG_FS
431 316
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void)
442 327
443 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); 328 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
444 329
445 debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
446
447 debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
448 debugfs_create_u32("taken_slow", 0444, d_spin_debug, 330 debugfs_create_u32("taken_slow", 0444, d_spin_debug,
449 &spinlock_stats.taken_slow); 331 &spinlock_stats.contention_stats[TAKEN_SLOW]);
450 debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
451 &spinlock_stats.taken_slow_nested);
452 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, 332 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
453 &spinlock_stats.taken_slow_pickup); 333 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
454 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, 334 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
455 &spinlock_stats.taken_slow_spurious); 335 &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
456 debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
457 &spinlock_stats.taken_slow_irqenable);
458 336
459 debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
460 debugfs_create_u32("released_slow", 0444, d_spin_debug, 337 debugfs_create_u32("released_slow", 0444, d_spin_debug,
461 &spinlock_stats.released_slow); 338 &spinlock_stats.contention_stats[RELEASED_SLOW]);
462 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, 339 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
463 &spinlock_stats.released_slow_kicked); 340 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
464 341
465 debugfs_create_u64("time_spinning", 0444, d_spin_debug,
466 &spinlock_stats.time_spinning);
467 debugfs_create_u64("time_blocked", 0444, d_spin_debug, 342 debugfs_create_u64("time_blocked", 0444, d_spin_debug,
468 &spinlock_stats.time_blocked); 343 &spinlock_stats.time_blocked);
469 debugfs_create_u64("time_total", 0444, d_spin_debug,
470 &spinlock_stats.time_total);
471 344
472 debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
473 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
474 debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
475 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
476 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, 345 debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
477 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); 346 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
478 347
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0976fc46d1e0..a5079072da66 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -48,7 +48,6 @@
48 48
49#include <linux/types.h> 49#include <linux/types.h>
50#include <linux/compiler.h> 50#include <linux/compiler.h>
51#include <linux/workqueue.h>
52 51
53#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) 52#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
54 53
@@ -61,12 +60,6 @@ struct static_key {
61#endif 60#endif
62}; 61};
63 62
64struct static_key_deferred {
65 struct static_key key;
66 unsigned long timeout;
67 struct delayed_work work;
68};
69
70# include <asm/jump_label.h> 63# include <asm/jump_label.h>
71# define HAVE_JUMP_LABEL 64# define HAVE_JUMP_LABEL
72#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ 65#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
@@ -78,6 +71,7 @@ enum jump_label_type {
78 71
79struct module; 72struct module;
80 73
74#include <linux/atomic.h>
81#ifdef HAVE_JUMP_LABEL 75#ifdef HAVE_JUMP_LABEL
82 76
83#define JUMP_LABEL_TRUE_BRANCH 1UL 77#define JUMP_LABEL_TRUE_BRANCH 1UL
@@ -119,10 +113,7 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
119extern int jump_label_text_reserved(void *start, void *end); 113extern int jump_label_text_reserved(void *start, void *end);
120extern void static_key_slow_inc(struct static_key *key); 114extern void static_key_slow_inc(struct static_key *key);
121extern void static_key_slow_dec(struct static_key *key); 115extern void static_key_slow_dec(struct static_key *key);
122extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
123extern void jump_label_apply_nops(struct module *mod); 116extern void jump_label_apply_nops(struct module *mod);
124extern void
125jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
126 117
127#define STATIC_KEY_INIT_TRUE ((struct static_key) \ 118#define STATIC_KEY_INIT_TRUE ((struct static_key) \
128 { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) 119 { .enabled = ATOMIC_INIT(1), .entries = (void *)1 })
@@ -131,8 +122,6 @@ jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
131 122
132#else /* !HAVE_JUMP_LABEL */ 123#else /* !HAVE_JUMP_LABEL */
133 124
134#include <linux/atomic.h>
135
136struct static_key { 125struct static_key {
137 atomic_t enabled; 126 atomic_t enabled;
138}; 127};
@@ -141,10 +130,6 @@ static __always_inline void jump_label_init(void)
141{ 130{
142} 131}
143 132
144struct static_key_deferred {
145 struct static_key key;
146};
147
148static __always_inline bool static_key_false(struct static_key *key) 133static __always_inline bool static_key_false(struct static_key *key)
149{ 134{
150 if (unlikely(atomic_read(&key->enabled)) > 0) 135 if (unlikely(atomic_read(&key->enabled)) > 0)
@@ -169,11 +154,6 @@ static inline void static_key_slow_dec(struct static_key *key)
169 atomic_dec(&key->enabled); 154 atomic_dec(&key->enabled);
170} 155}
171 156
172static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
173{
174 static_key_slow_dec(&key->key);
175}
176
177static inline int jump_label_text_reserved(void *start, void *end) 157static inline int jump_label_text_reserved(void *start, void *end)
178{ 158{
179 return 0; 159 return 0;
@@ -187,12 +167,6 @@ static inline int jump_label_apply_nops(struct module *mod)
187 return 0; 167 return 0;
188} 168}
189 169
190static inline void
191jump_label_rate_limit(struct static_key_deferred *key,
192 unsigned long rl)
193{
194}
195
196#define STATIC_KEY_INIT_TRUE ((struct static_key) \ 170#define STATIC_KEY_INIT_TRUE ((struct static_key) \
197 { .enabled = ATOMIC_INIT(1) }) 171 { .enabled = ATOMIC_INIT(1) })
198#define STATIC_KEY_INIT_FALSE ((struct static_key) \ 172#define STATIC_KEY_INIT_FALSE ((struct static_key) \
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
new file mode 100644
index 000000000000..113788389b3d
--- /dev/null
+++ b/include/linux/jump_label_ratelimit.h
@@ -0,0 +1,34 @@
1#ifndef _LINUX_JUMP_LABEL_RATELIMIT_H
2#define _LINUX_JUMP_LABEL_RATELIMIT_H
3
4#include <linux/jump_label.h>
5#include <linux/workqueue.h>
6
7#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
8struct static_key_deferred {
9 struct static_key key;
10 unsigned long timeout;
11 struct delayed_work work;
12};
13#endif
14
15#ifdef HAVE_JUMP_LABEL
16extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
17extern void
18jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
19
20#else /* !HAVE_JUMP_LABEL */
21struct static_key_deferred {
22 struct static_key key;
23};
24static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
25{
26 static_key_slow_dec(&key->key);
27}
28static inline void
29jump_label_rate_limit(struct static_key_deferred *key,
30 unsigned long rl)
31{
32}
33#endif /* HAVE_JUMP_LABEL */
34#endif /* _LINUX_JUMP_LABEL_RATELIMIT_H */
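
For context on the split above: only users of the deferred (rate-limited) keys now need the extra header, which is why perf_event.h gains the include in the next hunk. Below is a rough kernel-style usage sketch of that API; my_feature_key and the my_feature_* helpers are hypothetical names introduced for illustration, not code from this patch.

#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/jump_label.h>
#include <linux/jump_label_ratelimit.h>

static struct static_key_deferred my_feature_key;   /* hypothetical example key */

static int __init my_feature_init(void)
{
	/*
	 * Batch slow-path disables so rapid enable/disable cycles don't
	 * repatch kernel text on every transition.
	 */
	jump_label_rate_limit(&my_feature_key, HZ);
	return 0;
}
late_initcall(my_feature_init);

static void my_feature_enable(void)
{
	static_key_slow_inc(&my_feature_key.key);
}

static void my_feature_disable(void)
{
	/* The decrement is deferred through a delayed work item. */
	static_key_slow_dec_deferred(&my_feature_key);
}

static bool my_fast_path_enabled(void)
{
	/* Near-zero cost while the key is off: a patched no-op/jump. */
	return static_key_false(&my_feature_key.key);
}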
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4019d82c3d03..866e85c5eb94 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -48,6 +48,7 @@ struct perf_guest_info_callbacks {
48#include <linux/cpu.h> 48#include <linux/cpu.h>
49#include <linux/irq_work.h> 49#include <linux/irq_work.h>
50#include <linux/static_key.h> 50#include <linux/static_key.h>
51#include <linux/jump_label_ratelimit.h>
51#include <linux/atomic.h> 52#include <linux/atomic.h>
52#include <linux/sysfs.h> 53#include <linux/sysfs.h>
53#include <linux/perf_regs.h> 54#include <linux/perf_regs.h>
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index cea2c5c72d26..2841f86eae0b 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -19,6 +19,7 @@
19#define KVM_HC_MMU_OP 2 19#define KVM_HC_MMU_OP 2
20#define KVM_HC_FEATURES 3 20#define KVM_HC_FEATURES 3
21#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 21#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
22#define KVM_HC_KICK_CPU 5
22 23
23/* 24/*
24 * hypercalls use architecture specific 25 * hypercalls use architecture specific
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 60f48fa0fd0d..297a9247a3b3 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -13,6 +13,7 @@
13#include <linux/sort.h> 13#include <linux/sort.h>
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/static_key.h> 15#include <linux/static_key.h>
16#include <linux/jump_label_ratelimit.h>
16 17
17#ifdef HAVE_JUMP_LABEL 18#ifdef HAVE_JUMP_LABEL
18 19