author     Linus Torvalds <torvalds@linux-foundation.org>  2013-09-04 14:55:10 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-09-04 14:55:10 -0400
commit     816434ec4a674fcdb3c2221a6dffdc8f34020550
tree       6b8a319171270b20bf1b2e1c98d333f47988553a /arch/x86
parent     f357a82048ff1e5645861475b014570e11ad1911
parent     36bd621337c91a1ecda588e5bbbae8dd9698bae7
Merge branch 'x86-spinlocks-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 spinlock changes from Ingo Molnar:
"The biggest change here are paravirtualized ticket spinlocks (PV
spinlocks), which bring a nice speedup on various benchmarks.
The KVM host side will come to you via the KVM tree"
* 'x86-spinlocks-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/kvm/guest: Fix sparse warning: "symbol 'klock_waiting' was not declared as static"
kvm: Paravirtual ticketlocks support for linux guests running on KVM hypervisor
kvm guest: Add configuration support to enable debug information for KVM Guests
kvm uapi: Add KICK_CPU and PV_UNHALT definition to uapi
xen, pvticketlock: Allow interrupts to be enabled while blocking
x86, ticketlock: Add slowpath logic
jump_label: Split jumplabel ratelimit
x86, pvticketlock: When paravirtualizing ticket locks, increment by 2
x86, pvticketlock: Use callee-save for lock_spinning
xen, pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks
xen, pvticketlock: Xen implementation for PV ticket locks
xen: Defer spinlock setup until boot CPU setup
x86, ticketlock: Collapse a layer of functions
x86, ticketlock: Don't inline _spin_unlock when using paravirt spinlocks
x86, spinlock: Replace pv spinlocks with pv ticketlocks
Diffstat (limited to 'arch/x86')
 arch/x86/Kconfig                      |  10
 arch/x86/include/asm/kvm_para.h       |  14
 arch/x86/include/asm/paravirt.h       |  32
 arch/x86/include/asm/paravirt_types.h |  14
 arch/x86/include/asm/spinlock.h       | 128
 arch/x86/include/asm/spinlock_types.h |  16
 arch/x86/include/uapi/asm/kvm_para.h  |   1
 arch/x86/kernel/kvm.c                 | 262
 arch/x86/kernel/paravirt-spinlocks.c  |  18
 arch/x86/xen/smp.c                    |   2
 arch/x86/xen/spinlock.c               | 387
 11 files changed, 531 insertions(+), 353 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 30322b0427d1..5c0ed72c02a2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -632,6 +632,7 @@ config PARAVIRT_DEBUG
632 | config PARAVIRT_SPINLOCKS | 632 | config PARAVIRT_SPINLOCKS |
633 | bool "Paravirtualization layer for spinlocks" | 633 | bool "Paravirtualization layer for spinlocks" |
634 | depends on PARAVIRT && SMP | 634 | depends on PARAVIRT && SMP |
635 | select UNINLINE_SPIN_UNLOCK | ||
635 | ---help--- | 636 | ---help--- |
636 | Paravirtualized spinlocks allow a pvops backend to replace the | 637 | Paravirtualized spinlocks allow a pvops backend to replace the |
637 | spinlock implementation with something virtualization-friendly | 638 | spinlock implementation with something virtualization-friendly |
@@ -656,6 +657,15 @@ config KVM_GUEST | |||
656 | underlying device model, the host provides the guest with | 657 | underlying device model, the host provides the guest with |
657 | timing infrastructure such as time of day, and system time | 658 | timing infrastructure such as time of day, and system time |
658 | 659 | ||
660 | config KVM_DEBUG_FS | ||
661 | bool "Enable debug information for KVM Guests in debugfs" | ||
662 | depends on KVM_GUEST && DEBUG_FS | ||
663 | default n | ||
664 | ---help--- | ||
665 | This option enables collection of various statistics for KVM guest. | ||
666 | Statistics are displayed in debugfs filesystem. Enabling this option | ||
667 | may incur significant overhead. | ||
668 | |||
659 | source "arch/x86/lguest/Kconfig" | 669 | source "arch/x86/lguest/Kconfig" |
660 | 670 | ||
661 | config PARAVIRT_TIME_ACCOUNTING | 671 | config PARAVIRT_TIME_ACCOUNTING |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 0644129a5333..1df115909758 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -112,10 +112,20 @@ void kvm_async_pf_task_wait(u32 token);
112 | void kvm_async_pf_task_wake(u32 token); | 112 | void kvm_async_pf_task_wake(u32 token); |
113 | u32 kvm_read_and_reset_pf_reason(void); | 113 | u32 kvm_read_and_reset_pf_reason(void); |
114 | extern void kvm_disable_steal_time(void); | 114 | extern void kvm_disable_steal_time(void); |
115 | #else | 115 | |
116 | #define kvm_guest_init() do { } while (0) | 116 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
117 | void __init kvm_spinlock_init(void); | ||
118 | #else /* !CONFIG_PARAVIRT_SPINLOCKS */ | ||
119 | static inline void kvm_spinlock_init(void) | ||
120 | { | ||
121 | } | ||
122 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
123 | |||
124 | #else /* CONFIG_KVM_GUEST */ | ||
125 | #define kvm_guest_init() do {} while (0) | ||
117 | #define kvm_async_pf_task_wait(T) do {} while(0) | 126 | #define kvm_async_pf_task_wait(T) do {} while(0) |
118 | #define kvm_async_pf_task_wake(T) do {} while(0) | 127 | #define kvm_async_pf_task_wake(T) do {} while(0) |
128 | |||
119 | static inline u32 kvm_read_and_reset_pf_reason(void) | 129 | static inline u32 kvm_read_and_reset_pf_reason(void) |
120 | { | 130 | { |
121 | return 0; | 131 | return 0; |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cfdc9ee4c900..401f350ef71b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
712 | 712 | ||
713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
714 | 714 | ||
715 | static inline int arch_spin_is_locked(struct arch_spinlock *lock) | 715 | static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, |
716 | __ticket_t ticket) | ||
716 | { | 717 | { |
717 | return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); | 718 | PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket); |
718 | } | 719 | } |
719 | 720 | ||
720 | static inline int arch_spin_is_contended(struct arch_spinlock *lock) | 721 | static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, |
722 | __ticket_t ticket) | ||
721 | { | 723 | { |
722 | return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); | 724 | PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); |
723 | } | ||
724 | #define arch_spin_is_contended arch_spin_is_contended | ||
725 | |||
726 | static __always_inline void arch_spin_lock(struct arch_spinlock *lock) | ||
727 | { | ||
728 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); | ||
729 | } | ||
730 | |||
731 | static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock, | ||
732 | unsigned long flags) | ||
733 | { | ||
734 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); | ||
735 | } | ||
736 | |||
737 | static __always_inline int arch_spin_trylock(struct arch_spinlock *lock) | ||
738 | { | ||
739 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); | ||
740 | } | ||
741 | |||
742 | static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) | ||
743 | { | ||
744 | PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); | ||
745 | } | 725 | } |
746 | 726 | ||
747 | #endif | 727 | #endif |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0617ff241e8f..aab8f671b523 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -327,13 +327,15 @@ struct pv_mmu_ops {
327 | }; | 327 | }; |
328 | 328 | ||
329 | struct arch_spinlock; | 329 | struct arch_spinlock; |
330 | #ifdef CONFIG_SMP | ||
331 | #include <asm/spinlock_types.h> | ||
332 | #else | ||
333 | typedef u16 __ticket_t; | ||
334 | #endif | ||
335 | |||
330 | struct pv_lock_ops { | 336 | struct pv_lock_ops { |
331 | int (*spin_is_locked)(struct arch_spinlock *lock); | 337 | struct paravirt_callee_save lock_spinning; |
332 | int (*spin_is_contended)(struct arch_spinlock *lock); | 338 | void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); |
333 | void (*spin_lock)(struct arch_spinlock *lock); | ||
334 | void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); | ||
335 | int (*spin_trylock)(struct arch_spinlock *lock); | ||
336 | void (*spin_unlock)(struct arch_spinlock *lock); | ||
337 | }; | 339 | }; |
338 | 340 | ||
339 | /* This contains all the paravirt structures: we get a convenient | 341 | /* This contains all the paravirt structures: we get a convenient |
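The slimmed-down pv_lock_ops above is the whole paravirt surface that remains: a callee-save lock_spinning hook and an unlock_kick hook, both no-ops until a backend overrides them, as the KVM and Xen patches further down do. As a rough illustration of that registration pattern, here is a small userspace C sketch; the toy_* names and plain function pointers are hypothetical stand-ins, not the kernel API.

#include <stdio.h>

/* Toy stand-ins for the kernel types: the real hooks take an
 * arch_spinlock pointer and a __ticket_t, and lock_spinning is wrapped
 * in a paravirt_callee_save thunk.  Everything named toy_* here is
 * hypothetical and exists only for illustration. */
typedef unsigned short ticket_t;
struct toy_spinlock { ticket_t head, tail; };

struct toy_lock_ops {
	void (*lock_spinning)(struct toy_spinlock *lock, ticket_t ticket);
	void (*unlock_kick)(struct toy_spinlock *lock, ticket_t ticket);
};

/* Default no-op backend, analogous to paravirt_nop. */
static void nop_spinning(struct toy_spinlock *l, ticket_t t) { (void)l; (void)t; }
static void nop_kick(struct toy_spinlock *l, ticket_t t)     { (void)l; (void)t; }

static struct toy_lock_ops lock_ops = {
	.lock_spinning = nop_spinning,
	.unlock_kick   = nop_kick,
};

/* A hypervisor backend installs its own callbacks once at boot, much
 * like kvm_spinlock_init() and xen_init_spinlocks() do further down. */
static void demo_spinning(struct toy_spinlock *l, ticket_t t)
{
	printf("blocking until ticket %u reaches head (currently %u)\n",
	       (unsigned)t, (unsigned)l->head);
}

static void demo_kick(struct toy_spinlock *l, ticket_t t)
{
	(void)l;
	printf("kicking the waiter that holds ticket %u\n", (unsigned)t);
}

int main(void)
{
	struct toy_spinlock lock = { .head = 0, .tail = 2 };

	lock_ops.lock_spinning = demo_spinning;   /* backend registration */
	lock_ops.unlock_kick   = demo_kick;

	lock_ops.lock_spinning(&lock, lock.tail); /* called from the lock slow path */
	lock_ops.unlock_kick(&lock, lock.head);   /* called from the unlocker */
	return 0;
}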
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e0e668422c75..bf156ded74b5 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -1,11 +1,14 @@
1 | #ifndef _ASM_X86_SPINLOCK_H | 1 | #ifndef _ASM_X86_SPINLOCK_H |
2 | #define _ASM_X86_SPINLOCK_H | 2 | #define _ASM_X86_SPINLOCK_H |
3 | 3 | ||
4 | #include <linux/jump_label.h> | ||
4 | #include <linux/atomic.h> | 5 | #include <linux/atomic.h> |
5 | #include <asm/page.h> | 6 | #include <asm/page.h> |
6 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
7 | #include <linux/compiler.h> | 8 | #include <linux/compiler.h> |
8 | #include <asm/paravirt.h> | 9 | #include <asm/paravirt.h> |
10 | #include <asm/bitops.h> | ||
11 | |||
9 | /* | 12 | /* |
10 | * Your basic SMP spinlocks, allowing only a single CPU anywhere | 13 | * Your basic SMP spinlocks, allowing only a single CPU anywhere |
11 | * | 14 | * |
@@ -34,6 +37,31 @@ | |||
34 | # define UNLOCK_LOCK_PREFIX | 37 | # define UNLOCK_LOCK_PREFIX |
35 | #endif | 38 | #endif |
36 | 39 | ||
40 | /* How long a lock should spin before we consider blocking */ | ||
41 | #define SPIN_THRESHOLD (1 << 15) | ||
42 | |||
43 | extern struct static_key paravirt_ticketlocks_enabled; | ||
44 | static __always_inline bool static_key_false(struct static_key *key); | ||
45 | |||
46 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
47 | |||
48 | static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) | ||
49 | { | ||
50 | set_bit(0, (volatile unsigned long *)&lock->tickets.tail); | ||
51 | } | ||
52 | |||
53 | #else /* !CONFIG_PARAVIRT_SPINLOCKS */ | ||
54 | static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock, | ||
55 | __ticket_t ticket) | ||
56 | { | ||
57 | } | ||
58 | static inline void __ticket_unlock_kick(arch_spinlock_t *lock, | ||
59 | __ticket_t ticket) | ||
60 | { | ||
61 | } | ||
62 | |||
63 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
64 | |||
37 | static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) | 65 | static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) |
38 | { | 66 | { |
39 | return lock.tickets.head == lock.tickets.tail; | 67 | return lock.tickets.head == lock.tickets.tail; |
@@ -52,81 +80,101 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
52 | * in the high part, because a wide xadd increment of the low part would carry | 80 | * in the high part, because a wide xadd increment of the low part would carry |
53 | * up and contaminate the high part. | 81 | * up and contaminate the high part. |
54 | */ | 82 | */ |
55 | static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) | 83 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) |
56 | { | 84 | { |
57 | register struct __raw_tickets inc = { .tail = 1 }; | 85 | register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC }; |
58 | 86 | ||
59 | inc = xadd(&lock->tickets, inc); | 87 | inc = xadd(&lock->tickets, inc); |
88 | if (likely(inc.head == inc.tail)) | ||
89 | goto out; | ||
60 | 90 | ||
91 | inc.tail &= ~TICKET_SLOWPATH_FLAG; | ||
61 | for (;;) { | 92 | for (;;) { |
62 | if (inc.head == inc.tail) | 93 | unsigned count = SPIN_THRESHOLD; |
63 | break; | 94 | |
64 | cpu_relax(); | 95 | do { |
65 | inc.head = ACCESS_ONCE(lock->tickets.head); | 96 | if (ACCESS_ONCE(lock->tickets.head) == inc.tail) |
97 | goto out; | ||
98 | cpu_relax(); | ||
99 | } while (--count); | ||
100 | __ticket_lock_spinning(lock, inc.tail); | ||
66 | } | 101 | } |
67 | barrier(); /* make sure nothing creeps before the lock is taken */ | 102 | out: barrier(); /* make sure nothing creeps before the lock is taken */ |
68 | } | 103 | } |
69 | 104 | ||
70 | static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) | 105 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) |
71 | { | 106 | { |
72 | arch_spinlock_t old, new; | 107 | arch_spinlock_t old, new; |
73 | 108 | ||
74 | old.tickets = ACCESS_ONCE(lock->tickets); | 109 | old.tickets = ACCESS_ONCE(lock->tickets); |
75 | if (old.tickets.head != old.tickets.tail) | 110 | if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) |
76 | return 0; | 111 | return 0; |
77 | 112 | ||
78 | new.head_tail = old.head_tail + (1 << TICKET_SHIFT); | 113 | new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); |
79 | 114 | ||
80 | /* cmpxchg is a full barrier, so nothing can move before it */ | 115 | /* cmpxchg is a full barrier, so nothing can move before it */ |
81 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; | 116 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; |
82 | } | 117 | } |
83 | 118 | ||
84 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | 119 | static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock, |
120 | arch_spinlock_t old) | ||
85 | { | 121 | { |
86 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); | 122 | arch_spinlock_t new; |
123 | |||
124 | BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); | ||
125 | |||
126 | /* Perform the unlock on the "before" copy */ | ||
127 | old.tickets.head += TICKET_LOCK_INC; | ||
128 | |||
129 | /* Clear the slowpath flag */ | ||
130 | new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT); | ||
131 | |||
132 | /* | ||
133 | * If the lock is uncontended, clear the flag - use cmpxchg in | ||
134 | * case it changes behind our back though. | ||
135 | */ | ||
136 | if (new.tickets.head != new.tickets.tail || | ||
137 | cmpxchg(&lock->head_tail, old.head_tail, | ||
138 | new.head_tail) != old.head_tail) { | ||
139 | /* | ||
140 | * Lock still has someone queued for it, so wake up an | ||
141 | * appropriate waiter. | ||
142 | */ | ||
143 | __ticket_unlock_kick(lock, old.tickets.head); | ||
144 | } | ||
87 | } | 145 | } |
88 | 146 | ||
89 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) | 147 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) |
90 | { | 148 | { |
91 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); | 149 | if (TICKET_SLOWPATH_FLAG && |
150 | static_key_false(¶virt_ticketlocks_enabled)) { | ||
151 | arch_spinlock_t prev; | ||
92 | 152 | ||
93 | return tmp.tail != tmp.head; | 153 | prev = *lock; |
94 | } | 154 | add_smp(&lock->tickets.head, TICKET_LOCK_INC); |
95 | 155 | ||
96 | static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) | 156 | /* add_smp() is a full mb() */ |
97 | { | ||
98 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); | ||
99 | 157 | ||
100 | return (__ticket_t)(tmp.tail - tmp.head) > 1; | 158 | if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) |
159 | __ticket_unlock_slowpath(lock, prev); | ||
160 | } else | ||
161 | __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); | ||
101 | } | 162 | } |
102 | 163 | ||
103 | #ifndef CONFIG_PARAVIRT_SPINLOCKS | ||
104 | |||
105 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) | 164 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) |
106 | { | 165 | { |
107 | return __ticket_spin_is_locked(lock); | 166 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); |
108 | } | ||
109 | |||
110 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) | ||
111 | { | ||
112 | return __ticket_spin_is_contended(lock); | ||
113 | } | ||
114 | #define arch_spin_is_contended arch_spin_is_contended | ||
115 | 167 | ||
116 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) | 168 | return tmp.tail != tmp.head; |
117 | { | ||
118 | __ticket_spin_lock(lock); | ||
119 | } | 169 | } |
120 | 170 | ||
121 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) | 171 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) |
122 | { | 172 | { |
123 | return __ticket_spin_trylock(lock); | 173 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); |
124 | } | ||
125 | 174 | ||
126 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) | 175 | return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; |
127 | { | ||
128 | __ticket_spin_unlock(lock); | ||
129 | } | 176 | } |
177 | #define arch_spin_is_contended arch_spin_is_contended | ||
130 | 178 | ||
131 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, | 179 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, |
132 | unsigned long flags) | 180 | unsigned long flags) |
@@ -134,8 +182,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
134 | arch_spin_lock(lock); | 182 | arch_spin_lock(lock); |
135 | } | 183 | } |
136 | 184 | ||
137 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
138 | |||
139 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) | 185 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) |
140 | { | 186 | { |
141 | while (arch_spin_is_locked(lock)) | 187 | while (arch_spin_is_locked(lock)) |
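To make the new locking flow above easier to follow: the fast path is still a single xadd, the slow path spins for SPIN_THRESHOLD iterations before handing off to __ticket_lock_spinning(), and unlock only takes the expensive path when the slowpath flag is set in the tail. What follows is a single-threaded userspace sketch of that control flow with the blocking and kicking stubbed out; it deliberately ignores the atomicity, cmpxchg and barrier requirements of the real code, so treat it as an illustration rather than a usable lock.

#include <stdio.h>
#include <stdint.h>

/* Single-threaded sketch of the control flow above.  The constants
 * mirror the kernel's, but "blocking" just prints and then pretends
 * the waiter was kicked. */
#define TICKET_LOCK_INC       2
#define TICKET_SLOWPATH_FLAG  1
#define SPIN_THRESHOLD        (1 << 15)

struct ticketlock { uint8_t head, tail; };

static void lock_spinning_stub(struct ticketlock *lk, unsigned ticket)
{
	printf("  would block until head reaches %u\n", ticket);
	lk->head = (uint8_t)ticket;            /* pretend we were kicked */
}

static void ticket_lock(struct ticketlock *lk)
{
	unsigned me = lk->tail;                /* xadd in the real code */
	lk->tail += TICKET_LOCK_INC;

	if (lk->head == me)                    /* uncontended fast path */
		return;

	me &= ~TICKET_SLOWPATH_FLAG;
	for (unsigned count = SPIN_THRESHOLD; count; count--)
		if (lk->head == me)
			return;                /* picked up while spinning */
	lock_spinning_stub(lk, me);            /* __ticket_lock_spinning() */
}

static void ticket_unlock(struct ticketlock *lk)
{
	if (lk->tail & TICKET_SLOWPATH_FLAG)
		printf("  slowpath unlock: clear flag, kick next waiter\n");
	lk->head += TICKET_LOCK_INC;           /* add_smp()/__add() in the real code */
}

int main(void)
{
	struct ticketlock lk = { 0, 0 };

	printf("uncontended lock/unlock:\n");
	ticket_lock(&lk);
	ticket_unlock(&lk);

	printf("contended lock (another CPU holds the lock):\n");
	lk.tail += TICKET_LOCK_INC;            /* another CPU took the next ticket */
	ticket_lock(&lk);
	return 0;
}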
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index ad0ad07fc006..4f1bea19945b 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -1,13 +1,17 @@
1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H | 1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H |
2 | #define _ASM_X86_SPINLOCK_TYPES_H | 2 | #define _ASM_X86_SPINLOCK_TYPES_H |
3 | 3 | ||
4 | #ifndef __LINUX_SPINLOCK_TYPES_H | ||
5 | # error "please don't include this file directly" | ||
6 | #endif | ||
7 | |||
8 | #include <linux/types.h> | 4 | #include <linux/types.h> |
9 | 5 | ||
10 | #if (CONFIG_NR_CPUS < 256) | 6 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
7 | #define __TICKET_LOCK_INC 2 | ||
8 | #define TICKET_SLOWPATH_FLAG ((__ticket_t)1) | ||
9 | #else | ||
10 | #define __TICKET_LOCK_INC 1 | ||
11 | #define TICKET_SLOWPATH_FLAG ((__ticket_t)0) | ||
12 | #endif | ||
13 | |||
14 | #if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) | ||
11 | typedef u8 __ticket_t; | 15 | typedef u8 __ticket_t; |
12 | typedef u16 __ticketpair_t; | 16 | typedef u16 __ticketpair_t; |
13 | #else | 17 | #else |
@@ -15,6 +19,8 @@ typedef u16 __ticket_t;
15 | typedef u32 __ticketpair_t; | 19 | typedef u32 __ticketpair_t; |
16 | #endif | 20 | #endif |
17 | 21 | ||
22 | #define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) | ||
23 | |||
18 | #define TICKET_SHIFT (sizeof(__ticket_t) * 8) | 24 | #define TICKET_SHIFT (sizeof(__ticket_t) * 8) |
19 | 25 | ||
20 | typedef struct arch_spinlock { | 26 | typedef struct arch_spinlock { |
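The u8/u16 choice above now depends on __TICKET_LOCK_INC: bumping tickets by 2 leaves bit 0 free for TICKET_SLOWPATH_FLAG, but it also halves how many distinct tickets fit in the type, hence the (256 / __TICKET_LOCK_INC) bound on CONFIG_NR_CPUS. A few lines of throwaway C make the arithmetic concrete:

#include <stdio.h>
#include <stdint.h>

/* Illustrative arithmetic only: with __TICKET_LOCK_INC == 2 an 8-bit
 * ticket distinguishes just 256/2 = 128 values, which is why the u8
 * variant above is gated on CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC). */
int main(void)
{
	const unsigned inc = 2;                   /* __TICKET_LOCK_INC with PV spinlocks */

	printf("u8 tickets cover up to %u CPUs\n", 256u / inc);
	printf("u16 tickets cover up to %u CPUs\n", 65536u / inc);

	/* Tickets advance by 2, so bit 0 is always clear and can carry
	 * TICKET_SLOWPATH_FLAG without colliding with a ticket value. */
	for (uint8_t t = 0; t < 8; t = (uint8_t)(t + inc))
		printf("ticket %u, same ticket with slowpath flag: %u\n",
		       (unsigned)t, (unsigned)(t | 1u));
	return 0;
}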
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 06fdbd987e97..94dc8ca434e0 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -23,6 +23,7 @@
23 | #define KVM_FEATURE_ASYNC_PF 4 | 23 | #define KVM_FEATURE_ASYNC_PF 4 |
24 | #define KVM_FEATURE_STEAL_TIME 5 | 24 | #define KVM_FEATURE_STEAL_TIME 5 |
25 | #define KVM_FEATURE_PV_EOI 6 | 25 | #define KVM_FEATURE_PV_EOI 6 |
26 | #define KVM_FEATURE_PV_UNHALT 7 | ||
26 | 27 | ||
27 | /* The last 8 bits are used to indicate how to interpret the flags field | 28 | /* The last 8 bits are used to indicate how to interpret the flags field |
28 | * in pvclock structure. If no bits are set, all flags are ignored. | 29 | * in pvclock structure. If no bits are set, all flags are ignored. |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7817afdac301..697b93af02dd 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
34 | #include <linux/sched.h> | 34 | #include <linux/sched.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/kprobes.h> | 36 | #include <linux/kprobes.h> |
37 | #include <linux/debugfs.h> | ||
37 | #include <asm/timer.h> | 38 | #include <asm/timer.h> |
38 | #include <asm/cpu.h> | 39 | #include <asm/cpu.h> |
39 | #include <asm/traps.h> | 40 | #include <asm/traps.h> |
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
419 | WARN_ON(kvm_register_clock("primary cpu clock")); | 420 | WARN_ON(kvm_register_clock("primary cpu clock")); |
420 | kvm_guest_cpu_init(); | 421 | kvm_guest_cpu_init(); |
421 | native_smp_prepare_boot_cpu(); | 422 | native_smp_prepare_boot_cpu(); |
423 | kvm_spinlock_init(); | ||
422 | } | 424 | } |
423 | 425 | ||
424 | static void kvm_guest_cpu_online(void *dummy) | 426 | static void kvm_guest_cpu_online(void *dummy) |
@@ -521,3 +523,263 @@ static __init int activate_jump_labels(void)
521 | return 0; | 523 | return 0; |
522 | } | 524 | } |
523 | arch_initcall(activate_jump_labels); | 525 | arch_initcall(activate_jump_labels); |
526 | |||
527 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
528 | |||
529 | /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ | ||
530 | static void kvm_kick_cpu(int cpu) | ||
531 | { | ||
532 | int apicid; | ||
533 | unsigned long flags = 0; | ||
534 | |||
535 | apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
536 | kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); | ||
537 | } | ||
538 | |||
539 | enum kvm_contention_stat { | ||
540 | TAKEN_SLOW, | ||
541 | TAKEN_SLOW_PICKUP, | ||
542 | RELEASED_SLOW, | ||
543 | RELEASED_SLOW_KICKED, | ||
544 | NR_CONTENTION_STATS | ||
545 | }; | ||
546 | |||
547 | #ifdef CONFIG_KVM_DEBUG_FS | ||
548 | #define HISTO_BUCKETS 30 | ||
549 | |||
550 | static struct kvm_spinlock_stats | ||
551 | { | ||
552 | u32 contention_stats[NR_CONTENTION_STATS]; | ||
553 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | ||
554 | u64 time_blocked; | ||
555 | } spinlock_stats; | ||
556 | |||
557 | static u8 zero_stats; | ||
558 | |||
559 | static inline void check_zero(void) | ||
560 | { | ||
561 | u8 ret; | ||
562 | u8 old; | ||
563 | |||
564 | old = ACCESS_ONCE(zero_stats); | ||
565 | if (unlikely(old)) { | ||
566 | ret = cmpxchg(&zero_stats, old, 0); | ||
567 | /* This ensures only one fellow resets the stat */ | ||
568 | if (ret == old) | ||
569 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
570 | } | ||
571 | } | ||
572 | |||
573 | static inline void add_stats(enum kvm_contention_stat var, u32 val) | ||
574 | { | ||
575 | check_zero(); | ||
576 | spinlock_stats.contention_stats[var] += val; | ||
577 | } | ||
578 | |||
579 | |||
580 | static inline u64 spin_time_start(void) | ||
581 | { | ||
582 | return sched_clock(); | ||
583 | } | ||
584 | |||
585 | static void __spin_time_accum(u64 delta, u32 *array) | ||
586 | { | ||
587 | unsigned index; | ||
588 | |||
589 | index = ilog2(delta); | ||
590 | check_zero(); | ||
591 | |||
592 | if (index < HISTO_BUCKETS) | ||
593 | array[index]++; | ||
594 | else | ||
595 | array[HISTO_BUCKETS]++; | ||
596 | } | ||
597 | |||
598 | static inline void spin_time_accum_blocked(u64 start) | ||
599 | { | ||
600 | u32 delta; | ||
601 | |||
602 | delta = sched_clock() - start; | ||
603 | __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); | ||
604 | spinlock_stats.time_blocked += delta; | ||
605 | } | ||
606 | |||
607 | static struct dentry *d_spin_debug; | ||
608 | static struct dentry *d_kvm_debug; | ||
609 | |||
610 | struct dentry *kvm_init_debugfs(void) | ||
611 | { | ||
612 | d_kvm_debug = debugfs_create_dir("kvm", NULL); | ||
613 | if (!d_kvm_debug) | ||
614 | printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); | ||
615 | |||
616 | return d_kvm_debug; | ||
617 | } | ||
618 | |||
619 | static int __init kvm_spinlock_debugfs(void) | ||
620 | { | ||
621 | struct dentry *d_kvm; | ||
622 | |||
623 | d_kvm = kvm_init_debugfs(); | ||
624 | if (d_kvm == NULL) | ||
625 | return -ENOMEM; | ||
626 | |||
627 | d_spin_debug = debugfs_create_dir("spinlocks", d_kvm); | ||
628 | |||
629 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | ||
630 | |||
631 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | ||
632 | &spinlock_stats.contention_stats[TAKEN_SLOW]); | ||
633 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | ||
634 | &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); | ||
635 | |||
636 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | ||
637 | &spinlock_stats.contention_stats[RELEASED_SLOW]); | ||
638 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | ||
639 | &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); | ||
640 | |||
641 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | ||
642 | &spinlock_stats.time_blocked); | ||
643 | |||
644 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | ||
645 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | ||
646 | |||
647 | return 0; | ||
648 | } | ||
649 | fs_initcall(kvm_spinlock_debugfs); | ||
650 | #else /* !CONFIG_KVM_DEBUG_FS */ | ||
651 | static inline void add_stats(enum kvm_contention_stat var, u32 val) | ||
652 | { | ||
653 | } | ||
654 | |||
655 | static inline u64 spin_time_start(void) | ||
656 | { | ||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | static inline void spin_time_accum_blocked(u64 start) | ||
661 | { | ||
662 | } | ||
663 | #endif /* CONFIG_KVM_DEBUG_FS */ | ||
664 | |||
665 | struct kvm_lock_waiting { | ||
666 | struct arch_spinlock *lock; | ||
667 | __ticket_t want; | ||
668 | }; | ||
669 | |||
670 | /* cpus 'waiting' on a spinlock to become available */ | ||
671 | static cpumask_t waiting_cpus; | ||
672 | |||
673 | /* Track spinlock on which a cpu is waiting */ | ||
674 | static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); | ||
675 | |||
676 | static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | ||
677 | { | ||
678 | struct kvm_lock_waiting *w; | ||
679 | int cpu; | ||
680 | u64 start; | ||
681 | unsigned long flags; | ||
682 | |||
683 | if (in_nmi()) | ||
684 | return; | ||
685 | |||
686 | w = &__get_cpu_var(klock_waiting); | ||
687 | cpu = smp_processor_id(); | ||
688 | start = spin_time_start(); | ||
689 | |||
690 | /* | ||
691 | * Make sure an interrupt handler can't upset things in a | ||
692 | * partially setup state. | ||
693 | */ | ||
694 | local_irq_save(flags); | ||
695 | |||
696 | /* | ||
697 | * The ordering protocol on this is that the "lock" pointer | ||
698 | * may only be set non-NULL if the "want" ticket is correct. | ||
699 | * If we're updating "want", we must first clear "lock". | ||
700 | */ | ||
701 | w->lock = NULL; | ||
702 | smp_wmb(); | ||
703 | w->want = want; | ||
704 | smp_wmb(); | ||
705 | w->lock = lock; | ||
706 | |||
707 | add_stats(TAKEN_SLOW, 1); | ||
708 | |||
709 | /* | ||
710 | * This uses set_bit, which is atomic but we should not rely on its | ||
711 | * reordering gurantees. So barrier is needed after this call. | ||
712 | */ | ||
713 | cpumask_set_cpu(cpu, &waiting_cpus); | ||
714 | |||
715 | barrier(); | ||
716 | |||
717 | /* | ||
718 | * Mark entry to slowpath before doing the pickup test to make | ||
719 | * sure we don't deadlock with an unlocker. | ||
720 | */ | ||
721 | __ticket_enter_slowpath(lock); | ||
722 | |||
723 | /* | ||
724 | * check again make sure it didn't become free while | ||
725 | * we weren't looking. | ||
726 | */ | ||
727 | if (ACCESS_ONCE(lock->tickets.head) == want) { | ||
728 | add_stats(TAKEN_SLOW_PICKUP, 1); | ||
729 | goto out; | ||
730 | } | ||
731 | |||
732 | /* | ||
733 | * halt until it's our turn and kicked. Note that we do safe halt | ||
734 | * for irq enabled case to avoid hang when lock info is overwritten | ||
735 | * in irq spinlock slowpath and no spurious interrupt occur to save us. | ||
736 | */ | ||
737 | if (arch_irqs_disabled_flags(flags)) | ||
738 | halt(); | ||
739 | else | ||
740 | safe_halt(); | ||
741 | |||
742 | out: | ||
743 | cpumask_clear_cpu(cpu, &waiting_cpus); | ||
744 | w->lock = NULL; | ||
745 | local_irq_restore(flags); | ||
746 | spin_time_accum_blocked(start); | ||
747 | } | ||
748 | PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning); | ||
749 | |||
750 | /* Kick vcpu waiting on @lock->head to reach value @ticket */ | ||
751 | static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) | ||
752 | { | ||
753 | int cpu; | ||
754 | |||
755 | add_stats(RELEASED_SLOW, 1); | ||
756 | for_each_cpu(cpu, &waiting_cpus) { | ||
757 | const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); | ||
758 | if (ACCESS_ONCE(w->lock) == lock && | ||
759 | ACCESS_ONCE(w->want) == ticket) { | ||
760 | add_stats(RELEASED_SLOW_KICKED, 1); | ||
761 | kvm_kick_cpu(cpu); | ||
762 | break; | ||
763 | } | ||
764 | } | ||
765 | } | ||
766 | |||
767 | /* | ||
768 | * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. | ||
769 | */ | ||
770 | void __init kvm_spinlock_init(void) | ||
771 | { | ||
772 | if (!kvm_para_available()) | ||
773 | return; | ||
774 | /* Does host kernel support KVM_FEATURE_PV_UNHALT? */ | ||
775 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) | ||
776 | return; | ||
777 | |||
778 | printk(KERN_INFO "KVM setup paravirtual spinlock\n"); | ||
779 | |||
780 | static_key_slow_inc(¶virt_ticketlocks_enabled); | ||
781 | |||
782 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); | ||
783 | pv_lock_ops.unlock_kick = kvm_unlock_kick; | ||
784 | } | ||
785 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
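The heart of the KVM guest support above is the handshake between kvm_lock_spinning() and kvm_unlock_kick(): a waiter publishes its (lock, want) pair in a fixed order, joins waiting_cpus, marks the slowpath, re-checks the head and halts; the unlocker scans waiting_cpus for a matching pair and kicks exactly that vCPU. The userspace sketch below mirrors that data flow with an array and a bitmask; the barriers, per-cpu accessors and hypercall are reduced to comments and printfs, so it is an illustration, not a faithful model.

#include <stdio.h>
#include <stddef.h>

/* Userspace sketch of the waiter/kicker handshake: per-CPU state is a
 * plain array, the cpumask is a bitmask, and halt()/KVM_HC_KICK_CPU
 * become printfs.  The smp_wmb() ordering of the real code is only
 * noted in comments. */
#define NCPUS 4

typedef unsigned short ticket_t;
struct lock_waiting { void *lock; ticket_t want; };

static struct lock_waiting waiting[NCPUS];   /* per_cpu(klock_waiting) */
static unsigned waiting_cpus;                /* cpumask of blocked vCPUs */

static void lock_spinning(int cpu, void *lock, ticket_t want)
{
	/* Publish in this order: clear lock, store want, then set lock;
	 * the kernel separates these stores with smp_wmb(). */
	waiting[cpu].lock = NULL;
	waiting[cpu].want = want;
	waiting[cpu].lock = lock;
	waiting_cpus |= 1u << cpu;
	printf("cpu%d blocks, waiting for ticket %u\n", cpu, (unsigned)want);
	/* ...mark the slowpath, re-check tickets.head, then halt()... */
}

static void unlock_kick(void *lock, ticket_t next)
{
	for (int cpu = 0; cpu < NCPUS; cpu++) {
		if (!(waiting_cpus & (1u << cpu)))
			continue;
		if (waiting[cpu].lock == lock && waiting[cpu].want == next) {
			printf("kick cpu%d: ticket %u is now at the head\n",
			       cpu, (unsigned)next);
			waiting_cpus &= ~(1u << cpu);
			return;
		}
	}
}

int main(void)
{
	int dummy_lock;                          /* stands in for an arch_spinlock */

	lock_spinning(1, &dummy_lock, 4);
	lock_spinning(3, &dummy_lock, 6);
	unlock_kick(&dummy_lock, 4);             /* only cpu1 matches and is woken */
	return 0;
}

Waking only the CPU whose want matches the new head preserves the ticket lock's FIFO hand-off even though waiters sleep instead of spinning.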
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c77a976..bbb6c7316341 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -4,25 +4,17 @@
4 | */ | 4 | */ |
5 | #include <linux/spinlock.h> | 5 | #include <linux/spinlock.h> |
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/jump_label.h> | ||
7 | 8 | ||
8 | #include <asm/paravirt.h> | 9 | #include <asm/paravirt.h> |
9 | 10 | ||
10 | static inline void | ||
11 | default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) | ||
12 | { | ||
13 | arch_spin_lock(lock); | ||
14 | } | ||
15 | |||
16 | struct pv_lock_ops pv_lock_ops = { | 11 | struct pv_lock_ops pv_lock_ops = { |
17 | #ifdef CONFIG_SMP | 12 | #ifdef CONFIG_SMP |
18 | .spin_is_locked = __ticket_spin_is_locked, | 13 | .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), |
19 | .spin_is_contended = __ticket_spin_is_contended, | 14 | .unlock_kick = paravirt_nop, |
20 | |||
21 | .spin_lock = __ticket_spin_lock, | ||
22 | .spin_lock_flags = default_spin_lock_flags, | ||
23 | .spin_trylock = __ticket_spin_trylock, | ||
24 | .spin_unlock = __ticket_spin_unlock, | ||
25 | #endif | 15 | #endif |
26 | }; | 16 | }; |
27 | EXPORT_SYMBOL(pv_lock_ops); | 17 | EXPORT_SYMBOL(pv_lock_ops); |
28 | 18 | ||
19 | struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; | ||
20 | EXPORT_SYMBOL(paravirt_ticketlocks_enabled); | ||
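paravirt_ticketlocks_enabled is a static key that stays false on bare metal, so the extra unlock logic costs nothing unless a backend calls static_key_slow_inc() at init. The sketch below uses a plain bool as a stand-in, which only mimics the on/off behaviour; the real static_key/jump-label machinery patches the branch in the generated code.

#include <stdbool.h>
#include <stdio.h>

/* A plain bool standing in for the paravirt_ticketlocks_enabled static
 * key.  This only mimics the on/off behaviour; with a real static key
 * the "false" case is a patched no-op branch. */
static bool ticketlocks_enabled;             /* STATIC_KEY_INIT_FALSE */

static void spin_unlock_sketch(void)
{
	if (ticketlocks_enabled)                 /* static_key_false(&...) in the kernel */
		printf("PV unlock: snapshot lock, add, maybe kick a waiter\n");
	else
		printf("native unlock: plain add on tickets.head\n");
}

int main(void)
{
	spin_unlock_sketch();                    /* before any backend registers */
	ticketlocks_enabled = true;              /* static_key_slow_inc() at PV init */
	spin_unlock_sketch();
	return 0;
}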
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b81c88e51daa..597655bd72b0 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void)
279 | 279 | ||
280 | xen_filter_cpu_maps(); | 280 | xen_filter_cpu_maps(); |
281 | xen_setup_vcpu_info_placement(); | 281 | xen_setup_vcpu_info_placement(); |
282 | xen_init_spinlocks(); | ||
282 | } | 283 | } |
283 | 284 | ||
284 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | 285 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
@@ -680,7 +681,6 @@ void __init xen_smp_init(void)
680 | { | 681 | { |
681 | smp_ops = xen_smp_ops; | 682 | smp_ops = xen_smp_ops; |
682 | xen_fill_possible_map(); | 683 | xen_fill_possible_map(); |
683 | xen_init_spinlocks(); | ||
684 | } | 684 | } |
685 | 685 | ||
686 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | 686 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cf3caee356b3..0438b9324a72 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
17 | #include "xen-ops.h" | 17 | #include "xen-ops.h" |
18 | #include "debugfs.h" | 18 | #include "debugfs.h" |
19 | 19 | ||
20 | #ifdef CONFIG_XEN_DEBUG_FS | 20 | enum xen_contention_stat { |
21 | static struct xen_spinlock_stats | 21 | TAKEN_SLOW, |
22 | { | 22 | TAKEN_SLOW_PICKUP, |
23 | u64 taken; | 23 | TAKEN_SLOW_SPURIOUS, |
24 | u32 taken_slow; | 24 | RELEASED_SLOW, |
25 | u32 taken_slow_nested; | 25 | RELEASED_SLOW_KICKED, |
26 | u32 taken_slow_pickup; | 26 | NR_CONTENTION_STATS |
27 | u32 taken_slow_spurious; | 27 | }; |
28 | u32 taken_slow_irqenable; | ||
29 | 28 | ||
30 | u64 released; | ||
31 | u32 released_slow; | ||
32 | u32 released_slow_kicked; | ||
33 | 29 | ||
30 | #ifdef CONFIG_XEN_DEBUG_FS | ||
34 | #define HISTO_BUCKETS 30 | 31 | #define HISTO_BUCKETS 30 |
35 | u32 histo_spin_total[HISTO_BUCKETS+1]; | 32 | static struct xen_spinlock_stats |
36 | u32 histo_spin_spinning[HISTO_BUCKETS+1]; | 33 | { |
34 | u32 contention_stats[NR_CONTENTION_STATS]; | ||
37 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | 35 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; |
38 | |||
39 | u64 time_total; | ||
40 | u64 time_spinning; | ||
41 | u64 time_blocked; | 36 | u64 time_blocked; |
42 | } spinlock_stats; | 37 | } spinlock_stats; |
43 | 38 | ||
44 | static u8 zero_stats; | 39 | static u8 zero_stats; |
45 | 40 | ||
46 | static unsigned lock_timeout = 1 << 10; | ||
47 | #define TIMEOUT lock_timeout | ||
48 | |||
49 | static inline void check_zero(void) | 41 | static inline void check_zero(void) |
50 | { | 42 | { |
51 | if (unlikely(zero_stats)) { | 43 | u8 ret; |
52 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | 44 | u8 old = ACCESS_ONCE(zero_stats); |
53 | zero_stats = 0; | 45 | if (unlikely(old)) { |
46 | ret = cmpxchg(&zero_stats, old, 0); | ||
47 | /* This ensures only one fellow resets the stat */ | ||
48 | if (ret == old) | ||
49 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
54 | } | 50 | } |
55 | } | 51 | } |
56 | 52 | ||
57 | #define ADD_STATS(elem, val) \ | 53 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
58 | do { check_zero(); spinlock_stats.elem += (val); } while(0) | 54 | { |
55 | check_zero(); | ||
56 | spinlock_stats.contention_stats[var] += val; | ||
57 | } | ||
59 | 58 | ||
60 | static inline u64 spin_time_start(void) | 59 | static inline u64 spin_time_start(void) |
61 | { | 60 | { |
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
74 | array[HISTO_BUCKETS]++; | 73 | array[HISTO_BUCKETS]++; |
75 | } | 74 | } |
76 | 75 | ||
77 | static inline void spin_time_accum_spinning(u64 start) | ||
78 | { | ||
79 | u32 delta = xen_clocksource_read() - start; | ||
80 | |||
81 | __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); | ||
82 | spinlock_stats.time_spinning += delta; | ||
83 | } | ||
84 | |||
85 | static inline void spin_time_accum_total(u64 start) | ||
86 | { | ||
87 | u32 delta = xen_clocksource_read() - start; | ||
88 | |||
89 | __spin_time_accum(delta, spinlock_stats.histo_spin_total); | ||
90 | spinlock_stats.time_total += delta; | ||
91 | } | ||
92 | |||
93 | static inline void spin_time_accum_blocked(u64 start) | 76 | static inline void spin_time_accum_blocked(u64 start) |
94 | { | 77 | { |
95 | u32 delta = xen_clocksource_read() - start; | 78 | u32 delta = xen_clocksource_read() - start; |
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start)
99 | } | 82 | } |
100 | #else /* !CONFIG_XEN_DEBUG_FS */ | 83 | #else /* !CONFIG_XEN_DEBUG_FS */ |
101 | #define TIMEOUT (1 << 10) | 84 | #define TIMEOUT (1 << 10) |
102 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | 85 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
86 | { | ||
87 | } | ||
103 | 88 | ||
104 | static inline u64 spin_time_start(void) | 89 | static inline u64 spin_time_start(void) |
105 | { | 90 | { |
106 | return 0; | 91 | return 0; |
107 | } | 92 | } |
108 | 93 | ||
109 | static inline void spin_time_accum_total(u64 start) | ||
110 | { | ||
111 | } | ||
112 | static inline void spin_time_accum_spinning(u64 start) | ||
113 | { | ||
114 | } | ||
115 | static inline void spin_time_accum_blocked(u64 start) | 94 | static inline void spin_time_accum_blocked(u64 start) |
116 | { | 95 | { |
117 | } | 96 | } |
@@ -134,227 +113,123 @@ typedef u16 xen_spinners_t;
134 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); | 113 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); |
135 | #endif | 114 | #endif |
136 | 115 | ||
137 | struct xen_spinlock { | 116 | struct xen_lock_waiting { |
138 | unsigned char lock; /* 0 -> free; 1 -> locked */ | 117 | struct arch_spinlock *lock; |
139 | xen_spinners_t spinners; /* count of waiting cpus */ | 118 | __ticket_t want; |
140 | }; | 119 | }; |
141 | 120 | ||
142 | static int xen_spin_is_locked(struct arch_spinlock *lock) | ||
143 | { | ||
144 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
145 | |||
146 | return xl->lock != 0; | ||
147 | } | ||
148 | |||
149 | static int xen_spin_is_contended(struct arch_spinlock *lock) | ||
150 | { | ||
151 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
152 | |||
153 | /* Not strictly true; this is only the count of contended | ||
154 | lock-takers entering the slow path. */ | ||
155 | return xl->spinners != 0; | ||
156 | } | ||
157 | |||
158 | static int xen_spin_trylock(struct arch_spinlock *lock) | ||
159 | { | ||
160 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
161 | u8 old = 1; | ||
162 | |||
163 | asm("xchgb %b0,%1" | ||
164 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
165 | |||
166 | return old == 0; | ||
167 | } | ||
168 | |||
169 | static DEFINE_PER_CPU(char *, irq_name); | ||
170 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | 121 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; |
171 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | 122 | static DEFINE_PER_CPU(char *, irq_name); |
172 | 123 | static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); | |
173 | /* | 124 | static cpumask_t waiting_cpus; |
174 | * Mark a cpu as interested in a lock. Returns the CPU's previous | ||
175 | * lock of interest, in case we got preempted by an interrupt. | ||
176 | */ | ||
177 | static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | ||
178 | { | ||
179 | struct xen_spinlock *prev; | ||
180 | |||
181 | prev = __this_cpu_read(lock_spinners); | ||
182 | __this_cpu_write(lock_spinners, xl); | ||
183 | |||
184 | wmb(); /* set lock of interest before count */ | ||
185 | |||
186 | inc_spinners(xl); | ||
187 | |||
188 | return prev; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Mark a cpu as no longer interested in a lock. Restores previous | ||
193 | * lock of interest (NULL for none). | ||
194 | */ | ||
195 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) | ||
196 | { | ||
197 | dec_spinners(xl); | ||
198 | wmb(); /* decrement count before restoring lock */ | ||
199 | __this_cpu_write(lock_spinners, prev); | ||
200 | } | ||
201 | 125 | ||
202 | static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) | 126 | static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
203 | { | 127 | { |
204 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
205 | struct xen_spinlock *prev; | ||
206 | int irq = __this_cpu_read(lock_kicker_irq); | 128 | int irq = __this_cpu_read(lock_kicker_irq); |
207 | int ret; | 129 | struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); |
130 | int cpu = smp_processor_id(); | ||
208 | u64 start; | 131 | u64 start; |
132 | unsigned long flags; | ||
209 | 133 | ||
210 | /* If kicker interrupts not initialized yet, just spin */ | 134 | /* If kicker interrupts not initialized yet, just spin */ |
211 | if (irq == -1) | 135 | if (irq == -1) |
212 | return 0; | 136 | return; |
213 | 137 | ||
214 | start = spin_time_start(); | 138 | start = spin_time_start(); |
215 | 139 | ||
216 | /* announce we're spinning */ | 140 | /* |
217 | prev = spinning_lock(xl); | 141 | * Make sure an interrupt handler can't upset things in a |
142 | * partially setup state. | ||
143 | */ | ||
144 | local_irq_save(flags); | ||
145 | /* | ||
146 | * We don't really care if we're overwriting some other | ||
147 | * (lock,want) pair, as that would mean that we're currently | ||
148 | * in an interrupt context, and the outer context had | ||
149 | * interrupts enabled. That has already kicked the VCPU out | ||
150 | * of xen_poll_irq(), so it will just return spuriously and | ||
151 | * retry with newly setup (lock,want). | ||
152 | * | ||
153 | * The ordering protocol on this is that the "lock" pointer | ||
154 | * may only be set non-NULL if the "want" ticket is correct. | ||
155 | * If we're updating "want", we must first clear "lock". | ||
156 | */ | ||
157 | w->lock = NULL; | ||
158 | smp_wmb(); | ||
159 | w->want = want; | ||
160 | smp_wmb(); | ||
161 | w->lock = lock; | ||
218 | 162 | ||
219 | ADD_STATS(taken_slow, 1); | 163 | /* This uses set_bit, which atomic and therefore a barrier */ |
220 | ADD_STATS(taken_slow_nested, prev != NULL); | 164 | cpumask_set_cpu(cpu, &waiting_cpus); |
165 | add_stats(TAKEN_SLOW, 1); | ||
221 | 166 | ||
222 | do { | 167 | /* clear pending */ |
223 | unsigned long flags; | 168 | xen_clear_irq_pending(irq); |
224 | 169 | ||
225 | /* clear pending */ | 170 | /* Only check lock once pending cleared */ |
226 | xen_clear_irq_pending(irq); | 171 | barrier(); |
227 | 172 | ||
228 | /* check again make sure it didn't become free while | 173 | /* |
229 | we weren't looking */ | 174 | * Mark entry to slowpath before doing the pickup test to make |
230 | ret = xen_spin_trylock(lock); | 175 | * sure we don't deadlock with an unlocker. |
231 | if (ret) { | 176 | */ |
232 | ADD_STATS(taken_slow_pickup, 1); | 177 | __ticket_enter_slowpath(lock); |
233 | 178 | ||
234 | /* | 179 | /* |
235 | * If we interrupted another spinlock while it | 180 | * check again make sure it didn't become free while |
236 | * was blocking, make sure it doesn't block | 181 | * we weren't looking |
237 | * without rechecking the lock. | 182 | */ |
238 | */ | 183 | if (ACCESS_ONCE(lock->tickets.head) == want) { |
239 | if (prev != NULL) | 184 | add_stats(TAKEN_SLOW_PICKUP, 1); |
240 | xen_set_irq_pending(irq); | 185 | goto out; |
241 | goto out; | 186 | } |
242 | } | ||
243 | 187 | ||
244 | flags = arch_local_save_flags(); | 188 | /* Allow interrupts while blocked */ |
245 | if (irq_enable) { | 189 | local_irq_restore(flags); |
246 | ADD_STATS(taken_slow_irqenable, 1); | ||
247 | raw_local_irq_enable(); | ||
248 | } | ||
249 | 190 | ||
250 | /* | 191 | /* |
251 | * Block until irq becomes pending. If we're | 192 | * If an interrupt happens here, it will leave the wakeup irq |
252 | * interrupted at this point (after the trylock but | 193 | * pending, which will cause xen_poll_irq() to return |
253 | * before entering the block), then the nested lock | 194 | * immediately. |
254 | * handler guarantees that the irq will be left | 195 | */ |
255 | * pending if there's any chance the lock became free; | ||
256 | * xen_poll_irq() returns immediately if the irq is | ||
257 | * pending. | ||
258 | */ | ||
259 | xen_poll_irq(irq); | ||
260 | 196 | ||
261 | raw_local_irq_restore(flags); | 197 | /* Block until irq becomes pending (or perhaps a spurious wakeup) */ |
198 | xen_poll_irq(irq); | ||
199 | add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq)); | ||
262 | 200 | ||
263 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); | 201 | local_irq_save(flags); |
264 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ | ||
265 | 202 | ||
266 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); | 203 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); |
267 | |||
268 | out: | 204 | out: |
269 | unspinning_lock(xl, prev); | 205 | cpumask_clear_cpu(cpu, &waiting_cpus); |
270 | spin_time_accum_blocked(start); | 206 | w->lock = NULL; |
271 | |||
272 | return ret; | ||
273 | } | ||
274 | |||
275 | static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) | ||
276 | { | ||
277 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
278 | unsigned timeout; | ||
279 | u8 oldval; | ||
280 | u64 start_spin; | ||
281 | |||
282 | ADD_STATS(taken, 1); | ||
283 | |||
284 | start_spin = spin_time_start(); | ||
285 | |||
286 | do { | ||
287 | u64 start_spin_fast = spin_time_start(); | ||
288 | |||
289 | timeout = TIMEOUT; | ||
290 | |||
291 | asm("1: xchgb %1,%0\n" | ||
292 | " testb %1,%1\n" | ||
293 | " jz 3f\n" | ||
294 | "2: rep;nop\n" | ||
295 | " cmpb $0,%0\n" | ||
296 | " je 1b\n" | ||
297 | " dec %2\n" | ||
298 | " jnz 2b\n" | ||
299 | "3:\n" | ||
300 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
301 | : "1" (1) | ||
302 | : "memory"); | ||
303 | 207 | ||
304 | spin_time_accum_spinning(start_spin_fast); | 208 | local_irq_restore(flags); |
305 | 209 | ||
306 | } while (unlikely(oldval != 0 && | 210 | spin_time_accum_blocked(start); |
307 | (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); | ||
308 | |||
309 | spin_time_accum_total(start_spin); | ||
310 | } | ||
311 | |||
312 | static void xen_spin_lock(struct arch_spinlock *lock) | ||
313 | { | ||
314 | __xen_spin_lock(lock, false); | ||
315 | } | ||
316 | |||
317 | static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) | ||
318 | { | ||
319 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); | ||
320 | } | 211 | } |
212 | PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning); | ||
321 | 213 | ||
322 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | 214 | static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) |
323 | { | 215 | { |
324 | int cpu; | 216 | int cpu; |
325 | 217 | ||
326 | ADD_STATS(released_slow, 1); | 218 | add_stats(RELEASED_SLOW, 1); |
219 | |||
220 | for_each_cpu(cpu, &waiting_cpus) { | ||
221 | const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); | ||
327 | 222 | ||
328 | for_each_online_cpu(cpu) { | 223 | /* Make sure we read lock before want */ |
329 | /* XXX should mix up next cpu selection */ | 224 | if (ACCESS_ONCE(w->lock) == lock && |
330 | if (per_cpu(lock_spinners, cpu) == xl) { | 225 | ACCESS_ONCE(w->want) == next) { |
331 | ADD_STATS(released_slow_kicked, 1); | 226 | add_stats(RELEASED_SLOW_KICKED, 1); |
332 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | 227 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); |
228 | break; | ||
333 | } | 229 | } |
334 | } | 230 | } |
335 | } | 231 | } |
336 | 232 | ||
337 | static void xen_spin_unlock(struct arch_spinlock *lock) | ||
338 | { | ||
339 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
340 | |||
341 | ADD_STATS(released, 1); | ||
342 | |||
343 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
344 | xl->lock = 0; /* release lock */ | ||
345 | |||
346 | /* | ||
347 | * Make sure unlock happens before checking for waiting | ||
348 | * spinners. We need a strong barrier to enforce the | ||
349 | * write-read ordering to different memory locations, as the | ||
350 | * CPU makes no implied guarantees about their ordering. | ||
351 | */ | ||
352 | mb(); | ||
353 | |||
354 | if (unlikely(xl->spinners)) | ||
355 | xen_spin_unlock_slow(xl); | ||
356 | } | ||
357 | |||
358 | static irqreturn_t dummy_handler(int irq, void *dev_id) | 233 | static irqreturn_t dummy_handler(int irq, void *dev_id) |
359 | { | 234 | { |
360 | BUG(); | 235 | BUG(); |
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu)
408 | per_cpu(irq_name, cpu) = NULL; | 283 | per_cpu(irq_name, cpu) = NULL; |
409 | } | 284 | } |
410 | 285 | ||
286 | static bool xen_pvspin __initdata = true; | ||
287 | |||
411 | void __init xen_init_spinlocks(void) | 288 | void __init xen_init_spinlocks(void) |
412 | { | 289 | { |
413 | /* | 290 | /* |
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void)
417 | if (xen_hvm_domain()) | 294 | if (xen_hvm_domain()) |
418 | return; | 295 | return; |
419 | 296 | ||
420 | BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); | 297 | if (!xen_pvspin) { |
298 | printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); | ||
299 | return; | ||
300 | } | ||
421 | 301 | ||
422 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | 302 | static_key_slow_inc(¶virt_ticketlocks_enabled); |
423 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | 303 | |
424 | pv_lock_ops.spin_lock = xen_spin_lock; | 304 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); |
425 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; | 305 | pv_lock_ops.unlock_kick = xen_unlock_kick; |
426 | pv_lock_ops.spin_trylock = xen_spin_trylock; | 306 | } |
427 | pv_lock_ops.spin_unlock = xen_spin_unlock; | 307 | |
308 | static __init int xen_parse_nopvspin(char *arg) | ||
309 | { | ||
310 | xen_pvspin = false; | ||
311 | return 0; | ||
428 | } | 312 | } |
313 | early_param("xen_nopvspin", xen_parse_nopvspin); | ||
429 | 314 | ||
430 | #ifdef CONFIG_XEN_DEBUG_FS | 315 | #ifdef CONFIG_XEN_DEBUG_FS |
431 | 316 | ||
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void)
442 | 327 | ||
443 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | 328 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); |
444 | 329 | ||
445 | debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); | ||
446 | |||
447 | debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); | ||
448 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | 330 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, |
449 | &spinlock_stats.taken_slow); | 331 | &spinlock_stats.contention_stats[TAKEN_SLOW]); |
450 | debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, | ||
451 | &spinlock_stats.taken_slow_nested); | ||
452 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | 332 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, |
453 | &spinlock_stats.taken_slow_pickup); | 333 | &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); |
454 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, | 334 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, |
455 | &spinlock_stats.taken_slow_spurious); | 335 | &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]); |
456 | debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, | ||
457 | &spinlock_stats.taken_slow_irqenable); | ||
458 | 336 | ||
459 | debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); | ||
460 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | 337 | debugfs_create_u32("released_slow", 0444, d_spin_debug, |
461 | &spinlock_stats.released_slow); | 338 | &spinlock_stats.contention_stats[RELEASED_SLOW]); |
462 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | 339 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, |
463 | &spinlock_stats.released_slow_kicked); | 340 | &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); |
464 | 341 | ||
465 | debugfs_create_u64("time_spinning", 0444, d_spin_debug, | ||
466 | &spinlock_stats.time_spinning); | ||
467 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | 342 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, |
468 | &spinlock_stats.time_blocked); | 343 | &spinlock_stats.time_blocked); |
469 | debugfs_create_u64("time_total", 0444, d_spin_debug, | ||
470 | &spinlock_stats.time_total); | ||
471 | 344 | ||
472 | debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | ||
473 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | ||
474 | debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | ||
475 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | ||
476 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | 345 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, |
477 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | 346 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); |
478 | 347 | ||