path: root/arch/x86/xen/spinlock.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-04 14:55:10 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-04 14:55:10 -0400
commit	816434ec4a674fcdb3c2221a6dffdc8f34020550 (patch)
tree	6b8a319171270b20bf1b2e1c98d333f47988553a /arch/x86/xen/spinlock.c
parent	f357a82048ff1e5645861475b014570e11ad1911 (diff)
parent	36bd621337c91a1ecda588e5bbbae8dd9698bae7 (diff)
Merge branch 'x86-spinlocks-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 spinlock changes from Ingo Molnar:
 "The biggest change here are paravirtualized ticket spinlocks (PV
  spinlocks), which bring a nice speedup on various benchmarks.

  The KVM host side will come to you via the KVM tree"

* 'x86-spinlocks-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/kvm/guest: Fix sparse warning: "symbol 'klock_waiting' was not declared as static"
  kvm: Paravirtual ticketlocks support for linux guests running on KVM hypervisor
  kvm guest: Add configuration support to enable debug information for KVM Guests
  kvm uapi: Add KICK_CPU and PV_UNHALT definition to uapi
  xen, pvticketlock: Allow interrupts to be enabled while blocking
  x86, ticketlock: Add slowpath logic
  jump_label: Split jumplabel ratelimit
  x86, pvticketlock: When paravirtualizing ticket locks, increment by 2
  x86, pvticketlock: Use callee-save for lock_spinning
  xen, pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks
  xen, pvticketlock: Xen implementation for PV ticket locks
  xen: Defer spinlock setup until boot CPU setup
  x86, ticketlock: Collapse a layer of functions
  x86, ticketlock: Don't inline _spin_unlock when using paravirt spinlocks
  x86, spinlock: Replace pv spinlocks with pv ticketlocks
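The scheme these patches implement can be sketched in plain user-space C. The sketch below is illustrative only and is not the kernel code: it borrows the slowpath hook names (lock_spinning / unlock_kick), the increment-by-2 ticket layout and the low-bit slowpath flag from the series, but stubs out the real blocking (xen_poll_irq) and kick (xen_send_IPI_one) paths; SPIN_THRESHOLD and all helper names here are illustrative choices, not the kernel's definitions.

/*
 * User-space sketch of a paravirt-friendly ticket lock.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define SPIN_THRESHOLD 1024   /* spins before falling back to the slowpath */
#define TICKET_INC     2      /* tickets advance by 2: bit 0 is reserved */
#define SLOWPATH_FLAG  1      /* "a waiter may be blocked" hint in tail */

struct ticketlock {
	atomic_uint head;     /* ticket currently being served */
	atomic_uint tail;     /* next ticket to hand out, low bit = flag */
};

/* Stand-in for pv_lock_ops.lock_spinning: block until our ticket comes up. */
static void lock_spinning(struct ticketlock *lk, unsigned int want)
{
	while (atomic_load(&lk->head) != want)
		;   /* real guest: xen_poll_irq() on a per-cpu event channel */
}

/* Stand-in for pv_lock_ops.unlock_kick: wake whoever wants ticket `next`. */
static void unlock_kick(struct ticketlock *lk, unsigned int next)
{
	(void)lk;
	(void)next;  /* real guest: xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR) */
}

static void ticket_lock(struct ticketlock *lk)
{
	/* fetch_add may return the flag bit too; mask it off to get our ticket */
	unsigned int me = atomic_fetch_add(&lk->tail, TICKET_INC) & ~SLOWPATH_FLAG;

	for (;;) {
		for (int i = 0; i < SPIN_THRESHOLD; i++)
			if (atomic_load(&lk->head) == me)
				return;
		/* Tell unlockers a waiter may be blocked (kept sticky here;
		 * the kernel clears it once the lock goes uncontended). */
		atomic_fetch_or(&lk->tail, SLOWPATH_FLAG);
		lock_spinning(lk, me);
	}
}

static void ticket_unlock(struct ticketlock *lk)
{
	/* new head value = the ticket that should run next */
	unsigned int next = atomic_fetch_add(&lk->head, TICKET_INC) + TICKET_INC;

	if (atomic_load(&lk->tail) & SLOWPATH_FLAG)
		unlock_kick(lk, next);
}

static struct ticketlock lk;
static long counter;

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		ticket_lock(&lk);
		counter++;
		ticket_unlock(&lk);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	printf("counter = %ld (expected 400000)\n", counter);
	return 0;
}

The point of the sketch is the split between a bounded-spin fast path and a "record the wanted ticket, set the flag, block until kicked" slow path; that is the same split the lock_spinning/unlock_kick hooks in the diff below implement against Xen event channels.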
Diffstat (limited to 'arch/x86/xen/spinlock.c')
-rw-r--r--  arch/x86/xen/spinlock.c | 387
1 file changed, 128 insertions(+), 259 deletions(-)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cf3caee356b3..0438b9324a72 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
 #include "xen-ops.h"
 #include "debugfs.h"
 
-#ifdef CONFIG_XEN_DEBUG_FS
-static struct xen_spinlock_stats
-{
-	u64 taken;
-	u32 taken_slow;
-	u32 taken_slow_nested;
-	u32 taken_slow_pickup;
-	u32 taken_slow_spurious;
-	u32 taken_slow_irqenable;
+enum xen_contention_stat {
+	TAKEN_SLOW,
+	TAKEN_SLOW_PICKUP,
+	TAKEN_SLOW_SPURIOUS,
+	RELEASED_SLOW,
+	RELEASED_SLOW_KICKED,
+	NR_CONTENTION_STATS
+};
 
-	u64 released;
-	u32 released_slow;
-	u32 released_slow_kicked;
 
+#ifdef CONFIG_XEN_DEBUG_FS
 #define HISTO_BUCKETS 30
-	u32 histo_spin_total[HISTO_BUCKETS+1];
-	u32 histo_spin_spinning[HISTO_BUCKETS+1];
+static struct xen_spinlock_stats
+{
+	u32 contention_stats[NR_CONTENTION_STATS];
 	u32 histo_spin_blocked[HISTO_BUCKETS+1];
-
-	u64 time_total;
-	u64 time_spinning;
 	u64 time_blocked;
 } spinlock_stats;
 
 static u8 zero_stats;
 
-static unsigned lock_timeout = 1 << 10;
-#define TIMEOUT lock_timeout
-
 static inline void check_zero(void)
 {
-	if (unlikely(zero_stats)) {
-		memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-		zero_stats = 0;
+	u8 ret;
+	u8 old = ACCESS_ONCE(zero_stats);
+	if (unlikely(old)) {
+		ret = cmpxchg(&zero_stats, old, 0);
+		/* This ensures only one fellow resets the stat */
+		if (ret == old)
+			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
 	}
 }
 
-#define ADD_STATS(elem, val)		\
-	do { check_zero(); spinlock_stats.elem += (val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+	check_zero();
+	spinlock_stats.contention_stats[var] += val;
+}
 
 static inline u64 spin_time_start(void)
 {
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
 		array[HISTO_BUCKETS]++;
 }
 
-static inline void spin_time_accum_spinning(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
-	spinlock_stats.time_spinning += delta;
-}
-
-static inline void spin_time_accum_total(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_total);
-	spinlock_stats.time_total += delta;
-}
-
 static inline void spin_time_accum_blocked(u64 start)
 {
 	u32 delta = xen_clocksource_read() - start;
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start)
 }
 #else /* !CONFIG_XEN_DEBUG_FS */
 #define TIMEOUT			(1 << 10)
-#define ADD_STATS(elem, val)	do { (void)(val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+}
 
 static inline u64 spin_time_start(void)
 {
 	return 0;
 }
 
-static inline void spin_time_accum_total(u64 start)
-{
-}
-static inline void spin_time_accum_spinning(u64 start)
-{
-}
 static inline void spin_time_accum_blocked(u64 start)
 {
 }
@@ -134,227 +113,123 @@ typedef u16 xen_spinners_t;
 	asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
 #endif
 
-struct xen_spinlock {
-	unsigned char lock;		/* 0 -> free; 1 -> locked */
-	xen_spinners_t spinners;	/* count of waiting cpus */
+struct xen_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
 };
 
-static int xen_spin_is_locked(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	/* Not strictly true; this is only the count of contended
-	   lock-takers entering the slow path. */
-	return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	u8 old = 1;
-
-	asm("xchgb %b0,%1"
-	    : "+q" (old), "+m" (xl->lock) : : "memory");
-
-	return old == 0;
-}
-
-static DEFINE_PER_CPU(char *, irq_name);
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
-
-/*
- * Mark a cpu as interested in a lock.  Returns the CPU's previous
- * lock of interest, in case we got preempted by an interrupt.
- */
-static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
-{
-	struct xen_spinlock *prev;
-
-	prev = __this_cpu_read(lock_spinners);
-	__this_cpu_write(lock_spinners, xl);
-
-	wmb();			/* set lock of interest before count */
-
-	inc_spinners(xl);
-
-	return prev;
-}
-
-/*
- * Mark a cpu as no longer interested in a lock.  Restores previous
- * lock of interest (NULL for none).
- */
-static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
-{
-	dec_spinners(xl);
-	wmb();			/* decrement count before restoring lock */
-	__this_cpu_write(lock_spinners, prev);
-}
+static DEFINE_PER_CPU(char *, irq_name);
+static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
+static cpumask_t waiting_cpus;
 
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
+static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	struct xen_spinlock *prev;
 	int irq = __this_cpu_read(lock_kicker_irq);
-	int ret;
+	struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
+	int cpu = smp_processor_id();
 	u64 start;
+	unsigned long flags;
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1)
-		return 0;
+		return;
 
 	start = spin_time_start();
 
-	/* announce we're spinning */
-	prev = spinning_lock(xl);
+	/*
+	 * Make sure an interrupt handler can't upset things in a
+	 * partially setup state.
+	 */
+	local_irq_save(flags);
+	/*
+	 * We don't really care if we're overwriting some other
+	 * (lock,want) pair, as that would mean that we're currently
+	 * in an interrupt context, and the outer context had
+	 * interrupts enabled.  That has already kicked the VCPU out
+	 * of xen_poll_irq(), so it will just return spuriously and
+	 * retry with newly setup (lock,want).
+	 *
+	 * The ordering protocol on this is that the "lock" pointer
+	 * may only be set non-NULL if the "want" ticket is correct.
+	 * If we're updating "want", we must first clear "lock".
+	 */
+	w->lock = NULL;
+	smp_wmb();
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;
 
-	ADD_STATS(taken_slow, 1);
-	ADD_STATS(taken_slow_nested, prev != NULL);
+	/* This uses set_bit, which atomic and therefore a barrier */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+	add_stats(TAKEN_SLOW, 1);
 
-	do {
-		unsigned long flags;
+	/* clear pending */
+	xen_clear_irq_pending(irq);
 
-		/* clear pending */
-		xen_clear_irq_pending(irq);
+	/* Only check lock once pending cleared */
+	barrier();
 
-		/* check again make sure it didn't become free while
-		   we weren't looking  */
-		ret = xen_spin_trylock(lock);
-		if (ret) {
-			ADD_STATS(taken_slow_pickup, 1);
+	/*
+	 * Mark entry to slowpath before doing the pickup test to make
+	 * sure we don't deadlock with an unlocker.
+	 */
+	__ticket_enter_slowpath(lock);
 
-			/*
-			 * If we interrupted another spinlock while it
-			 * was blocking, make sure it doesn't block
-			 * without rechecking the lock.
-			 */
-			if (prev != NULL)
-				xen_set_irq_pending(irq);
-			goto out;
-		}
+	/*
+	 * check again make sure it didn't become free while
+	 * we weren't looking
+	 */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
 
-		flags = arch_local_save_flags();
-		if (irq_enable) {
-			ADD_STATS(taken_slow_irqenable, 1);
-			raw_local_irq_enable();
-		}
+	/* Allow interrupts while blocked */
+	local_irq_restore(flags);
 
-		/*
-		 * Block until irq becomes pending.  If we're
-		 * interrupted at this point (after the trylock but
-		 * before entering the block), then the nested lock
-		 * handler guarantees that the irq will be left
-		 * pending if there's any chance the lock became free;
-		 * xen_poll_irq() returns immediately if the irq is
-		 * pending.
-		 */
-		xen_poll_irq(irq);
+	/*
+	 * If an interrupt happens here, it will leave the wakeup irq
+	 * pending, which will cause xen_poll_irq() to return
+	 * immediately.
+	 */
 
-		raw_local_irq_restore(flags);
+	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
+	xen_poll_irq(irq);
+	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
 
-		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
-	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
+	local_irq_save(flags);
 
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
-
 out:
-	unspinning_lock(xl, prev);
-	spin_time_accum_blocked(start);
-
-	return ret;
-}
-
-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	unsigned timeout;
-	u8 oldval;
-	u64 start_spin;
-
-	ADD_STATS(taken, 1);
-
-	start_spin = spin_time_start();
-
-	do {
-		u64 start_spin_fast = spin_time_start();
-
-		timeout = TIMEOUT;
-
-		asm("1: xchgb %1,%0\n"
-		    "   testb %1,%1\n"
-		    "   jz 3f\n"
-		    "2: rep;nop\n"
-		    "   cmpb $0,%0\n"
-		    "   je 1b\n"
-		    "   dec %2\n"
-		    "   jnz 2b\n"
-		    "3:\n"
-		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-		    : "1" (1)
-		    : "memory");
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
 
-		spin_time_accum_spinning(start_spin_fast);
+	local_irq_restore(flags);
 
-	} while (unlikely(oldval != 0 &&
-			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
-
-	spin_time_accum_total(start_spin);
-}
-
-static void xen_spin_lock(struct arch_spinlock *lock)
-{
-	__xen_spin_lock(lock, false);
-}
-
-static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
-{
-	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
+	spin_time_accum_blocked(start);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
 
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 {
 	int cpu;
 
-	ADD_STATS(released_slow, 1);
+	add_stats(RELEASED_SLOW, 1);
+
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
 
-	for_each_online_cpu(cpu) {
-		/* XXX should mix up next cpu selection */
-		if (per_cpu(lock_spinners, cpu) == xl) {
-			ADD_STATS(released_slow_kicked, 1);
+		/* Make sure we read lock before want */
+		if (ACCESS_ONCE(w->lock) == lock &&
+		    ACCESS_ONCE(w->want) == next) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+			break;
 		}
 	}
 }
 
-static void xen_spin_unlock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	ADD_STATS(released, 1);
-
-	smp_wmb();		/* make sure no writes get moved after unlock */
-	xl->lock = 0;		/* release lock */
-
-	/*
-	 * Make sure unlock happens before checking for waiting
-	 * spinners.  We need a strong barrier to enforce the
-	 * write-read ordering to different memory locations, as the
-	 * CPU makes no implied guarantees about their ordering.
-	 */
-	mb();
-
-	if (unlikely(xl->spinners))
-		xen_spin_unlock_slow(xl);
-}
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu)
 	per_cpu(irq_name, cpu) = NULL;
 }
 
+static bool xen_pvspin __initdata = true;
+
 void __init xen_init_spinlocks(void)
 {
 	/*
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void)
 	if (xen_hvm_domain())
 		return;
 
-	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
+	if (!xen_pvspin) {
+		printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
+		return;
+	}
 
-	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
-	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
-	pv_lock_ops.spin_lock = xen_spin_lock;
-	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
-	pv_lock_ops.spin_trylock = xen_spin_trylock;
-	pv_lock_ops.spin_unlock = xen_spin_unlock;
+	static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
+	pv_lock_ops.unlock_kick = xen_unlock_kick;
+}
+
+static __init int xen_parse_nopvspin(char *arg)
+{
+	xen_pvspin = false;
+	return 0;
 }
+early_param("xen_nopvspin", xen_parse_nopvspin);
 
 #ifdef CONFIG_XEN_DEBUG_FS
 
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void)
 
 	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
 
-	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
-
-	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
 	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow);
-	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_nested);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
 	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_pickup);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
 	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_spurious);
-	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_irqenable);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
 
-	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
 	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
 	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow_kicked);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
 
-	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
-			   &spinlock_stats.time_spinning);
 	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
 			   &spinlock_stats.time_blocked);
-	debugfs_create_u64("time_total", 0444, d_spin_debug,
-			   &spinlock_stats.time_total);
 
-	debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-				 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
-	debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-				 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
 	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
 				 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 