aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author: Jeremy Fitzhardinge <jeremy@goop.org> 2013-08-09 10:21:49 -0400
committer: H. Peter Anvin <hpa@linux.intel.com> 2013-08-09 10:53:05 -0400
commit: 545ac13892ab391049a92108cf59a0d05de7e28c (patch)
tree: e993b90bcbedd44b77c895cf7fcee89ee5fe9d51 /arch/x86
parent: c095ba7224d8edc71dcef0d655911399a8bd4a3f (diff)
x86, spinlock: Replace pv spinlocks with pv ticketlocks
Rather than outright replacing the entire spinlock implementation in order to paravirtualize it, keep the ticket lock implementation but add a couple of pvops hooks on the slow path (long spin on lock, unlocking a contended lock).

Ticket locks have a number of nice properties, but they also have some surprising behaviours in virtual environments. They enforce a strict FIFO ordering on cpus trying to take a lock; however, if the hypervisor scheduler does not schedule the cpus in the correct order, the system can waste a huge amount of time spinning until the next cpu can take the lock.

(See Thomas Friebel's talk "Prevent Guests from Spinning Around" http://www.xen.org/files/xensummitboston08/LHP.pdf for more details.)

To address this, we add two hooks:
 - __ticket_lock_spinning, which is called after the cpu has been spinning on the lock for a significant number of iterations but has failed to take the lock (presumably because the cpu holding the lock has been descheduled). The lock_spinning pvop is expected to block the cpu until it has been kicked by the current lock holder.
 - __ticket_unlock_kick, which on releasing a contended lock (there are more cpus with tail tickets) looks to see if the next cpu is blocked and wakes it if so.

When compiled with CONFIG_PARAVIRT_SPINLOCKS disabled, a set of stub functions causes all the extra code to go away.

Results:
=======
setup: 32 core machine with 32 vcpu KVM guest (HT off) with 8GB RAM
base = 3.11-rc
patched = base + pvspinlock V12

+-----------------+----------------+--------+
 dbench (Throughput in MB/sec. Higher is better)
+-----------------+----------------+--------+
|   base (stdev %)|patched(stdev%) | %gain  |
+-----------------+----------------+--------+
| 15035.3   (0.3) |15150.0   (0.6) |   0.8  |
|  1470.0   (2.2) | 1713.7   (1.9) |  16.6  |
|   848.6   (4.3) |  967.8   (4.3) |  14.0  |
|   652.9   (3.5) |  685.3   (3.7) |   5.0  |
+-----------------+----------------+--------+

pvspinlock shows benefits for overcommit ratio > 1 for PLE enabled cases, and undercommit results are flat.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Link: http://lkml.kernel.org/r/1376058122-8248-2-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Attilio Rao <attilio.rao@citrix.com>
[ Raghavendra: Changed SPIN_THRESHOLD, fixed redefinition of arch_spinlock_t]
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/paravirt.h 32
-rw-r--r-- arch/x86/include/asm/paravirt_types.h 14
-rw-r--r-- arch/x86/include/asm/spinlock.h 53
-rw-r--r-- arch/x86/include/asm/spinlock_types.h 4
-rw-r--r-- arch/x86/kernel/paravirt-spinlocks.c 15
-rw-r--r-- arch/x86/xen/spinlock.c 8
6 files changed, 65 insertions, 61 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cfdc9ee4c900..040e72db5ea9 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
712 712
713#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) 713#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
714 714
715static inline int arch_spin_is_locked(struct arch_spinlock *lock) 715static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
716 __ticket_t ticket)
716{ 717{
717 return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); 718 PVOP_VCALL2(pv_lock_ops.lock_spinning, lock, ticket);
718} 719}
719 720
720static inline int arch_spin_is_contended(struct arch_spinlock *lock) 721static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock,
722 __ticket_t ticket)
721{ 723{
722 return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); 724 PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
723}
724#define arch_spin_is_contended arch_spin_is_contended
725
726static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
727{
728 PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
729}
730
731static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
732 unsigned long flags)
733{
734 PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
735}
736
737static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
738{
739 return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
740}
741
742static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
743{
744 PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
745} 725}
746 726
747#endif 727#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 0db1fcac668c..346a07c1e71d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -327,13 +327,15 @@ struct pv_mmu_ops {
327}; 327};
328 328
329struct arch_spinlock; 329struct arch_spinlock;
330#ifdef CONFIG_SMP
331#include <asm/spinlock_types.h>
332#else
333typedef u16 __ticket_t;
334#endif
335
330struct pv_lock_ops { 336struct pv_lock_ops {
331 int (*spin_is_locked)(struct arch_spinlock *lock); 337 void (*lock_spinning)(struct arch_spinlock *lock, __ticket_t ticket);
332 int (*spin_is_contended)(struct arch_spinlock *lock); 338 void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
333 void (*spin_lock)(struct arch_spinlock *lock);
334 void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
335 int (*spin_trylock)(struct arch_spinlock *lock);
336 void (*spin_unlock)(struct arch_spinlock *lock);
337}; 339};
338 340
339/* This contains all the paravirt structures: we get a convenient 341/* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 33692eaabab5..4d542444bea3 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -34,6 +34,35 @@
34# define UNLOCK_LOCK_PREFIX 34# define UNLOCK_LOCK_PREFIX
35#endif 35#endif
36 36
37/* How long a lock should spin before we consider blocking */
38#define SPIN_THRESHOLD (1 << 15)
39
40#ifndef CONFIG_PARAVIRT_SPINLOCKS
41
42static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
43 __ticket_t ticket)
44{
45}
46
47static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock,
48 __ticket_t ticket)
49{
50}
51
52#endif /* CONFIG_PARAVIRT_SPINLOCKS */
53
54
55/*
56 * If a spinlock has someone waiting on it, then kick the appropriate
57 * waiting cpu.
58 */
59static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
60 __ticket_t next)
61{
62 if (unlikely(lock->tickets.tail != next))
63 ____ticket_unlock_kick(lock, next);
64}
65
37/* 66/*
38 * Ticket locks are conceptually two parts, one indicating the current head of 67 * Ticket locks are conceptually two parts, one indicating the current head of
39 * the queue, and the other indicating the current tail. The lock is acquired 68 * the queue, and the other indicating the current tail. The lock is acquired
@@ -47,19 +76,24 @@
47 * in the high part, because a wide xadd increment of the low part would carry 76 * in the high part, because a wide xadd increment of the low part would carry
48 * up and contaminate the high part. 77 * up and contaminate the high part.
49 */ 78 */
50static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) 79static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock)
51{ 80{
52 register struct __raw_tickets inc = { .tail = 1 }; 81 register struct __raw_tickets inc = { .tail = 1 };
53 82
54 inc = xadd(&lock->tickets, inc); 83 inc = xadd(&lock->tickets, inc);
55 84
56 for (;;) { 85 for (;;) {
57 if (inc.head == inc.tail) 86 unsigned count = SPIN_THRESHOLD;
58 break; 87
59 cpu_relax(); 88 do {
60 inc.head = ACCESS_ONCE(lock->tickets.head); 89 if (inc.head == inc.tail)
90 goto out;
91 cpu_relax();
92 inc.head = ACCESS_ONCE(lock->tickets.head);
93 } while (--count);
94 __ticket_lock_spinning(lock, inc.tail);
61 } 95 }
62 barrier(); /* make sure nothing creeps before the lock is taken */ 96out: barrier(); /* make sure nothing creeps before the lock is taken */
63} 97}
64 98
65static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) 99static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
@@ -78,7 +112,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
78 112
79static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) 113static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
80{ 114{
115 __ticket_t next = lock->tickets.head + 1;
116
81 __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); 117 __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
118 __ticket_unlock_kick(lock, next);
82} 119}
83 120
84static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) 121static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
@@ -95,8 +132,6 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
95 return (__ticket_t)(tmp.tail - tmp.head) > 1; 132 return (__ticket_t)(tmp.tail - tmp.head) > 1;
96} 133}
97 134
98#ifndef CONFIG_PARAVIRT_SPINLOCKS
99
100static inline int arch_spin_is_locked(arch_spinlock_t *lock) 135static inline int arch_spin_is_locked(arch_spinlock_t *lock)
101{ 136{
102 return __ticket_spin_is_locked(lock); 137 return __ticket_spin_is_locked(lock);
@@ -129,8 +164,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
129 arch_spin_lock(lock); 164 arch_spin_lock(lock);
130} 165}
131 166
132#endif /* CONFIG_PARAVIRT_SPINLOCKS */
133
134static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 167static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
135{ 168{
136 while (arch_spin_is_locked(lock)) 169 while (arch_spin_is_locked(lock))
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index ad0ad07fc006..83fd3c75d45c 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
1#ifndef _ASM_X86_SPINLOCK_TYPES_H 1#ifndef _ASM_X86_SPINLOCK_TYPES_H
2#define _ASM_X86_SPINLOCK_TYPES_H 2#define _ASM_X86_SPINLOCK_TYPES_H
3 3
4#ifndef __LINUX_SPINLOCK_TYPES_H
5# error "please don't include this file directly"
6#endif
7
8#include <linux/types.h> 4#include <linux/types.h>
9 5
10#if (CONFIG_NR_CPUS < 256) 6#if (CONFIG_NR_CPUS < 256)
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c77a976..c2e010e5fbce 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,21 +7,10 @@
7 7
8#include <asm/paravirt.h> 8#include <asm/paravirt.h>
9 9
10static inline void
11default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
12{
13 arch_spin_lock(lock);
14}
15
16struct pv_lock_ops pv_lock_ops = { 10struct pv_lock_ops pv_lock_ops = {
17#ifdef CONFIG_SMP 11#ifdef CONFIG_SMP
18 .spin_is_locked = __ticket_spin_is_locked, 12 .lock_spinning = paravirt_nop,
19 .spin_is_contended = __ticket_spin_is_contended, 13 .unlock_kick = paravirt_nop,
20
21 .spin_lock = __ticket_spin_lock,
22 .spin_lock_flags = default_spin_lock_flags,
23 .spin_trylock = __ticket_spin_trylock,
24 .spin_unlock = __ticket_spin_unlock,
25#endif 14#endif
26}; 15};
27EXPORT_SYMBOL(pv_lock_ops); 16EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cf3caee356b3..d50962936af4 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -139,6 +139,9 @@ struct xen_spinlock {
139 xen_spinners_t spinners; /* count of waiting cpus */ 139 xen_spinners_t spinners; /* count of waiting cpus */
140}; 140};
141 141
142static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
143
144#if 0
142static int xen_spin_is_locked(struct arch_spinlock *lock) 145static int xen_spin_is_locked(struct arch_spinlock *lock)
143{ 146{
144 struct xen_spinlock *xl = (struct xen_spinlock *)lock; 147 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
@@ -167,7 +170,6 @@ static int xen_spin_trylock(struct arch_spinlock *lock)
167} 170}
168 171
169static DEFINE_PER_CPU(char *, irq_name); 172static DEFINE_PER_CPU(char *, irq_name);
170static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
171static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); 173static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
172 174
173/* 175/*
@@ -354,6 +356,7 @@ static void xen_spin_unlock(struct arch_spinlock *lock)
354 if (unlikely(xl->spinners)) 356 if (unlikely(xl->spinners))
355 xen_spin_unlock_slow(xl); 357 xen_spin_unlock_slow(xl);
356} 358}
359#endif
357 360
358static irqreturn_t dummy_handler(int irq, void *dev_id) 361static irqreturn_t dummy_handler(int irq, void *dev_id)
359{ 362{
@@ -418,13 +421,14 @@ void __init xen_init_spinlocks(void)
418 return; 421 return;
419 422
420 BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); 423 BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
421 424#if 0
422 pv_lock_ops.spin_is_locked = xen_spin_is_locked; 425 pv_lock_ops.spin_is_locked = xen_spin_is_locked;
423 pv_lock_ops.spin_is_contended = xen_spin_is_contended; 426 pv_lock_ops.spin_is_contended = xen_spin_is_contended;
424 pv_lock_ops.spin_lock = xen_spin_lock; 427 pv_lock_ops.spin_lock = xen_spin_lock;
425 pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; 428 pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
426 pv_lock_ops.spin_trylock = xen_spin_trylock; 429 pv_lock_ops.spin_trylock = xen_spin_trylock;
427 pv_lock_ops.spin_unlock = xen_spin_unlock; 430 pv_lock_ops.spin_unlock = xen_spin_unlock;
431#endif
428} 432}
429 433
430#ifdef CONFIG_XEN_DEBUG_FS 434#ifdef CONFIG_XEN_DEBUG_FS