diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2013-08-09 10:21:49 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2013-08-09 10:53:05 -0400 |
commit | 545ac13892ab391049a92108cf59a0d05de7e28c (patch) | |
tree | e993b90bcbedd44b77c895cf7fcee89ee5fe9d51 /arch/x86 | |
parent | c095ba7224d8edc71dcef0d655911399a8bd4a3f (diff) |
x86, spinlock: Replace pv spinlocks with pv ticketlocks
Rather than outright replacing the entire spinlock implementation in
order to paravirtualize it, keep the ticket lock implementation but add
a couple of pvops hooks on the slow path (long spin on lock, unlocking
a contended lock).
Ticket locks have a number of nice properties, but they also have some
surprising behaviours in virtual environments. They enforce a strict
FIFO ordering on cpus trying to take a lock; however, if the hypervisor
scheduler does not schedule the cpus in the correct order, the system can
waste a huge amount of time spinning until the next cpu can take the lock.
(See Thomas Friebel's talk "Prevent Guests from Spinning Around"
http://www.xen.org/files/xensummitboston08/LHP.pdf for more details.)
To address this, we add two hooks:
- __ticket_lock_spinning, which is called after the cpu has been
spinning on the lock for a significant number of iterations but has
failed to take the lock (presumably because the cpu holding the lock
has been descheduled). The lock_spinning pvop is expected to block
the cpu until it has been kicked by the current lock holder.
- __ticket_unlock_kick, which on releasing a contended lock
(there are more cpus with tail tickets) looks to see if the next
cpu is blocked and wakes it if so.
When compiled with CONFIG_PARAVIRT_SPINLOCKS disabled, a set of stub
functions causes all the extra code to go away.
Results:
=======
setup: 32 core machine with 32 vcpu KVM guest (HT off) with 8GB RAM
base = 3.11-rc
patched = base + pvspinlock V12
+-----------------+----------------+--------+
dbench (Throughput in MB/sec. Higher is better)
+-----------------+----------------+--------+
| base (stdev %)|patched(stdev%) | %gain |
+-----------------+----------------+--------+
| 15035.3 (0.3) |15150.0 (0.6) | 0.8 |
| 1470.0 (2.2) | 1713.7 (1.9) | 16.6 |
| 848.6 (4.3) | 967.8 (4.3) | 14.0 |
| 652.9 (3.5) | 685.3 (3.7) | 5.0 |
+-----------------+----------------+--------+
pvspinlock shows benefits for overcommit ratio > 1 for PLE enabled cases,
and undercommit results are flat.
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Link: http://lkml.kernel.org/r/1376058122-8248-2-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Attilio Rao <attilio.rao@citrix.com>
[ Raghavendra: Changed SPIN_THRESHOLD, fixed redefinition of arch_spinlock_t]
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/paravirt.h | 32 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt_types.h | 14 | ||||
-rw-r--r-- | arch/x86/include/asm/spinlock.h | 53 | ||||
-rw-r--r-- | arch/x86/include/asm/spinlock_types.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt-spinlocks.c | 15 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 8 |
6 files changed, 65 insertions, 61 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cfdc9ee4c900..040e72db5ea9 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | |||
712 | 712 | ||
713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
714 | 714 | ||
715 | static inline int arch_spin_is_locked(struct arch_spinlock *lock) | 715 | static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, |
716 | __ticket_t ticket) | ||
716 | { | 717 | { |
717 | return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); | 718 | PVOP_VCALL2(pv_lock_ops.lock_spinning, lock, ticket); |
718 | } | 719 | } |
719 | 720 | ||
720 | static inline int arch_spin_is_contended(struct arch_spinlock *lock) | 721 | static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock, |
722 | __ticket_t ticket) | ||
721 | { | 723 | { |
722 | return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); | 724 | PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); |
723 | } | ||
724 | #define arch_spin_is_contended arch_spin_is_contended | ||
725 | |||
726 | static __always_inline void arch_spin_lock(struct arch_spinlock *lock) | ||
727 | { | ||
728 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); | ||
729 | } | ||
730 | |||
731 | static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock, | ||
732 | unsigned long flags) | ||
733 | { | ||
734 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); | ||
735 | } | ||
736 | |||
737 | static __always_inline int arch_spin_trylock(struct arch_spinlock *lock) | ||
738 | { | ||
739 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); | ||
740 | } | ||
741 | |||
742 | static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) | ||
743 | { | ||
744 | PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); | ||
745 | } | 725 | } |
746 | 726 | ||
747 | #endif | 727 | #endif |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0db1fcac668c..346a07c1e71d 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -327,13 +327,15 @@ struct pv_mmu_ops { | |||
327 | }; | 327 | }; |
328 | 328 | ||
329 | struct arch_spinlock; | 329 | struct arch_spinlock; |
330 | #ifdef CONFIG_SMP | ||
331 | #include <asm/spinlock_types.h> | ||
332 | #else | ||
333 | typedef u16 __ticket_t; | ||
334 | #endif | ||
335 | |||
330 | struct pv_lock_ops { | 336 | struct pv_lock_ops { |
331 | int (*spin_is_locked)(struct arch_spinlock *lock); | 337 | void (*lock_spinning)(struct arch_spinlock *lock, __ticket_t ticket); |
332 | int (*spin_is_contended)(struct arch_spinlock *lock); | 338 | void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); |
333 | void (*spin_lock)(struct arch_spinlock *lock); | ||
334 | void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); | ||
335 | int (*spin_trylock)(struct arch_spinlock *lock); | ||
336 | void (*spin_unlock)(struct arch_spinlock *lock); | ||
337 | }; | 339 | }; |
338 | 340 | ||
339 | /* This contains all the paravirt structures: we get a convenient | 341 | /* This contains all the paravirt structures: we get a convenient |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 33692eaabab5..4d542444bea3 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -34,6 +34,35 @@ | |||
34 | # define UNLOCK_LOCK_PREFIX | 34 | # define UNLOCK_LOCK_PREFIX |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | /* How long a lock should spin before we consider blocking */ | ||
38 | #define SPIN_THRESHOLD (1 << 15) | ||
39 | |||
40 | #ifndef CONFIG_PARAVIRT_SPINLOCKS | ||
41 | |||
42 | static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, | ||
43 | __ticket_t ticket) | ||
44 | { | ||
45 | } | ||
46 | |||
47 | static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock, | ||
48 | __ticket_t ticket) | ||
49 | { | ||
50 | } | ||
51 | |||
52 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
53 | |||
54 | |||
55 | /* | ||
56 | * If a spinlock has someone waiting on it, then kick the appropriate | ||
57 | * waiting cpu. | ||
58 | */ | ||
59 | static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, | ||
60 | __ticket_t next) | ||
61 | { | ||
62 | if (unlikely(lock->tickets.tail != next)) | ||
63 | ____ticket_unlock_kick(lock, next); | ||
64 | } | ||
65 | |||
37 | /* | 66 | /* |
38 | * Ticket locks are conceptually two parts, one indicating the current head of | 67 | * Ticket locks are conceptually two parts, one indicating the current head of |
39 | * the queue, and the other indicating the current tail. The lock is acquired | 68 | * the queue, and the other indicating the current tail. The lock is acquired |
@@ -47,19 +76,24 @@ | |||
47 | * in the high part, because a wide xadd increment of the low part would carry | 76 | * in the high part, because a wide xadd increment of the low part would carry |
48 | * up and contaminate the high part. | 77 | * up and contaminate the high part. |
49 | */ | 78 | */ |
50 | static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) | 79 | static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock) |
51 | { | 80 | { |
52 | register struct __raw_tickets inc = { .tail = 1 }; | 81 | register struct __raw_tickets inc = { .tail = 1 }; |
53 | 82 | ||
54 | inc = xadd(&lock->tickets, inc); | 83 | inc = xadd(&lock->tickets, inc); |
55 | 84 | ||
56 | for (;;) { | 85 | for (;;) { |
57 | if (inc.head == inc.tail) | 86 | unsigned count = SPIN_THRESHOLD; |
58 | break; | 87 | |
59 | cpu_relax(); | 88 | do { |
60 | inc.head = ACCESS_ONCE(lock->tickets.head); | 89 | if (inc.head == inc.tail) |
90 | goto out; | ||
91 | cpu_relax(); | ||
92 | inc.head = ACCESS_ONCE(lock->tickets.head); | ||
93 | } while (--count); | ||
94 | __ticket_lock_spinning(lock, inc.tail); | ||
61 | } | 95 | } |
62 | barrier(); /* make sure nothing creeps before the lock is taken */ | 96 | out: barrier(); /* make sure nothing creeps before the lock is taken */ |
63 | } | 97 | } |
64 | 98 | ||
65 | static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) | 99 | static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) |
@@ -78,7 +112,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) | |||
78 | 112 | ||
79 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | 113 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) |
80 | { | 114 | { |
115 | __ticket_t next = lock->tickets.head + 1; | ||
116 | |||
81 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); | 117 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); |
118 | __ticket_unlock_kick(lock, next); | ||
82 | } | 119 | } |
83 | 120 | ||
84 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) | 121 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) |
@@ -95,8 +132,6 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) | |||
95 | return (__ticket_t)(tmp.tail - tmp.head) > 1; | 132 | return (__ticket_t)(tmp.tail - tmp.head) > 1; |
96 | } | 133 | } |
97 | 134 | ||
98 | #ifndef CONFIG_PARAVIRT_SPINLOCKS | ||
99 | |||
100 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) | 135 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) |
101 | { | 136 | { |
102 | return __ticket_spin_is_locked(lock); | 137 | return __ticket_spin_is_locked(lock); |
@@ -129,8 +164,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, | |||
129 | arch_spin_lock(lock); | 164 | arch_spin_lock(lock); |
130 | } | 165 | } |
131 | 166 | ||
132 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
133 | |||
134 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) | 167 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) |
135 | { | 168 | { |
136 | while (arch_spin_is_locked(lock)) | 169 | while (arch_spin_is_locked(lock)) |
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index ad0ad07fc006..83fd3c75d45c 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h | |||
@@ -1,10 +1,6 @@ | |||
1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H | 1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H |
2 | #define _ASM_X86_SPINLOCK_TYPES_H | 2 | #define _ASM_X86_SPINLOCK_TYPES_H |
3 | 3 | ||
4 | #ifndef __LINUX_SPINLOCK_TYPES_H | ||
5 | # error "please don't include this file directly" | ||
6 | #endif | ||
7 | |||
8 | #include <linux/types.h> | 4 | #include <linux/types.h> |
9 | 5 | ||
10 | #if (CONFIG_NR_CPUS < 256) | 6 | #if (CONFIG_NR_CPUS < 256) |
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 676b8c77a976..c2e010e5fbce 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -7,21 +7,10 @@ | |||
7 | 7 | ||
8 | #include <asm/paravirt.h> | 8 | #include <asm/paravirt.h> |
9 | 9 | ||
10 | static inline void | ||
11 | default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) | ||
12 | { | ||
13 | arch_spin_lock(lock); | ||
14 | } | ||
15 | |||
16 | struct pv_lock_ops pv_lock_ops = { | 10 | struct pv_lock_ops pv_lock_ops = { |
17 | #ifdef CONFIG_SMP | 11 | #ifdef CONFIG_SMP |
18 | .spin_is_locked = __ticket_spin_is_locked, | 12 | .lock_spinning = paravirt_nop, |
19 | .spin_is_contended = __ticket_spin_is_contended, | 13 | .unlock_kick = paravirt_nop, |
20 | |||
21 | .spin_lock = __ticket_spin_lock, | ||
22 | .spin_lock_flags = default_spin_lock_flags, | ||
23 | .spin_trylock = __ticket_spin_trylock, | ||
24 | .spin_unlock = __ticket_spin_unlock, | ||
25 | #endif | 14 | #endif |
26 | }; | 15 | }; |
27 | EXPORT_SYMBOL(pv_lock_ops); | 16 | EXPORT_SYMBOL(pv_lock_ops); |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index cf3caee356b3..d50962936af4 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -139,6 +139,9 @@ struct xen_spinlock { | |||
139 | xen_spinners_t spinners; /* count of waiting cpus */ | 139 | xen_spinners_t spinners; /* count of waiting cpus */ |
140 | }; | 140 | }; |
141 | 141 | ||
142 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
143 | |||
144 | #if 0 | ||
142 | static int xen_spin_is_locked(struct arch_spinlock *lock) | 145 | static int xen_spin_is_locked(struct arch_spinlock *lock) |
143 | { | 146 | { |
144 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | 147 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
@@ -167,7 +170,6 @@ static int xen_spin_trylock(struct arch_spinlock *lock) | |||
167 | } | 170 | } |
168 | 171 | ||
169 | static DEFINE_PER_CPU(char *, irq_name); | 172 | static DEFINE_PER_CPU(char *, irq_name); |
170 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
171 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | 173 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); |
172 | 174 | ||
173 | /* | 175 | /* |
@@ -354,6 +356,7 @@ static void xen_spin_unlock(struct arch_spinlock *lock) | |||
354 | if (unlikely(xl->spinners)) | 356 | if (unlikely(xl->spinners)) |
355 | xen_spin_unlock_slow(xl); | 357 | xen_spin_unlock_slow(xl); |
356 | } | 358 | } |
359 | #endif | ||
357 | 360 | ||
358 | static irqreturn_t dummy_handler(int irq, void *dev_id) | 361 | static irqreturn_t dummy_handler(int irq, void *dev_id) |
359 | { | 362 | { |
@@ -418,13 +421,14 @@ void __init xen_init_spinlocks(void) | |||
418 | return; | 421 | return; |
419 | 422 | ||
420 | BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); | 423 | BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); |
421 | 424 | #if 0 | |
422 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | 425 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; |
423 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | 426 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; |
424 | pv_lock_ops.spin_lock = xen_spin_lock; | 427 | pv_lock_ops.spin_lock = xen_spin_lock; |
425 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; | 428 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; |
426 | pv_lock_ops.spin_trylock = xen_spin_trylock; | 429 | pv_lock_ops.spin_trylock = xen_spin_trylock; |
427 | pv_lock_ops.spin_unlock = xen_spin_unlock; | 430 | pv_lock_ops.spin_unlock = xen_spin_unlock; |
431 | #endif | ||
428 | } | 432 | } |
429 | 433 | ||
430 | #ifdef CONFIG_XEN_DEBUG_FS | 434 | #ifdef CONFIG_XEN_DEBUG_FS |