author     Jeremy Fitzhardinge <jeremy@goop.org>    2008-07-07 15:07:50 -0400
committer  Ingo Molnar <mingo@elte.hu>              2008-07-16 05:15:52 -0400
commit     74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d (patch)
tree       ea70d2323c8a424e8c20389514c6c91f149cdf72 /include
parent     094029479be8eb380447f42eff1b35362ef1a464 (diff)
x86/paravirt: add hooks for spinlock operations
Ticket spinlocks have absolutely ghastly worst-case performance characteristics in a virtual environment. If there is any contention for physical CPUs (i.e., there are more runnable vcpus than cpus), then ticket locks can cause the system to end up spending 90+% of its time spinning.

The problem is that (v)cpus waiting on a ticket spinlock will be granted access to the lock in the strict order in which they took their tickets. If the hypervisor scheduler doesn't give the vcpus time in that order, they will burn timeslices waiting for the scheduler to give the right vcpu some time. In the worst case it could take O(n^2) vcpu scheduler timeslices for everyone waiting on the lock to get it, not counting new cpus trying to take the lock while the log-jam is sorted out.

These hooks allow a paravirt backend to replace the spinlock implementation. At the very least, this could revert the implementation back to the old lock algorithm, which allows the next scheduled vcpu to take the lock and has fairly good performance. It also allows the spinlocks to take advantage of hypervisor features to make locks more efficient (spin and block, for example).

The cost to native execution is an extra direct call when using a spinlock function. There's no overhead if CONFIG_PARAVIRT is turned off.

The lock structure is fixed at a single "unsigned int", initialized to zero, but the spinlock implementation can use it as it wishes.

Thanks to Thomas Friebel's Xen Summit talk "Preventing Guests from Spinning Around" for pointing out this problem.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <clameter@linux-foundation.org>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Virtualization <virtualization@lists.linux-foundation.org>
Cc: Xen devel <xen-devel@lists.xensource.com>
Cc: Thomas Friebel <thomas.friebel@amd.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
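As a rough illustration of what the new hooks make possible (this sketch is not part of the patch, and all xen_* names are hypothetical): a hypervisor backend could install a simple unfair byte lock through pv_lock_ops, so that whichever vcpu happens to be running can take the lock instead of waiting for the strict ticket order.

/*
 * Hypothetical sketch, not part of this patch: a paravirt backend
 * replacing the ticket lock with a simple unfair byte lock via the
 * new pv_lock_ops hooks.  All xen_* names are illustrative only.
 */
static int xen_spin_is_locked(struct raw_spinlock *lock)
{
        return lock->slock != 0;
}

static int xen_spin_is_contended(struct raw_spinlock *lock)
{
        return 0;       /* a plain byte lock keeps no waiter count */
}

static int xen_spin_trylock(struct raw_spinlock *lock)
{
        /* xchg() returns the previous value; 0 means we got the lock */
        return xchg(&lock->slock, 1) == 0;
}

static void xen_spin_lock(struct raw_spinlock *lock)
{
        while (!xen_spin_trylock(lock))
                cpu_relax();    /* a real backend could block in the hypervisor here */
}

static void xen_spin_unlock(struct raw_spinlock *lock)
{
        barrier();              /* plain store is a release on x86; keep the compiler honest */
        lock->slock = 0;
}

static void __init xen_init_spinlocks(void)
{
        pv_lock_ops.spin_is_locked    = xen_spin_is_locked;
        pv_lock_ops.spin_is_contended = xen_spin_is_contended;
        pv_lock_ops.spin_lock         = xen_spin_lock;
        pv_lock_ops.spin_trylock      = xen_spin_trylock;
        pv_lock_ops.spin_unlock       = xen_spin_unlock;
}

A real backend would typically go further and block in the hypervisor instead of spinning, which is the "spin and block" optimization mentioned above.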
Diffstat (limited to 'include')
-rw-r--r--    include/asm-x86/paravirt.h          37
-rw-r--r--    include/asm-x86/spinlock.h          55
-rw-r--r--    include/asm-x86/spinlock_types.h     2
3 files changed, 78 insertions(+), 16 deletions(-)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index eef8095a09dc..feb6bb66c5e2 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -326,6 +326,15 @@ struct pv_mmu_ops {
 				   unsigned long phys, pgprot_t flags);
 };
 
+struct raw_spinlock;
+struct pv_lock_ops {
+	int (*spin_is_locked)(struct raw_spinlock *lock);
+	int (*spin_is_contended)(struct raw_spinlock *lock);
+	void (*spin_lock)(struct raw_spinlock *lock);
+	int (*spin_trylock)(struct raw_spinlock *lock);
+	void (*spin_unlock)(struct raw_spinlock *lock);
+};
+
 /* This contains all the paravirt structures: we get a convenient
  * number for each function using the offset which we use to indicate
  * what to patch. */
@@ -336,6 +345,7 @@ struct paravirt_patch_template {
 	struct pv_irq_ops pv_irq_ops;
 	struct pv_apic_ops pv_apic_ops;
 	struct pv_mmu_ops pv_mmu_ops;
+	struct pv_lock_ops pv_lock_ops;
 };
 
 extern struct pv_info pv_info;
@@ -345,6 +355,7 @@ extern struct pv_cpu_ops pv_cpu_ops;
 extern struct pv_irq_ops pv_irq_ops;
 extern struct pv_apic_ops pv_apic_ops;
 extern struct pv_mmu_ops pv_mmu_ops;
+extern struct pv_lock_ops pv_lock_ops;
 
 #define PARAVIRT_PATCH(x)					\
 	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
@@ -1374,6 +1385,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 void _paravirt_nop(void);
 #define paravirt_nop	((void *)_paravirt_nop)
 
+static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+}
+
+static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
+}
+
+static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
+{
+	return PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
+}
+
+static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
+}
+
+static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+{
+	return PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+}
+
 /* These all sit in the .parainstructions section to tell us what to patch. */
 struct paravirt_patch_site {
 	u8 *instr;	/* original instructions */
@@ -1458,6 +1494,7 @@ static inline unsigned long __raw_local_irq_save(void)
 	return f;
 }
 
+
 /* Make sure as little as possible of this mess escapes. */
 #undef PARAVIRT_CALL
 #undef __PVOP_CALL
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index 21e89bf92f1c..9726144cdaba 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -6,7 +6,7 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <linux/compiler.h>
-
+#include <asm/paravirt.h>
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  *
@@ -54,21 +54,21 @@
  * much between them in performance though, especially as locks are out of line.
  */
 #if (NR_CPUS < 256)
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> 8) & 0xff) != (tmp & 0xff));
 }
 
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1;
 }
 
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
 	short inc = 0x0100;
 
@@ -87,9 +87,7 @@ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
 		     : "memory", "cc");
 }
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
 	int tmp;
 	short new;
@@ -110,7 +108,7 @@ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return tmp;
 }
 
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 {
 	asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
 		     : "+m" (lock->slock)
@@ -118,21 +116,21 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
 		     : "memory", "cc");
 }
 #else
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> 16) & 0xffff) != (tmp & 0xffff));
 }
 
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);
 
 	return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1;
 }
 
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
 	int inc = 0x00010000;
 	int tmp;
@@ -153,9 +151,7 @@ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
 		     : "memory", "cc");
 }
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
 	int tmp;
 	int new;
@@ -177,7 +173,7 @@ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
 	return tmp;
 }
 
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 {
 	asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
 		     : "+m" (lock->slock)
@@ -186,6 +182,35 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
 }
 #endif
 
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+
+#ifndef CONFIG_PARAVIRT
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+	return __ticket_spin_is_locked(lock);
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+	return __ticket_spin_is_contended(lock);
+}
+
+static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+	__ticket_spin_lock(lock);
+}
+
+static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	return __ticket_spin_trylock(lock);
+}
+
+static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	__ticket_spin_unlock(lock);
+}
+#endif	/* CONFIG_PARAVIRT */
+
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
 	while (__raw_spin_is_locked(lock))
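For completeness: the definition of the pv_lock_ops table itself lives outside the include/ diffstat shown here. By default it presumably just forwards to the native __ticket_spin_* helpers above, so a CONFIG_PARAVIRT kernel keeps the ticket-lock behaviour until a hypervisor backend overrides the hooks. A minimal sketch of such a default table (the file name and exact layout are assumptions, not shown in this diff):

/* Sketch of a default ops table (its real definition is outside the
 * include/ diffstat above); it forwards to the native ticket-lock
 * helpers so CONFIG_PARAVIRT kernels behave like bare metal until a
 * hypervisor backend overrides the hooks at boot. */
struct pv_lock_ops pv_lock_ops = {
	.spin_is_locked    = __ticket_spin_is_locked,
	.spin_is_contended = __ticket_spin_is_contended,
	.spin_lock         = __ticket_spin_lock,
	.spin_trylock      = __ticket_spin_trylock,
	.spin_unlock       = __ticket_spin_unlock,
};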
diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h
index 9029cf78cf5d..06c071c9eee9 100644
--- a/include/asm-x86/spinlock_types.h
+++ b/include/asm-x86/spinlock_types.h
@@ -5,7 +5,7 @@
 # error "please don't include this file directly"
 #endif
 
-typedef struct {
+typedef struct raw_spinlock {
 	unsigned int slock;
 } raw_spinlock_t;
 
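From the point of view of ordinary lock users nothing changes: the lock word remains a single zero-initialized unsigned int and the normal spinlock API is untouched; with CONFIG_PARAVIRT the calls are simply routed through pv_lock_ops. A trivial, hypothetical caller for illustration:

/* Hypothetical caller, unaffected by this patch.  Under CONFIG_PARAVIRT
 * spin_lock()/spin_unlock() end up in pv_lock_ops.spin_lock/spin_unlock;
 * without it they go straight to the native ticket-lock code. */
static DEFINE_SPINLOCK(example_lock);

static void example_critical_section(void)
{
	spin_lock(&example_lock);
	/* ... protected work ... */
	spin_unlock(&example_lock);
}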