author		Jeremy Fitzhardinge <jeremy@goop.org>	2013-08-09 10:21:53 -0400
committer	H. Peter Anvin <hpa@linux.intel.com>	2013-08-09 10:53:23 -0400
commit		80bd58fef495d000a02fc5b55ca76d423400e748 (patch)
tree		5bebcd10517b355fc23db96603965f0d54152d40 /arch/x86/xen/spinlock.c
parent		bf7aab3ad4b4364a293421d628a912a2153ee1ee (diff)
xen, pvticketlock: Xen implementation for PV ticket locks
Replace the old Xen implementation of PV spinlocks with an implementation of xen_lock_spinning and xen_unlock_kick.

xen_lock_spinning simply registers the cpu in its entry in lock_waiting, adds itself to the waiting_cpus set, and blocks on an event channel until the channel becomes pending.

xen_unlock_kick searches the cpus in waiting_cpus looking for the one which wants this lock with the next ticket, if any. If found, it kicks it by making its event channel pending, which wakes it up.

We need to make sure interrupts are disabled while we're relying on the contents of the per-cpu lock_waiting values, otherwise an interrupt handler could come in, try to take some other lock, block, and overwrite our values.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Link: http://lkml.kernel.org/r/1376058122-8248-6-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
[ Raghavendra: use function + enum instead of macro, cmpxchg for zero status reset.
  Reintroduce break since we know the exact vCPU to send the IPI to, as suggested by Konrad. ]
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
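For readers who don't want to walk through the diff, the two new hooks boil down to roughly the following condensed sketch of the code added below (the stats accounting, the local_irq_save()/restore around the slow path, and the check for an uninitialized kicker irq are omitted for brevity):

	static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
	{
		struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
		int irq = __this_cpu_read(lock_kicker_irq);

		/* publish which ticket of which lock this cpu is waiting for */
		w->want = want;
		smp_wmb();
		w->lock = lock;
		cpumask_set_cpu(smp_processor_id(), &waiting_cpus);

		/* re-arm the event channel, then recheck before blocking */
		xen_clear_irq_pending(irq);
		if (ACCESS_ONCE(lock->tickets.head) != want)
			xen_poll_irq(irq);	/* block until kicked (or a spurious wakeup) */

		cpumask_clear_cpu(smp_processor_id(), &waiting_cpus);
		w->lock = NULL;
	}

	static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
	{
		int cpu;

		/* kick the one cpu (if any) waiting for this lock's next ticket */
		for_each_cpu(cpu, &waiting_cpus) {
			const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);

			if (w->lock == lock && w->want == next) {
				xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
				break;
			}
		}
	}

The smp_wmb() between the two per-cpu stores is what lets xen_unlock_kick trust w->want whenever it sees a matching non-NULL w->lock.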
Diffstat (limited to 'arch/x86/xen/spinlock.c')
-rw-r--r--	arch/x86/xen/spinlock.c	348
1 file changed, 79 insertions, 269 deletions
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d50962936af4..a458729be25f 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
 #include "xen-ops.h"
 #include "debugfs.h"
 
-#ifdef CONFIG_XEN_DEBUG_FS
-static struct xen_spinlock_stats
-{
-	u64 taken;
-	u32 taken_slow;
-	u32 taken_slow_nested;
-	u32 taken_slow_pickup;
-	u32 taken_slow_spurious;
-	u32 taken_slow_irqenable;
+enum xen_contention_stat {
+	TAKEN_SLOW,
+	TAKEN_SLOW_PICKUP,
+	TAKEN_SLOW_SPURIOUS,
+	RELEASED_SLOW,
+	RELEASED_SLOW_KICKED,
+	NR_CONTENTION_STATS
+};
 
-	u64 released;
-	u32 released_slow;
-	u32 released_slow_kicked;
 
+#ifdef CONFIG_XEN_DEBUG_FS
 #define HISTO_BUCKETS 30
-	u32 histo_spin_total[HISTO_BUCKETS+1];
-	u32 histo_spin_spinning[HISTO_BUCKETS+1];
+static struct xen_spinlock_stats
+{
+	u32 contention_stats[NR_CONTENTION_STATS];
 	u32 histo_spin_blocked[HISTO_BUCKETS+1];
-
-	u64 time_total;
-	u64 time_spinning;
 	u64 time_blocked;
 } spinlock_stats;
 
 static u8 zero_stats;
 
-static unsigned lock_timeout = 1 << 10;
-#define TIMEOUT lock_timeout
-
 static inline void check_zero(void)
 {
-	if (unlikely(zero_stats)) {
-		memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-		zero_stats = 0;
+	u8 ret;
+	u8 old = ACCESS_ONCE(zero_stats);
+	if (unlikely(old)) {
+		ret = cmpxchg(&zero_stats, old, 0);
+		/* This ensures only one fellow resets the stat */
+		if (ret == old)
+			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
 	}
 }
 
-#define ADD_STATS(elem, val)			\
-	do { check_zero(); spinlock_stats.elem += (val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+	check_zero();
+	spinlock_stats.contention_stats[var] += val;
+}
 
 static inline u64 spin_time_start(void)
 {
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
 	array[HISTO_BUCKETS]++;
 }
 
-static inline void spin_time_accum_spinning(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
-	spinlock_stats.time_spinning += delta;
-}
-
-static inline void spin_time_accum_total(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_total);
-	spinlock_stats.time_total += delta;
-}
-
 static inline void spin_time_accum_blocked(u64 start)
 {
 	u32 delta = xen_clocksource_read() - start;
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start)
 }
 #else /* !CONFIG_XEN_DEBUG_FS */
 #define TIMEOUT (1 << 10)
-#define ADD_STATS(elem, val) do { (void)(val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+}
 
 static inline u64 spin_time_start(void)
 {
 	return 0;
 }
 
-static inline void spin_time_accum_total(u64 start)
-{
-}
-static inline void spin_time_accum_spinning(u64 start)
-{
-}
 static inline void spin_time_accum_blocked(u64 start)
 {
 }
@@ -134,230 +113,84 @@ typedef u16 xen_spinners_t;
 	asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
 #endif
 
-struct xen_spinlock {
-	unsigned char lock;		/* 0 -> free; 1 -> locked */
-	xen_spinners_t spinners;	/* count of waiting cpus */
+struct xen_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
 };
 
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-
-#if 0
-static int xen_spin_is_locked(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	/* Not strictly true; this is only the count of contended
-	   lock-takers entering the slow path. */
-	return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	u8 old = 1;
-
-	asm("xchgb %b0,%1"
-	    : "+q" (old), "+m" (xl->lock) : : "memory");
-
-	return old == 0;
-}
-
 static DEFINE_PER_CPU(char *, irq_name);
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
+static cpumask_t waiting_cpus;
 
-/*
- * Mark a cpu as interested in a lock.  Returns the CPU's previous
- * lock of interest, in case we got preempted by an interrupt.
- */
-static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
+static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
-	struct xen_spinlock *prev;
-
-	prev = __this_cpu_read(lock_spinners);
-	__this_cpu_write(lock_spinners, xl);
-
-	wmb();			/* set lock of interest before count */
-
-	inc_spinners(xl);
-
-	return prev;
-}
-
-/*
- * Mark a cpu as no longer interested in a lock.  Restores previous
- * lock of interest (NULL for none).
- */
-static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
-{
-	dec_spinners(xl);
-	wmb();			/* decrement count before restoring lock */
-	__this_cpu_write(lock_spinners, prev);
-}
-
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	struct xen_spinlock *prev;
 	int irq = __this_cpu_read(lock_kicker_irq);
-	int ret;
+	struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
+	int cpu = smp_processor_id();
 	u64 start;
+	unsigned long flags;
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1)
-		return 0;
+		return;
 
 	start = spin_time_start();
 
-	/* announce we're spinning */
-	prev = spinning_lock(xl);
-
-	ADD_STATS(taken_slow, 1);
-	ADD_STATS(taken_slow_nested, prev != NULL);
-
-	do {
-		unsigned long flags;
-
-		/* clear pending */
-		xen_clear_irq_pending(irq);
-
-		/* check again make sure it didn't become free while
-		   we weren't looking  */
-		ret = xen_spin_trylock(lock);
-		if (ret) {
-			ADD_STATS(taken_slow_pickup, 1);
-
-			/*
-			 * If we interrupted another spinlock while it
-			 * was blocking, make sure it doesn't block
-			 * without rechecking the lock.
-			 */
-			if (prev != NULL)
-				xen_set_irq_pending(irq);
-			goto out;
-		}
+	/*
+	 * Make sure an interrupt handler can't upset things in a
+	 * partially setup state.
+	 */
+	local_irq_save(flags);
 
-		flags = arch_local_save_flags();
-		if (irq_enable) {
-			ADD_STATS(taken_slow_irqenable, 1);
-			raw_local_irq_enable();
-		}
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;
 
-		/*
-		 * Block until irq becomes pending.  If we're
-		 * interrupted at this point (after the trylock but
-		 * before entering the block), then the nested lock
-		 * handler guarantees that the irq will be left
-		 * pending if there's any chance the lock became free;
-		 * xen_poll_irq() returns immediately if the irq is
-		 * pending.
-		 */
-		xen_poll_irq(irq);
+	/* This uses set_bit, which atomic and therefore a barrier */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+	add_stats(TAKEN_SLOW, 1);
 
-		raw_local_irq_restore(flags);
+	/* clear pending */
+	xen_clear_irq_pending(irq);
 
-		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
-	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
+	/* Only check lock once pending cleared */
+	barrier();
 
+	/* check again make sure it didn't become free while
+	   we weren't looking  */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
+	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
+	xen_poll_irq(irq);
+	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
-
 out:
-	unspinning_lock(xl, prev);
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
+	local_irq_restore(flags);
 	spin_time_accum_blocked(start);
-
-	return ret;
 }
 
-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	unsigned timeout;
-	u8 oldval;
-	u64 start_spin;
-
-	ADD_STATS(taken, 1);
-
-	start_spin = spin_time_start();
-
-	do {
-		u64 start_spin_fast = spin_time_start();
-
-		timeout = TIMEOUT;
-
-		asm("1: xchgb %1,%0\n"
-		    "   testb %1,%1\n"
-		    "   jz 3f\n"
-		    "2: rep;nop\n"
-		    "   cmpb $0,%0\n"
-		    "   je 1b\n"
-		    "   dec %2\n"
-		    "   jnz 2b\n"
-		    "3:\n"
-		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-		    : "1" (1)
-		    : "memory");
-
-		spin_time_accum_spinning(start_spin_fast);
-
-	} while (unlikely(oldval != 0 &&
-			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
-
-	spin_time_accum_total(start_spin);
-}
-
-static void xen_spin_lock(struct arch_spinlock *lock)
-{
-	__xen_spin_lock(lock, false);
-}
-
-static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
-{
-	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 {
 	int cpu;
 
-	ADD_STATS(released_slow, 1);
+	add_stats(RELEASED_SLOW, 1);
+
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
 
-	for_each_online_cpu(cpu) {
-		/* XXX should mix up next cpu selection */
-		if (per_cpu(lock_spinners, cpu) == xl) {
-			ADD_STATS(released_slow_kicked, 1);
+		if (w->lock == lock && w->want == next) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+			break;
 		}
 	}
 }
 
-static void xen_spin_unlock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	ADD_STATS(released, 1);
-
-	smp_wmb();		/* make sure no writes get moved after unlock */
-	xl->lock = 0;		/* release lock */
-
-	/*
-	 * Make sure unlock happens before checking for waiting
-	 * spinners.  We need a strong barrier to enforce the
-	 * write-read ordering to different memory locations, as the
-	 * CPU makes no implied guarantees about their ordering.
-	 */
-	mb();
-
-	if (unlikely(xl->spinners))
-		xen_spin_unlock_slow(xl);
-}
-#endif
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -420,15 +253,8 @@ void __init xen_init_spinlocks(void)
 	if (xen_hvm_domain())
 		return;
 
-	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
-#if 0
-	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
-	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
-	pv_lock_ops.spin_lock = xen_spin_lock;
-	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
-	pv_lock_ops.spin_trylock = xen_spin_trylock;
-	pv_lock_ops.spin_unlock = xen_spin_unlock;
-#endif
+	pv_lock_ops.lock_spinning = xen_lock_spinning;
+	pv_lock_ops.unlock_kick = xen_unlock_kick;
 }
 
 #ifdef CONFIG_XEN_DEBUG_FS
@@ -446,37 +272,21 @@ static int __init xen_spinlock_debugfs(void)
 
 	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
 
-	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
-
-	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
 	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow);
-	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_nested);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
 	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_pickup);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
 	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_spurious);
-	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_irqenable);
+			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
 
-	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
 	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
 	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-			   &spinlock_stats.released_slow_kicked);
+			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
 
-	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
-			   &spinlock_stats.time_spinning);
 	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
 			   &spinlock_stats.time_blocked);
-	debugfs_create_u64("time_total", 0444, d_spin_debug,
-			   &spinlock_stats.time_total);
 
-	debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-				  spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
-	debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-				  spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
 	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
 				  spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 