aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2008-07-07 15:07:53 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-16 05:15:53 -0400
commit2d9e1e2f58b5612aa4eab0ab54c84308a29dbd79 (patch)
tree08e91f7f485a8fa8ec2515f8c706870c0d44044b
parent56397f8dadb40055479a8ffff23f21a890098a31 (diff)
xen: implement Xen-specific spinlocks
The standard ticket spinlocks are very expensive in a virtual environment, because their performance depends on Xen's scheduler giving vcpus time in the order that they're supposed to take the spinlock. This implements a Xen-specific spinlock, which should be much more efficient. The fast-path is essentially the old Linux-x86 locks, using a single lock byte. The locker decrements the byte; if the result is 0, then they have the lock. If the lock is negative, then locker must spin until the lock is positive again. When there's contention, the locker spin for 2^16[*] iterations waiting to get the lock. If it fails to get the lock in that time, it adds itself to the contention count in the lock and blocks on a per-cpu event channel. When unlocking the spinlock, the locker looks to see if there's anyone blocked waiting for the lock by checking for a non-zero waiter count. If there's a waiter, it traverses the per-cpu "lock_spinners" variable, which contains which lock each CPU is waiting on. It picks one CPU waiting on the lock and sends it an event to wake it up. This allows efficient fast-path spinlock operation, while allowing spinning vcpus to give up their processor time while waiting for a contended lock. [*] 2^16 iterations is threshold at which 98% locks have been taken according to Thomas Friebel's Xen Summit talk "Preventing Guests from Spinning Around". Therefore, we'd expect the lock and unlock slow paths will only be entered 2% of the time. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Christoph Lameter <clameter@linux-foundation.org> Cc: Petr Tesarik <ptesarik@suse.cz> Cc: Virtualization <virtualization@lists.linux-foundation.org> Cc: Xen devel <xen-devel@lists.xensource.com> Cc: Thomas Friebel <thomas.friebel@amd.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/xen/smp.c172
-rw-r--r--drivers/xen/events.c27
-rw-r--r--include/asm-x86/xen/events.h1
-rw-r--r--include/xen/events.h7
4 files changed, 206 insertions, 1 deletions
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index a8ebafc09d47..e693812ac59a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,6 +15,7 @@
15 * This does not handle HOTPLUG_CPU yet. 15 * This does not handle HOTPLUG_CPU yet.
16 */ 16 */
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/kernel_stat.h>
18#include <linux/err.h> 19#include <linux/err.h>
19#include <linux/smp.h> 20#include <linux/smp.h>
20 21
@@ -35,6 +36,8 @@
35#include "xen-ops.h" 36#include "xen-ops.h"
36#include "mmu.h" 37#include "mmu.h"
37 38
39static void __cpuinit xen_init_lock_cpu(int cpu);
40
38cpumask_t xen_cpu_initialized_map; 41cpumask_t xen_cpu_initialized_map;
39 42
40static DEFINE_PER_CPU(int, resched_irq); 43static DEFINE_PER_CPU(int, resched_irq);
@@ -179,6 +182,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
179{ 182{
180 unsigned cpu; 183 unsigned cpu;
181 184
185 xen_init_lock_cpu(0);
186
182 smp_store_cpu_info(0); 187 smp_store_cpu_info(0);
183 cpu_data(0).x86_max_cores = 1; 188 cpu_data(0).x86_max_cores = 1;
184 set_cpu_sibling_map(0); 189 set_cpu_sibling_map(0);
@@ -301,6 +306,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
301 clear_tsk_thread_flag(idle, TIF_FORK); 306 clear_tsk_thread_flag(idle, TIF_FORK);
302#endif 307#endif
303 xen_setup_timer(cpu); 308 xen_setup_timer(cpu);
309 xen_init_lock_cpu(cpu);
304 310
305 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 311 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
306 312
@@ -413,6 +419,170 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
413 return IRQ_HANDLED; 419 return IRQ_HANDLED;
414} 420}
415 421
422struct xen_spinlock {
423 unsigned char lock; /* 0 -> free; 1 -> locked */
424 unsigned short spinners; /* count of waiting cpus */
425};
426
427static int xen_spin_is_locked(struct raw_spinlock *lock)
428{
429 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
430
431 return xl->lock != 0;
432}
433
434static int xen_spin_is_contended(struct raw_spinlock *lock)
435{
436 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
437
438 /* Not strictly true; this is only the count of contended
439 lock-takers entering the slow path. */
440 return xl->spinners != 0;
441}
442
443static int xen_spin_trylock(struct raw_spinlock *lock)
444{
445 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
446 u8 old = 1;
447
448 asm("xchgb %b0,%1"
449 : "+q" (old), "+m" (xl->lock) : : "memory");
450
451 return old == 0;
452}
453
454static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
455static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
456
457static inline void spinning_lock(struct xen_spinlock *xl)
458{
459 __get_cpu_var(lock_spinners) = xl;
460 wmb(); /* set lock of interest before count */
461 asm(LOCK_PREFIX " incw %0"
462 : "+m" (xl->spinners) : : "memory");
463}
464
465static inline void unspinning_lock(struct xen_spinlock *xl)
466{
467 asm(LOCK_PREFIX " decw %0"
468 : "+m" (xl->spinners) : : "memory");
469 wmb(); /* decrement count before clearing lock */
470 __get_cpu_var(lock_spinners) = NULL;
471}
472
473static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
474{
475 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
476 int irq = __get_cpu_var(lock_kicker_irq);
477 int ret;
478
479 /* If kicker interrupts not initialized yet, just spin */
480 if (irq == -1)
481 return 0;
482
483 /* announce we're spinning */
484 spinning_lock(xl);
485
486 /* clear pending */
487 xen_clear_irq_pending(irq);
488
489 /* check again make sure it didn't become free while
490 we weren't looking */
491 ret = xen_spin_trylock(lock);
492 if (ret)
493 goto out;
494
495 /* block until irq becomes pending */
496 xen_poll_irq(irq);
497 kstat_this_cpu.irqs[irq]++;
498
499out:
500 unspinning_lock(xl);
501 return ret;
502}
503
504static void xen_spin_lock(struct raw_spinlock *lock)
505{
506 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
507 int timeout;
508 u8 oldval;
509
510 do {
511 timeout = 1 << 10;
512
513 asm("1: xchgb %1,%0\n"
514 " testb %1,%1\n"
515 " jz 3f\n"
516 "2: rep;nop\n"
517 " cmpb $0,%0\n"
518 " je 1b\n"
519 " dec %2\n"
520 " jnz 2b\n"
521 "3:\n"
522 : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
523 : "1" (1)
524 : "memory");
525
526 } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
527}
528
529static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
530{
531 int cpu;
532
533 for_each_online_cpu(cpu) {
534 /* XXX should mix up next cpu selection */
535 if (per_cpu(lock_spinners, cpu) == xl) {
536 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
537 break;
538 }
539 }
540}
541
542static void xen_spin_unlock(struct raw_spinlock *lock)
543{
544 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
545
546 smp_wmb(); /* make sure no writes get moved after unlock */
547 xl->lock = 0; /* release lock */
548
549 /* make sure unlock happens before kick */
550 barrier();
551
552 if (unlikely(xl->spinners))
553 xen_spin_unlock_slow(xl);
554}
555
556static __cpuinit void xen_init_lock_cpu(int cpu)
557{
558 int irq;
559 const char *name;
560
561 name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
562 irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
563 cpu,
564 xen_reschedule_interrupt,
565 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
566 name,
567 NULL);
568
569 if (irq >= 0) {
570 disable_irq(irq); /* make sure it's never delivered */
571 per_cpu(lock_kicker_irq, cpu) = irq;
572 }
573
574 printk("cpu %d spinlock event irq %d\n", cpu, irq);
575}
576
577static void __init xen_init_spinlocks(void)
578{
579 pv_lock_ops.spin_is_locked = xen_spin_is_locked;
580 pv_lock_ops.spin_is_contended = xen_spin_is_contended;
581 pv_lock_ops.spin_lock = xen_spin_lock;
582 pv_lock_ops.spin_trylock = xen_spin_trylock;
583 pv_lock_ops.spin_unlock = xen_spin_unlock;
584}
585
416static const struct smp_ops xen_smp_ops __initdata = { 586static const struct smp_ops xen_smp_ops __initdata = {
417 .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, 587 .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
418 .smp_prepare_cpus = xen_smp_prepare_cpus, 588 .smp_prepare_cpus = xen_smp_prepare_cpus,
@@ -430,5 +600,5 @@ void __init xen_smp_init(void)
430{ 600{
431 smp_ops = xen_smp_ops; 601 smp_ops = xen_smp_ops;
432 xen_fill_possible_map(); 602 xen_fill_possible_map();
433 paravirt_use_bytelocks(); 603 xen_init_spinlocks();
434} 604}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 332dd63750a0..0e0c28574af8 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -734,6 +734,33 @@ static void restore_cpu_ipis(unsigned int cpu)
734 } 734 }
735} 735}
736 736
737/* Clear an irq's pending state, in preparation for polling on it */
738void xen_clear_irq_pending(int irq)
739{
740 int evtchn = evtchn_from_irq(irq);
741
742 if (VALID_EVTCHN(evtchn))
743 clear_evtchn(evtchn);
744}
745
746/* Poll waiting for an irq to become pending. In the usual case, the
747 irq will be disabled so it won't deliver an interrupt. */
748void xen_poll_irq(int irq)
749{
750 evtchn_port_t evtchn = evtchn_from_irq(irq);
751
752 if (VALID_EVTCHN(evtchn)) {
753 struct sched_poll poll;
754
755 poll.nr_ports = 1;
756 poll.timeout = 0;
757 poll.ports = &evtchn;
758
759 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
760 BUG();
761 }
762}
763
737void xen_irq_resume(void) 764void xen_irq_resume(void)
738{ 765{
739 unsigned int cpu, irq, evtchn; 766 unsigned int cpu, irq, evtchn;
diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
index f8d57ea1f05f..8ded74720024 100644
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h
@@ -5,6 +5,7 @@ enum ipi_vector {
5 XEN_RESCHEDULE_VECTOR, 5 XEN_RESCHEDULE_VECTOR,
6 XEN_CALL_FUNCTION_VECTOR, 6 XEN_CALL_FUNCTION_VECTOR,
7 XEN_CALL_FUNCTION_SINGLE_VECTOR, 7 XEN_CALL_FUNCTION_SINGLE_VECTOR,
8 XEN_SPIN_UNLOCK_VECTOR,
8 9
9 XEN_NR_IPIS, 10 XEN_NR_IPIS,
10}; 11};
diff --git a/include/xen/events.h b/include/xen/events.h
index 67c4436554a9..4680ff3fbc91 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -44,4 +44,11 @@ extern void notify_remote_via_irq(int irq);
44 44
45extern void xen_irq_resume(void); 45extern void xen_irq_resume(void);
46 46
47/* Clear an irq's pending state, in preparation for polling on it */
48void xen_clear_irq_pending(int irq);
49
50/* Poll waiting for an irq to become pending. In the usual case, the
51 irq will be disabled so it won't deliver an interrupt. */
52void xen_poll_irq(int irq);
53
47#endif /* _XEN_EVENTS_H */ 54#endif /* _XEN_EVENTS_H */