diff options
author | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2013-09-09 12:01:15 -0400 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2013-09-09 12:01:15 -0400 |
commit | c3f31f6a6f68bcb51689c90733282ec263602a9d (patch) | |
tree | 07c2c7ae966b07d5adabe78215d9c76fa4ec531a /arch/x86 | |
parent | e1a9c16b303725ac900fee2a3ec4dbe2c2f846ab (diff) | |
parent | 36bd621337c91a1ecda588e5bbbae8dd9698bae7 (diff) |
Merge branch 'x86/spinlocks' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into stable/for-linus-3.12
* 'x86/spinlocks' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/kvm/guest: Fix sparse warning: "symbol 'klock_waiting' was not declared as static"
kvm: Paravirtual ticketlocks support for linux guests running on KVM hypervisor
kvm guest: Add configuration support to enable debug information for KVM Guests
kvm uapi: Add KICK_CPU and PV_UNHALT definition to uapi
xen, pvticketlock: Allow interrupts to be enabled while blocking
x86, ticketlock: Add slowpath logic
jump_label: Split jumplabel ratelimit
x86, pvticketlock: When paravirtualizing ticket locks, increment by 2
x86, pvticketlock: Use callee-save for lock_spinning
xen, pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks
xen, pvticketlock: Xen implementation for PV ticket locks
xen: Defer spinlock setup until boot CPU setup
x86, ticketlock: Collapse a layer of functions
x86, ticketlock: Don't inline _spin_unlock when using paravirt spinlocks
x86, spinlock: Replace pv spinlocks with pv ticketlocks
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_para.h | 14 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt.h | 32 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt_types.h | 14 | ||||
-rw-r--r-- | arch/x86/include/asm/spinlock.h | 128 | ||||
-rw-r--r-- | arch/x86/include/asm/spinlock_types.h | 16 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/kvm_para.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 262 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt-spinlocks.c | 18 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 387 |
11 files changed, 531 insertions, 353 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b32ebf92b0ce..b1fb846e6dac 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -632,6 +632,7 @@ config PARAVIRT_DEBUG | |||
632 | config PARAVIRT_SPINLOCKS | 632 | config PARAVIRT_SPINLOCKS |
633 | bool "Paravirtualization layer for spinlocks" | 633 | bool "Paravirtualization layer for spinlocks" |
634 | depends on PARAVIRT && SMP | 634 | depends on PARAVIRT && SMP |
635 | select UNINLINE_SPIN_UNLOCK | ||
635 | ---help--- | 636 | ---help--- |
636 | Paravirtualized spinlocks allow a pvops backend to replace the | 637 | Paravirtualized spinlocks allow a pvops backend to replace the |
637 | spinlock implementation with something virtualization-friendly | 638 | spinlock implementation with something virtualization-friendly |
@@ -656,6 +657,15 @@ config KVM_GUEST | |||
656 | underlying device model, the host provides the guest with | 657 | underlying device model, the host provides the guest with |
657 | timing infrastructure such as time of day, and system time | 658 | timing infrastructure such as time of day, and system time |
658 | 659 | ||
660 | config KVM_DEBUG_FS | ||
661 | bool "Enable debug information for KVM Guests in debugfs" | ||
662 | depends on KVM_GUEST && DEBUG_FS | ||
663 | default n | ||
664 | ---help--- | ||
665 | This option enables collection of various statistics for KVM guest. | ||
666 | Statistics are displayed in debugfs filesystem. Enabling this option | ||
667 | may incur significant overhead. | ||
668 | |||
659 | source "arch/x86/lguest/Kconfig" | 669 | source "arch/x86/lguest/Kconfig" |
660 | 670 | ||
661 | config PARAVIRT_TIME_ACCOUNTING | 671 | config PARAVIRT_TIME_ACCOUNTING |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 695399f2d5eb..427afcbf3d55 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -118,10 +118,20 @@ void kvm_async_pf_task_wait(u32 token); | |||
118 | void kvm_async_pf_task_wake(u32 token); | 118 | void kvm_async_pf_task_wake(u32 token); |
119 | u32 kvm_read_and_reset_pf_reason(void); | 119 | u32 kvm_read_and_reset_pf_reason(void); |
120 | extern void kvm_disable_steal_time(void); | 120 | extern void kvm_disable_steal_time(void); |
121 | #else | 121 | |
122 | #define kvm_guest_init() do { } while (0) | 122 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
123 | void __init kvm_spinlock_init(void); | ||
124 | #else /* !CONFIG_PARAVIRT_SPINLOCKS */ | ||
125 | static inline void kvm_spinlock_init(void) | ||
126 | { | ||
127 | } | ||
128 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
129 | |||
130 | #else /* CONFIG_KVM_GUEST */ | ||
131 | #define kvm_guest_init() do {} while (0) | ||
123 | #define kvm_async_pf_task_wait(T) do {} while(0) | 132 | #define kvm_async_pf_task_wait(T) do {} while(0) |
124 | #define kvm_async_pf_task_wake(T) do {} while(0) | 133 | #define kvm_async_pf_task_wake(T) do {} while(0) |
134 | |||
125 | static inline u32 kvm_read_and_reset_pf_reason(void) | 135 | static inline u32 kvm_read_and_reset_pf_reason(void) |
126 | { | 136 | { |
127 | return 0; | 137 | return 0; |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cfdc9ee4c900..401f350ef71b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | |||
712 | 712 | ||
713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
714 | 714 | ||
715 | static inline int arch_spin_is_locked(struct arch_spinlock *lock) | 715 | static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, |
716 | __ticket_t ticket) | ||
716 | { | 717 | { |
717 | return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); | 718 | PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket); |
718 | } | 719 | } |
719 | 720 | ||
720 | static inline int arch_spin_is_contended(struct arch_spinlock *lock) | 721 | static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, |
722 | __ticket_t ticket) | ||
721 | { | 723 | { |
722 | return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); | 724 | PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); |
723 | } | ||
724 | #define arch_spin_is_contended arch_spin_is_contended | ||
725 | |||
726 | static __always_inline void arch_spin_lock(struct arch_spinlock *lock) | ||
727 | { | ||
728 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); | ||
729 | } | ||
730 | |||
731 | static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock, | ||
732 | unsigned long flags) | ||
733 | { | ||
734 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); | ||
735 | } | ||
736 | |||
737 | static __always_inline int arch_spin_trylock(struct arch_spinlock *lock) | ||
738 | { | ||
739 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); | ||
740 | } | ||
741 | |||
742 | static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) | ||
743 | { | ||
744 | PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); | ||
745 | } | 725 | } |
746 | 726 | ||
747 | #endif | 727 | #endif |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0db1fcac668c..04ac40e192eb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -327,13 +327,15 @@ struct pv_mmu_ops { | |||
327 | }; | 327 | }; |
328 | 328 | ||
329 | struct arch_spinlock; | 329 | struct arch_spinlock; |
330 | #ifdef CONFIG_SMP | ||
331 | #include <asm/spinlock_types.h> | ||
332 | #else | ||
333 | typedef u16 __ticket_t; | ||
334 | #endif | ||
335 | |||
330 | struct pv_lock_ops { | 336 | struct pv_lock_ops { |
331 | int (*spin_is_locked)(struct arch_spinlock *lock); | 337 | struct paravirt_callee_save lock_spinning; |
332 | int (*spin_is_contended)(struct arch_spinlock *lock); | 338 | void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); |
333 | void (*spin_lock)(struct arch_spinlock *lock); | ||
334 | void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); | ||
335 | int (*spin_trylock)(struct arch_spinlock *lock); | ||
336 | void (*spin_unlock)(struct arch_spinlock *lock); | ||
337 | }; | 339 | }; |
338 | 340 | ||
339 | /* This contains all the paravirt structures: we get a convenient | 341 | /* This contains all the paravirt structures: we get a convenient |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 33692eaabab5..d68883dd133c 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -1,11 +1,14 @@ | |||
1 | #ifndef _ASM_X86_SPINLOCK_H | 1 | #ifndef _ASM_X86_SPINLOCK_H |
2 | #define _ASM_X86_SPINLOCK_H | 2 | #define _ASM_X86_SPINLOCK_H |
3 | 3 | ||
4 | #include <linux/jump_label.h> | ||
4 | #include <linux/atomic.h> | 5 | #include <linux/atomic.h> |
5 | #include <asm/page.h> | 6 | #include <asm/page.h> |
6 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
7 | #include <linux/compiler.h> | 8 | #include <linux/compiler.h> |
8 | #include <asm/paravirt.h> | 9 | #include <asm/paravirt.h> |
10 | #include <asm/bitops.h> | ||
11 | |||
9 | /* | 12 | /* |
10 | * Your basic SMP spinlocks, allowing only a single CPU anywhere | 13 | * Your basic SMP spinlocks, allowing only a single CPU anywhere |
11 | * | 14 | * |
@@ -34,6 +37,31 @@ | |||
34 | # define UNLOCK_LOCK_PREFIX | 37 | # define UNLOCK_LOCK_PREFIX |
35 | #endif | 38 | #endif |
36 | 39 | ||
40 | /* How long a lock should spin before we consider blocking */ | ||
41 | #define SPIN_THRESHOLD (1 << 15) | ||
42 | |||
43 | extern struct static_key paravirt_ticketlocks_enabled; | ||
44 | static __always_inline bool static_key_false(struct static_key *key); | ||
45 | |||
46 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
47 | |||
48 | static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) | ||
49 | { | ||
50 | set_bit(0, (volatile unsigned long *)&lock->tickets.tail); | ||
51 | } | ||
52 | |||
53 | #else /* !CONFIG_PARAVIRT_SPINLOCKS */ | ||
54 | static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock, | ||
55 | __ticket_t ticket) | ||
56 | { | ||
57 | } | ||
58 | static inline void __ticket_unlock_kick(arch_spinlock_t *lock, | ||
59 | __ticket_t ticket) | ||
60 | { | ||
61 | } | ||
62 | |||
63 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
64 | |||
37 | /* | 65 | /* |
38 | * Ticket locks are conceptually two parts, one indicating the current head of | 66 | * Ticket locks are conceptually two parts, one indicating the current head of |
39 | * the queue, and the other indicating the current tail. The lock is acquired | 67 | * the queue, and the other indicating the current tail. The lock is acquired |
@@ -47,81 +75,101 @@ | |||
47 | * in the high part, because a wide xadd increment of the low part would carry | 75 | * in the high part, because a wide xadd increment of the low part would carry |
48 | * up and contaminate the high part. | 76 | * up and contaminate the high part. |
49 | */ | 77 | */ |
50 | static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) | 78 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) |
51 | { | 79 | { |
52 | register struct __raw_tickets inc = { .tail = 1 }; | 80 | register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC }; |
53 | 81 | ||
54 | inc = xadd(&lock->tickets, inc); | 82 | inc = xadd(&lock->tickets, inc); |
83 | if (likely(inc.head == inc.tail)) | ||
84 | goto out; | ||
55 | 85 | ||
86 | inc.tail &= ~TICKET_SLOWPATH_FLAG; | ||
56 | for (;;) { | 87 | for (;;) { |
57 | if (inc.head == inc.tail) | 88 | unsigned count = SPIN_THRESHOLD; |
58 | break; | 89 | |
59 | cpu_relax(); | 90 | do { |
60 | inc.head = ACCESS_ONCE(lock->tickets.head); | 91 | if (ACCESS_ONCE(lock->tickets.head) == inc.tail) |
92 | goto out; | ||
93 | cpu_relax(); | ||
94 | } while (--count); | ||
95 | __ticket_lock_spinning(lock, inc.tail); | ||
61 | } | 96 | } |
62 | barrier(); /* make sure nothing creeps before the lock is taken */ | 97 | out: barrier(); /* make sure nothing creeps before the lock is taken */ |
63 | } | 98 | } |
64 | 99 | ||
65 | static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) | 100 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) |
66 | { | 101 | { |
67 | arch_spinlock_t old, new; | 102 | arch_spinlock_t old, new; |
68 | 103 | ||
69 | old.tickets = ACCESS_ONCE(lock->tickets); | 104 | old.tickets = ACCESS_ONCE(lock->tickets); |
70 | if (old.tickets.head != old.tickets.tail) | 105 | if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) |
71 | return 0; | 106 | return 0; |
72 | 107 | ||
73 | new.head_tail = old.head_tail + (1 << TICKET_SHIFT); | 108 | new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); |
74 | 109 | ||
75 | /* cmpxchg is a full barrier, so nothing can move before it */ | 110 | /* cmpxchg is a full barrier, so nothing can move before it */ |
76 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; | 111 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; |
77 | } | 112 | } |
78 | 113 | ||
79 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | 114 | static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock, |
115 | arch_spinlock_t old) | ||
80 | { | 116 | { |
81 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); | 117 | arch_spinlock_t new; |
118 | |||
119 | BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); | ||
120 | |||
121 | /* Perform the unlock on the "before" copy */ | ||
122 | old.tickets.head += TICKET_LOCK_INC; | ||
123 | |||
124 | /* Clear the slowpath flag */ | ||
125 | new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT); | ||
126 | |||
127 | /* | ||
128 | * If the lock is uncontended, clear the flag - use cmpxchg in | ||
129 | * case it changes behind our back though. | ||
130 | */ | ||
131 | if (new.tickets.head != new.tickets.tail || | ||
132 | cmpxchg(&lock->head_tail, old.head_tail, | ||
133 | new.head_tail) != old.head_tail) { | ||
134 | /* | ||
135 | * Lock still has someone queued for it, so wake up an | ||
136 | * appropriate waiter. | ||
137 | */ | ||
138 | __ticket_unlock_kick(lock, old.tickets.head); | ||
139 | } | ||
82 | } | 140 | } |
83 | 141 | ||
84 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) | 142 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) |
85 | { | 143 | { |
86 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); | 144 | if (TICKET_SLOWPATH_FLAG && |
145 | static_key_false(&paravirt_ticketlocks_enabled)) { | ||
146 | arch_spinlock_t prev; | ||
87 | 147 | ||
88 | return tmp.tail != tmp.head; | 148 | prev = *lock; |
89 | } | 149 | add_smp(&lock->tickets.head, TICKET_LOCK_INC); |
90 | 150 | ||
91 | static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) | 151 | /* add_smp() is a full mb() */ |
92 | { | ||
93 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); | ||
94 | 152 | ||
95 | return (__ticket_t)(tmp.tail - tmp.head) > 1; | 153 | if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) |
154 | __ticket_unlock_slowpath(lock, prev); | ||
155 | } else | ||
156 | __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); | ||
96 | } | 157 | } |
97 | 158 | ||
98 | #ifndef CONFIG_PARAVIRT_SPINLOCKS | ||
99 | |||
100 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) | 159 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) |
101 | { | 160 | { |
102 | return __ticket_spin_is_locked(lock); | 161 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); |
103 | } | ||
104 | |||
105 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) | ||
106 | { | ||
107 | return __ticket_spin_is_contended(lock); | ||
108 | } | ||
109 | #define arch_spin_is_contended arch_spin_is_contended | ||
110 | 162 | ||
111 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) | 163 | return tmp.tail != tmp.head; |
112 | { | ||
113 | __ticket_spin_lock(lock); | ||
114 | } | 164 | } |
115 | 165 | ||
116 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) | 166 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) |
117 | { | 167 | { |
118 | return __ticket_spin_trylock(lock); | 168 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); |
119 | } | ||
120 | 169 | ||
121 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) | 170 | return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; |
122 | { | ||
123 | __ticket_spin_unlock(lock); | ||
124 | } | 171 | } |
172 | #define arch_spin_is_contended arch_spin_is_contended | ||
125 | 173 | ||
126 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, | 174 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, |
127 | unsigned long flags) | 175 | unsigned long flags) |
@@ -129,8 +177,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, | |||
129 | arch_spin_lock(lock); | 177 | arch_spin_lock(lock); |
130 | } | 178 | } |
131 | 179 | ||
132 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
133 | |||
134 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) | 180 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) |
135 | { | 181 | { |
136 | while (arch_spin_is_locked(lock)) | 182 | while (arch_spin_is_locked(lock)) |
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index ad0ad07fc006..4f1bea19945b 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h | |||
@@ -1,13 +1,17 @@ | |||
1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H | 1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H |
2 | #define _ASM_X86_SPINLOCK_TYPES_H | 2 | #define _ASM_X86_SPINLOCK_TYPES_H |
3 | 3 | ||
4 | #ifndef __LINUX_SPINLOCK_TYPES_H | ||
5 | # error "please don't include this file directly" | ||
6 | #endif | ||
7 | |||
8 | #include <linux/types.h> | 4 | #include <linux/types.h> |
9 | 5 | ||
10 | #if (CONFIG_NR_CPUS < 256) | 6 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
7 | #define __TICKET_LOCK_INC 2 | ||
8 | #define TICKET_SLOWPATH_FLAG ((__ticket_t)1) | ||
9 | #else | ||
10 | #define __TICKET_LOCK_INC 1 | ||
11 | #define TICKET_SLOWPATH_FLAG ((__ticket_t)0) | ||
12 | #endif | ||
13 | |||
14 | #if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) | ||
11 | typedef u8 __ticket_t; | 15 | typedef u8 __ticket_t; |
12 | typedef u16 __ticketpair_t; | 16 | typedef u16 __ticketpair_t; |
13 | #else | 17 | #else |
@@ -15,6 +19,8 @@ typedef u16 __ticket_t; | |||
15 | typedef u32 __ticketpair_t; | 19 | typedef u32 __ticketpair_t; |
16 | #endif | 20 | #endif |
17 | 21 | ||
22 | #define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) | ||
23 | |||
18 | #define TICKET_SHIFT (sizeof(__ticket_t) * 8) | 24 | #define TICKET_SHIFT (sizeof(__ticket_t) * 8) |
19 | 25 | ||
20 | typedef struct arch_spinlock { | 26 | typedef struct arch_spinlock { |
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 06fdbd987e97..94dc8ca434e0 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h | |||
@@ -23,6 +23,7 @@ | |||
23 | #define KVM_FEATURE_ASYNC_PF 4 | 23 | #define KVM_FEATURE_ASYNC_PF 4 |
24 | #define KVM_FEATURE_STEAL_TIME 5 | 24 | #define KVM_FEATURE_STEAL_TIME 5 |
25 | #define KVM_FEATURE_PV_EOI 6 | 25 | #define KVM_FEATURE_PV_EOI 6 |
26 | #define KVM_FEATURE_PV_UNHALT 7 | ||
26 | 27 | ||
27 | /* The last 8 bits are used to indicate how to interpret the flags field | 28 | /* The last 8 bits are used to indicate how to interpret the flags field |
28 | * in pvclock structure. If no bits are set, all flags are ignored. | 29 | * in pvclock structure. If no bits are set, all flags are ignored. |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a96d32cc55b8..56e2fa4a8b13 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/sched.h> | 34 | #include <linux/sched.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/kprobes.h> | 36 | #include <linux/kprobes.h> |
37 | #include <linux/debugfs.h> | ||
37 | #include <asm/timer.h> | 38 | #include <asm/timer.h> |
38 | #include <asm/cpu.h> | 39 | #include <asm/cpu.h> |
39 | #include <asm/traps.h> | 40 | #include <asm/traps.h> |
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
419 | WARN_ON(kvm_register_clock("primary cpu clock")); | 420 | WARN_ON(kvm_register_clock("primary cpu clock")); |
420 | kvm_guest_cpu_init(); | 421 | kvm_guest_cpu_init(); |
421 | native_smp_prepare_boot_cpu(); | 422 | native_smp_prepare_boot_cpu(); |
423 | kvm_spinlock_init(); | ||
422 | } | 424 | } |
423 | 425 | ||
424 | static void kvm_guest_cpu_online(void *dummy) | 426 | static void kvm_guest_cpu_online(void *dummy) |
@@ -523,3 +525,263 @@ static __init int activate_jump_labels(void) | |||
523 | return 0; | 525 | return 0; |
524 | } | 526 | } |
525 | arch_initcall(activate_jump_labels); | 527 | arch_initcall(activate_jump_labels); |
528 | |||
529 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
530 | |||
531 | /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ | ||
532 | static void kvm_kick_cpu(int cpu) | ||
533 | { | ||
534 | int apicid; | ||
535 | unsigned long flags = 0; | ||
536 | |||
537 | apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
538 | kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); | ||
539 | } | ||
540 | |||
541 | enum kvm_contention_stat { | ||
542 | TAKEN_SLOW, | ||
543 | TAKEN_SLOW_PICKUP, | ||
544 | RELEASED_SLOW, | ||
545 | RELEASED_SLOW_KICKED, | ||
546 | NR_CONTENTION_STATS | ||
547 | }; | ||
548 | |||
549 | #ifdef CONFIG_KVM_DEBUG_FS | ||
550 | #define HISTO_BUCKETS 30 | ||
551 | |||
552 | static struct kvm_spinlock_stats | ||
553 | { | ||
554 | u32 contention_stats[NR_CONTENTION_STATS]; | ||
555 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | ||
556 | u64 time_blocked; | ||
557 | } spinlock_stats; | ||
558 | |||
559 | static u8 zero_stats; | ||
560 | |||
561 | static inline void check_zero(void) | ||
562 | { | ||
563 | u8 ret; | ||
564 | u8 old; | ||
565 | |||
566 | old = ACCESS_ONCE(zero_stats); | ||
567 | if (unlikely(old)) { | ||
568 | ret = cmpxchg(&zero_stats, old, 0); | ||
569 | /* This ensures only one fellow resets the stat */ | ||
570 | if (ret == old) | ||
571 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
572 | } | ||
573 | } | ||
574 | |||
575 | static inline void add_stats(enum kvm_contention_stat var, u32 val) | ||
576 | { | ||
577 | check_zero(); | ||
578 | spinlock_stats.contention_stats[var] += val; | ||
579 | } | ||
580 | |||
581 | |||
582 | static inline u64 spin_time_start(void) | ||
583 | { | ||
584 | return sched_clock(); | ||
585 | } | ||
586 | |||
587 | static void __spin_time_accum(u64 delta, u32 *array) | ||
588 | { | ||
589 | unsigned index; | ||
590 | |||
591 | index = ilog2(delta); | ||
592 | check_zero(); | ||
593 | |||
594 | if (index < HISTO_BUCKETS) | ||
595 | array[index]++; | ||
596 | else | ||
597 | array[HISTO_BUCKETS]++; | ||
598 | } | ||
599 | |||
600 | static inline void spin_time_accum_blocked(u64 start) | ||
601 | { | ||
602 | u32 delta; | ||
603 | |||
604 | delta = sched_clock() - start; | ||
605 | __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); | ||
606 | spinlock_stats.time_blocked += delta; | ||
607 | } | ||
608 | |||
609 | static struct dentry *d_spin_debug; | ||
610 | static struct dentry *d_kvm_debug; | ||
611 | |||
612 | struct dentry *kvm_init_debugfs(void) | ||
613 | { | ||
614 | d_kvm_debug = debugfs_create_dir("kvm", NULL); | ||
615 | if (!d_kvm_debug) | ||
616 | printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); | ||
617 | |||
618 | return d_kvm_debug; | ||
619 | } | ||
620 | |||
621 | static int __init kvm_spinlock_debugfs(void) | ||
622 | { | ||
623 | struct dentry *d_kvm; | ||
624 | |||
625 | d_kvm = kvm_init_debugfs(); | ||
626 | if (d_kvm == NULL) | ||
627 | return -ENOMEM; | ||
628 | |||
629 | d_spin_debug = debugfs_create_dir("spinlocks", d_kvm); | ||
630 | |||
631 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | ||
632 | |||
633 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | ||
634 | &spinlock_stats.contention_stats[TAKEN_SLOW]); | ||
635 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | ||
636 | &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); | ||
637 | |||
638 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | ||
639 | &spinlock_stats.contention_stats[RELEASED_SLOW]); | ||
640 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | ||
641 | &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); | ||
642 | |||
643 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | ||
644 | &spinlock_stats.time_blocked); | ||
645 | |||
646 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | ||
647 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | ||
648 | |||
649 | return 0; | ||
650 | } | ||
651 | fs_initcall(kvm_spinlock_debugfs); | ||
652 | #else /* !CONFIG_KVM_DEBUG_FS */ | ||
653 | static inline void add_stats(enum kvm_contention_stat var, u32 val) | ||
654 | { | ||
655 | } | ||
656 | |||
657 | static inline u64 spin_time_start(void) | ||
658 | { | ||
659 | return 0; | ||
660 | } | ||
661 | |||
662 | static inline void spin_time_accum_blocked(u64 start) | ||
663 | { | ||
664 | } | ||
665 | #endif /* CONFIG_KVM_DEBUG_FS */ | ||
666 | |||
667 | struct kvm_lock_waiting { | ||
668 | struct arch_spinlock *lock; | ||
669 | __ticket_t want; | ||
670 | }; | ||
671 | |||
672 | /* cpus 'waiting' on a spinlock to become available */ | ||
673 | static cpumask_t waiting_cpus; | ||
674 | |||
675 | /* Track spinlock on which a cpu is waiting */ | ||
676 | static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); | ||
677 | |||
678 | static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | ||
679 | { | ||
680 | struct kvm_lock_waiting *w; | ||
681 | int cpu; | ||
682 | u64 start; | ||
683 | unsigned long flags; | ||
684 | |||
685 | if (in_nmi()) | ||
686 | return; | ||
687 | |||
688 | w = &__get_cpu_var(klock_waiting); | ||
689 | cpu = smp_processor_id(); | ||
690 | start = spin_time_start(); | ||
691 | |||
692 | /* | ||
693 | * Make sure an interrupt handler can't upset things in a | ||
694 | * partially setup state. | ||
695 | */ | ||
696 | local_irq_save(flags); | ||
697 | |||
698 | /* | ||
699 | * The ordering protocol on this is that the "lock" pointer | ||
700 | * may only be set non-NULL if the "want" ticket is correct. | ||
701 | * If we're updating "want", we must first clear "lock". | ||
702 | */ | ||
703 | w->lock = NULL; | ||
704 | smp_wmb(); | ||
705 | w->want = want; | ||
706 | smp_wmb(); | ||
707 | w->lock = lock; | ||
708 | |||
709 | add_stats(TAKEN_SLOW, 1); | ||
710 | |||
711 | /* | ||
712 | * This uses set_bit, which is atomic but we should not rely on its | ||
713 | * reordering guarantees. So barrier is needed after this call. | ||
714 | */ | ||
715 | cpumask_set_cpu(cpu, &waiting_cpus); | ||
716 | |||
717 | barrier(); | ||
718 | |||
719 | /* | ||
720 | * Mark entry to slowpath before doing the pickup test to make | ||
721 | * sure we don't deadlock with an unlocker. | ||
722 | */ | ||
723 | __ticket_enter_slowpath(lock); | ||
724 | |||
725 | /* | ||
726 | * check again make sure it didn't become free while | ||
727 | * we weren't looking. | ||
728 | */ | ||
729 | if (ACCESS_ONCE(lock->tickets.head) == want) { | ||
730 | add_stats(TAKEN_SLOW_PICKUP, 1); | ||
731 | goto out; | ||
732 | } | ||
733 | |||
734 | /* | ||
735 | * halt until it's our turn and kicked. Note that we do safe halt | ||
736 | * for irq enabled case to avoid hang when lock info is overwritten | ||
737 | * in irq spinlock slowpath and no spurious interrupt occur to save us. | ||
738 | */ | ||
739 | if (arch_irqs_disabled_flags(flags)) | ||
740 | halt(); | ||
741 | else | ||
742 | safe_halt(); | ||
743 | |||
744 | out: | ||
745 | cpumask_clear_cpu(cpu, &waiting_cpus); | ||
746 | w->lock = NULL; | ||
747 | local_irq_restore(flags); | ||
748 | spin_time_accum_blocked(start); | ||
749 | } | ||
750 | PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning); | ||
751 | |||
752 | /* Kick vcpu waiting on @lock->head to reach value @ticket */ | ||
753 | static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) | ||
754 | { | ||
755 | int cpu; | ||
756 | |||
757 | add_stats(RELEASED_SLOW, 1); | ||
758 | for_each_cpu(cpu, &waiting_cpus) { | ||
759 | const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); | ||
760 | if (ACCESS_ONCE(w->lock) == lock && | ||
761 | ACCESS_ONCE(w->want) == ticket) { | ||
762 | add_stats(RELEASED_SLOW_KICKED, 1); | ||
763 | kvm_kick_cpu(cpu); | ||
764 | break; | ||
765 | } | ||
766 | } | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. | ||
771 | */ | ||
772 | void __init kvm_spinlock_init(void) | ||
773 | { | ||
774 | if (!kvm_para_available()) | ||
775 | return; | ||
776 | /* Does host kernel support KVM_FEATURE_PV_UNHALT? */ | ||
777 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) | ||
778 | return; | ||
779 | |||
780 | printk(KERN_INFO "KVM setup paravirtual spinlock\n"); | ||
781 | |||
782 | static_key_slow_inc(&paravirt_ticketlocks_enabled); | ||
783 | |||
784 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); | ||
785 | pv_lock_ops.unlock_kick = kvm_unlock_kick; | ||
786 | } | ||
787 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 676b8c77a976..bbb6c7316341 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -4,25 +4,17 @@ | |||
4 | */ | 4 | */ |
5 | #include <linux/spinlock.h> | 5 | #include <linux/spinlock.h> |
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/jump_label.h> | ||
7 | 8 | ||
8 | #include <asm/paravirt.h> | 9 | #include <asm/paravirt.h> |
9 | 10 | ||
10 | static inline void | ||
11 | default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) | ||
12 | { | ||
13 | arch_spin_lock(lock); | ||
14 | } | ||
15 | |||
16 | struct pv_lock_ops pv_lock_ops = { | 11 | struct pv_lock_ops pv_lock_ops = { |
17 | #ifdef CONFIG_SMP | 12 | #ifdef CONFIG_SMP |
18 | .spin_is_locked = __ticket_spin_is_locked, | 13 | .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), |
19 | .spin_is_contended = __ticket_spin_is_contended, | 14 | .unlock_kick = paravirt_nop, |
20 | |||
21 | .spin_lock = __ticket_spin_lock, | ||
22 | .spin_lock_flags = default_spin_lock_flags, | ||
23 | .spin_trylock = __ticket_spin_trylock, | ||
24 | .spin_unlock = __ticket_spin_unlock, | ||
25 | #endif | 15 | #endif |
26 | }; | 16 | }; |
27 | EXPORT_SYMBOL(pv_lock_ops); | 17 | EXPORT_SYMBOL(pv_lock_ops); |
28 | 18 | ||
19 | struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; | ||
20 | EXPORT_SYMBOL(paravirt_ticketlocks_enabled); | ||
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 22759c6d309f..368c290929fe 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
279 | 279 | ||
280 | xen_filter_cpu_maps(); | 280 | xen_filter_cpu_maps(); |
281 | xen_setup_vcpu_info_placement(); | 281 | xen_setup_vcpu_info_placement(); |
282 | xen_init_spinlocks(); | ||
282 | } | 283 | } |
283 | 284 | ||
284 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | 285 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
@@ -686,7 +687,6 @@ void __init xen_smp_init(void) | |||
686 | { | 687 | { |
687 | smp_ops = xen_smp_ops; | 688 | smp_ops = xen_smp_ops; |
688 | xen_fill_possible_map(); | 689 | xen_fill_possible_map(); |
689 | xen_init_spinlocks(); | ||
690 | } | 690 | } |
691 | 691 | ||
692 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | 692 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index cf3caee356b3..0438b9324a72 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -17,45 +17,44 @@ | |||
17 | #include "xen-ops.h" | 17 | #include "xen-ops.h" |
18 | #include "debugfs.h" | 18 | #include "debugfs.h" |
19 | 19 | ||
20 | #ifdef CONFIG_XEN_DEBUG_FS | 20 | enum xen_contention_stat { |
21 | static struct xen_spinlock_stats | 21 | TAKEN_SLOW, |
22 | { | 22 | TAKEN_SLOW_PICKUP, |
23 | u64 taken; | 23 | TAKEN_SLOW_SPURIOUS, |
24 | u32 taken_slow; | 24 | RELEASED_SLOW, |
25 | u32 taken_slow_nested; | 25 | RELEASED_SLOW_KICKED, |
26 | u32 taken_slow_pickup; | 26 | NR_CONTENTION_STATS |
27 | u32 taken_slow_spurious; | 27 | }; |
28 | u32 taken_slow_irqenable; | ||
29 | 28 | ||
30 | u64 released; | ||
31 | u32 released_slow; | ||
32 | u32 released_slow_kicked; | ||
33 | 29 | ||
30 | #ifdef CONFIG_XEN_DEBUG_FS | ||
34 | #define HISTO_BUCKETS 30 | 31 | #define HISTO_BUCKETS 30 |
35 | u32 histo_spin_total[HISTO_BUCKETS+1]; | 32 | static struct xen_spinlock_stats |
36 | u32 histo_spin_spinning[HISTO_BUCKETS+1]; | 33 | { |
34 | u32 contention_stats[NR_CONTENTION_STATS]; | ||
37 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | 35 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; |
38 | |||
39 | u64 time_total; | ||
40 | u64 time_spinning; | ||
41 | u64 time_blocked; | 36 | u64 time_blocked; |
42 | } spinlock_stats; | 37 | } spinlock_stats; |
43 | 38 | ||
44 | static u8 zero_stats; | 39 | static u8 zero_stats; |
45 | 40 | ||
46 | static unsigned lock_timeout = 1 << 10; | ||
47 | #define TIMEOUT lock_timeout | ||
48 | |||
49 | static inline void check_zero(void) | 41 | static inline void check_zero(void) |
50 | { | 42 | { |
51 | if (unlikely(zero_stats)) { | 43 | u8 ret; |
52 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | 44 | u8 old = ACCESS_ONCE(zero_stats); |
53 | zero_stats = 0; | 45 | if (unlikely(old)) { |
46 | ret = cmpxchg(&zero_stats, old, 0); | ||
47 | /* This ensures only one fellow resets the stat */ | ||
48 | if (ret == old) | ||
49 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
54 | } | 50 | } |
55 | } | 51 | } |
56 | 52 | ||
57 | #define ADD_STATS(elem, val) \ | 53 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
58 | do { check_zero(); spinlock_stats.elem += (val); } while(0) | 54 | { |
55 | check_zero(); | ||
56 | spinlock_stats.contention_stats[var] += val; | ||
57 | } | ||
59 | 58 | ||
60 | static inline u64 spin_time_start(void) | 59 | static inline u64 spin_time_start(void) |
61 | { | 60 | { |
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array) | |||
74 | array[HISTO_BUCKETS]++; | 73 | array[HISTO_BUCKETS]++; |
75 | } | 74 | } |
76 | 75 | ||
77 | static inline void spin_time_accum_spinning(u64 start) | ||
78 | { | ||
79 | u32 delta = xen_clocksource_read() - start; | ||
80 | |||
81 | __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); | ||
82 | spinlock_stats.time_spinning += delta; | ||
83 | } | ||
84 | |||
85 | static inline void spin_time_accum_total(u64 start) | ||
86 | { | ||
87 | u32 delta = xen_clocksource_read() - start; | ||
88 | |||
89 | __spin_time_accum(delta, spinlock_stats.histo_spin_total); | ||
90 | spinlock_stats.time_total += delta; | ||
91 | } | ||
92 | |||
93 | static inline void spin_time_accum_blocked(u64 start) | 76 | static inline void spin_time_accum_blocked(u64 start) |
94 | { | 77 | { |
95 | u32 delta = xen_clocksource_read() - start; | 78 | u32 delta = xen_clocksource_read() - start; |
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start) | |||
99 | } | 82 | } |
100 | #else /* !CONFIG_XEN_DEBUG_FS */ | 83 | #else /* !CONFIG_XEN_DEBUG_FS */ |
101 | #define TIMEOUT (1 << 10) | 84 | #define TIMEOUT (1 << 10) |
102 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | 85 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
86 | { | ||
87 | } | ||
103 | 88 | ||
104 | static inline u64 spin_time_start(void) | 89 | static inline u64 spin_time_start(void) |
105 | { | 90 | { |
106 | return 0; | 91 | return 0; |
107 | } | 92 | } |
108 | 93 | ||
109 | static inline void spin_time_accum_total(u64 start) | ||
110 | { | ||
111 | } | ||
112 | static inline void spin_time_accum_spinning(u64 start) | ||
113 | { | ||
114 | } | ||
115 | static inline void spin_time_accum_blocked(u64 start) | 94 | static inline void spin_time_accum_blocked(u64 start) |
116 | { | 95 | { |
117 | } | 96 | } |
@@ -134,227 +113,123 @@ typedef u16 xen_spinners_t; | |||
134 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); | 113 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); |
135 | #endif | 114 | #endif |
136 | 115 | ||
137 | struct xen_spinlock { | 116 | struct xen_lock_waiting { |
138 | unsigned char lock; /* 0 -> free; 1 -> locked */ | 117 | struct arch_spinlock *lock; |
139 | xen_spinners_t spinners; /* count of waiting cpus */ | 118 | __ticket_t want; |
140 | }; | 119 | }; |
141 | 120 | ||
142 | static int xen_spin_is_locked(struct arch_spinlock *lock) | ||
143 | { | ||
144 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
145 | |||
146 | return xl->lock != 0; | ||
147 | } | ||
148 | |||
149 | static int xen_spin_is_contended(struct arch_spinlock *lock) | ||
150 | { | ||
151 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
152 | |||
153 | /* Not strictly true; this is only the count of contended | ||
154 | lock-takers entering the slow path. */ | ||
155 | return xl->spinners != 0; | ||
156 | } | ||
157 | |||
158 | static int xen_spin_trylock(struct arch_spinlock *lock) | ||
159 | { | ||
160 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
161 | u8 old = 1; | ||
162 | |||
163 | asm("xchgb %b0,%1" | ||
164 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
165 | |||
166 | return old == 0; | ||
167 | } | ||
168 | |||
169 | static DEFINE_PER_CPU(char *, irq_name); | ||
170 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | 121 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; |
171 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | 122 | static DEFINE_PER_CPU(char *, irq_name); |
172 | 123 | static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); | |
173 | /* | 124 | static cpumask_t waiting_cpus; |
174 | * Mark a cpu as interested in a lock. Returns the CPU's previous | ||
175 | * lock of interest, in case we got preempted by an interrupt. | ||
176 | */ | ||
177 | static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | ||
178 | { | ||
179 | struct xen_spinlock *prev; | ||
180 | |||
181 | prev = __this_cpu_read(lock_spinners); | ||
182 | __this_cpu_write(lock_spinners, xl); | ||
183 | |||
184 | wmb(); /* set lock of interest before count */ | ||
185 | |||
186 | inc_spinners(xl); | ||
187 | |||
188 | return prev; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Mark a cpu as no longer interested in a lock. Restores previous | ||
193 | * lock of interest (NULL for none). | ||
194 | */ | ||
195 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) | ||
196 | { | ||
197 | dec_spinners(xl); | ||
198 | wmb(); /* decrement count before restoring lock */ | ||
199 | __this_cpu_write(lock_spinners, prev); | ||
200 | } | ||
201 | 125 | ||
202 | static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) | 126 | static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
203 | { | 127 | { |
204 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
205 | struct xen_spinlock *prev; | ||
206 | int irq = __this_cpu_read(lock_kicker_irq); | 128 | int irq = __this_cpu_read(lock_kicker_irq); |
207 | int ret; | 129 | struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); |
130 | int cpu = smp_processor_id(); | ||
208 | u64 start; | 131 | u64 start; |
132 | unsigned long flags; | ||
209 | 133 | ||
210 | /* If kicker interrupts not initialized yet, just spin */ | 134 | /* If kicker interrupts not initialized yet, just spin */ |
211 | if (irq == -1) | 135 | if (irq == -1) |
212 | return 0; | 136 | return; |
213 | 137 | ||
214 | start = spin_time_start(); | 138 | start = spin_time_start(); |
215 | 139 | ||
216 | /* announce we're spinning */ | 140 | /* |
217 | prev = spinning_lock(xl); | 141 | * Make sure an interrupt handler can't upset things in a |
142 | * partially setup state. | ||
143 | */ | ||
144 | local_irq_save(flags); | ||
145 | /* | ||
146 | * We don't really care if we're overwriting some other | ||
147 | * (lock,want) pair, as that would mean that we're currently | ||
148 | * in an interrupt context, and the outer context had | ||
149 | * interrupts enabled. That has already kicked the VCPU out | ||
150 | * of xen_poll_irq(), so it will just return spuriously and | ||
151 | * retry with newly setup (lock,want). | ||
152 | * | ||
153 | * The ordering protocol on this is that the "lock" pointer | ||
154 | * may only be set non-NULL if the "want" ticket is correct. | ||
155 | * If we're updating "want", we must first clear "lock". | ||
156 | */ | ||
157 | w->lock = NULL; | ||
158 | smp_wmb(); | ||
159 | w->want = want; | ||
160 | smp_wmb(); | ||
161 | w->lock = lock; | ||
218 | 162 | ||
219 | ADD_STATS(taken_slow, 1); | 163 | /* This uses set_bit, which atomic and therefore a barrier */ |
220 | ADD_STATS(taken_slow_nested, prev != NULL); | 164 | cpumask_set_cpu(cpu, &waiting_cpus); |
165 | add_stats(TAKEN_SLOW, 1); | ||
221 | 166 | ||
222 | do { | 167 | /* clear pending */ |
223 | unsigned long flags; | 168 | xen_clear_irq_pending(irq); |
224 | 169 | ||
225 | /* clear pending */ | 170 | /* Only check lock once pending cleared */ |
226 | xen_clear_irq_pending(irq); | 171 | barrier(); |
227 | 172 | ||
228 | /* check again make sure it didn't become free while | 173 | /* |
229 | we weren't looking */ | 174 | * Mark entry to slowpath before doing the pickup test to make |
230 | ret = xen_spin_trylock(lock); | 175 | * sure we don't deadlock with an unlocker. |
231 | if (ret) { | 176 | */ |
232 | ADD_STATS(taken_slow_pickup, 1); | 177 | __ticket_enter_slowpath(lock); |
233 | 178 | ||
234 | /* | 179 | /* |
235 | * If we interrupted another spinlock while it | 180 | * check again make sure it didn't become free while |
236 | * was blocking, make sure it doesn't block | 181 | * we weren't looking |
237 | * without rechecking the lock. | 182 | */ |
238 | */ | 183 | if (ACCESS_ONCE(lock->tickets.head) == want) { |
239 | if (prev != NULL) | 184 | add_stats(TAKEN_SLOW_PICKUP, 1); |
240 | xen_set_irq_pending(irq); | 185 | goto out; |
241 | goto out; | 186 | } |
242 | } | ||
243 | 187 | ||
244 | flags = arch_local_save_flags(); | 188 | /* Allow interrupts while blocked */ |
245 | if (irq_enable) { | 189 | local_irq_restore(flags); |
246 | ADD_STATS(taken_slow_irqenable, 1); | ||
247 | raw_local_irq_enable(); | ||
248 | } | ||
249 | 190 | ||
250 | /* | 191 | /* |
251 | * Block until irq becomes pending. If we're | 192 | * If an interrupt happens here, it will leave the wakeup irq |
252 | * interrupted at this point (after the trylock but | 193 | * pending, which will cause xen_poll_irq() to return |
253 | * before entering the block), then the nested lock | 194 | * immediately. |
254 | * handler guarantees that the irq will be left | 195 | */ |
255 | * pending if there's any chance the lock became free; | ||
256 | * xen_poll_irq() returns immediately if the irq is | ||
257 | * pending. | ||
258 | */ | ||
259 | xen_poll_irq(irq); | ||
260 | 196 | ||
261 | raw_local_irq_restore(flags); | 197 | /* Block until irq becomes pending (or perhaps a spurious wakeup) */ |
198 | xen_poll_irq(irq); | ||
199 | add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq)); | ||
262 | 200 | ||
263 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); | 201 | local_irq_save(flags); |
264 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ | ||
265 | 202 | ||
266 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); | 203 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); |
267 | |||
268 | out: | 204 | out: |
269 | unspinning_lock(xl, prev); | 205 | cpumask_clear_cpu(cpu, &waiting_cpus); |
270 | spin_time_accum_blocked(start); | 206 | w->lock = NULL; |
271 | |||
272 | return ret; | ||
273 | } | ||
274 | |||
275 | static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) | ||
276 | { | ||
277 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
278 | unsigned timeout; | ||
279 | u8 oldval; | ||
280 | u64 start_spin; | ||
281 | |||
282 | ADD_STATS(taken, 1); | ||
283 | |||
284 | start_spin = spin_time_start(); | ||
285 | |||
286 | do { | ||
287 | u64 start_spin_fast = spin_time_start(); | ||
288 | |||
289 | timeout = TIMEOUT; | ||
290 | |||
291 | asm("1: xchgb %1,%0\n" | ||
292 | " testb %1,%1\n" | ||
293 | " jz 3f\n" | ||
294 | "2: rep;nop\n" | ||
295 | " cmpb $0,%0\n" | ||
296 | " je 1b\n" | ||
297 | " dec %2\n" | ||
298 | " jnz 2b\n" | ||
299 | "3:\n" | ||
300 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
301 | : "1" (1) | ||
302 | : "memory"); | ||
303 | 207 | ||
304 | spin_time_accum_spinning(start_spin_fast); | 208 | local_irq_restore(flags); |
305 | 209 | ||
306 | } while (unlikely(oldval != 0 && | 210 | spin_time_accum_blocked(start); |
307 | (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); | ||
308 | |||
309 | spin_time_accum_total(start_spin); | ||
310 | } | ||
311 | |||
312 | static void xen_spin_lock(struct arch_spinlock *lock) | ||
313 | { | ||
314 | __xen_spin_lock(lock, false); | ||
315 | } | ||
316 | |||
317 | static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) | ||
318 | { | ||
319 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); | ||
320 | } | 211 | } |
212 | PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning); | ||
321 | 213 | ||
322 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | 214 | static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) |
323 | { | 215 | { |
324 | int cpu; | 216 | int cpu; |
325 | 217 | ||
326 | ADD_STATS(released_slow, 1); | 218 | add_stats(RELEASED_SLOW, 1); |
219 | |||
220 | for_each_cpu(cpu, &waiting_cpus) { | ||
221 | const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); | ||
327 | 222 | ||
328 | for_each_online_cpu(cpu) { | 223 | /* Make sure we read lock before want */ |
329 | /* XXX should mix up next cpu selection */ | 224 | if (ACCESS_ONCE(w->lock) == lock && |
330 | if (per_cpu(lock_spinners, cpu) == xl) { | 225 | ACCESS_ONCE(w->want) == next) { |
331 | ADD_STATS(released_slow_kicked, 1); | 226 | add_stats(RELEASED_SLOW_KICKED, 1); |
332 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | 227 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); |
228 | break; | ||
333 | } | 229 | } |
334 | } | 230 | } |
335 | } | 231 | } |
336 | 232 | ||
337 | static void xen_spin_unlock(struct arch_spinlock *lock) | ||
338 | { | ||
339 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
340 | |||
341 | ADD_STATS(released, 1); | ||
342 | |||
343 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
344 | xl->lock = 0; /* release lock */ | ||
345 | |||
346 | /* | ||
347 | * Make sure unlock happens before checking for waiting | ||
348 | * spinners. We need a strong barrier to enforce the | ||
349 | * write-read ordering to different memory locations, as the | ||
350 | * CPU makes no implied guarantees about their ordering. | ||
351 | */ | ||
352 | mb(); | ||
353 | |||
354 | if (unlikely(xl->spinners)) | ||
355 | xen_spin_unlock_slow(xl); | ||
356 | } | ||
357 | |||
358 | static irqreturn_t dummy_handler(int irq, void *dev_id) | 233 | static irqreturn_t dummy_handler(int irq, void *dev_id) |
359 | { | 234 | { |
360 | BUG(); | 235 | BUG(); |
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu) | |||
408 | per_cpu(irq_name, cpu) = NULL; | 283 | per_cpu(irq_name, cpu) = NULL; |
409 | } | 284 | } |
410 | 285 | ||
286 | static bool xen_pvspin __initdata = true; | ||
287 | |||
411 | void __init xen_init_spinlocks(void) | 288 | void __init xen_init_spinlocks(void) |
412 | { | 289 | { |
413 | /* | 290 | /* |
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void) | |||
417 | if (xen_hvm_domain()) | 294 | if (xen_hvm_domain()) |
418 | return; | 295 | return; |
419 | 296 | ||
420 | BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); | 297 | if (!xen_pvspin) { |
298 | printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); | ||
299 | return; | ||
300 | } | ||
421 | 301 | ||
422 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | 302 | static_key_slow_inc(¶virt_ticketlocks_enabled); |
423 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | 303 | |
424 | pv_lock_ops.spin_lock = xen_spin_lock; | 304 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); |
425 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; | 305 | pv_lock_ops.unlock_kick = xen_unlock_kick; |
426 | pv_lock_ops.spin_trylock = xen_spin_trylock; | 306 | } |
427 | pv_lock_ops.spin_unlock = xen_spin_unlock; | 307 | |
308 | static __init int xen_parse_nopvspin(char *arg) | ||
309 | { | ||
310 | xen_pvspin = false; | ||
311 | return 0; | ||
428 | } | 312 | } |
313 | early_param("xen_nopvspin", xen_parse_nopvspin); | ||
429 | 314 | ||
430 | #ifdef CONFIG_XEN_DEBUG_FS | 315 | #ifdef CONFIG_XEN_DEBUG_FS |
431 | 316 | ||
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void) | |||
442 | 327 | ||
443 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | 328 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); |
444 | 329 | ||
445 | debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); | ||
446 | |||
447 | debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); | ||
448 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | 330 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, |
449 | &spinlock_stats.taken_slow); | 331 | &spinlock_stats.contention_stats[TAKEN_SLOW]); |
450 | debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, | ||
451 | &spinlock_stats.taken_slow_nested); | ||
452 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | 332 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, |
453 | &spinlock_stats.taken_slow_pickup); | 333 | &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); |
454 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, | 334 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, |
455 | &spinlock_stats.taken_slow_spurious); | 335 | &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]); |
456 | debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, | ||
457 | &spinlock_stats.taken_slow_irqenable); | ||
458 | 336 | ||
459 | debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); | ||
460 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | 337 | debugfs_create_u32("released_slow", 0444, d_spin_debug, |
461 | &spinlock_stats.released_slow); | 338 | &spinlock_stats.contention_stats[RELEASED_SLOW]); |
462 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | 339 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, |
463 | &spinlock_stats.released_slow_kicked); | 340 | &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); |
464 | 341 | ||
465 | debugfs_create_u64("time_spinning", 0444, d_spin_debug, | ||
466 | &spinlock_stats.time_spinning); | ||
467 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | 342 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, |
468 | &spinlock_stats.time_blocked); | 343 | &spinlock_stats.time_blocked); |
469 | debugfs_create_u64("time_total", 0444, d_spin_debug, | ||
470 | &spinlock_stats.time_total); | ||
471 | 344 | ||
472 | debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | ||
473 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | ||
474 | debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | ||
475 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | ||
476 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | 345 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, |
477 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | 346 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); |
478 | 347 | ||