aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaghavendra K T <raghavendra.kt@linux.vnet.ibm.com>2015-02-06 06:14:11 -0500
committerIngo Molnar <mingo@kernel.org>2015-02-18 08:53:49 -0500
commitd6abfdb2022368d8c6c4be3f11a06656601a6cc2 (patch)
tree2569d8da91393a807fdb479a25eaa5087673c49e
parent8d1e5a1a1ccf5ae9d8a5a0ee7960202ccb0c5429 (diff)
x86/spinlocks/paravirt: Fix memory corruption on unlock
Paravirt spinlock clears slowpath flag after doing unlock. As explained by Linus currently it does: prev = *lock; add_smp(&lock->tickets.head, TICKET_LOCK_INC); /* add_smp() is a full mb() */ if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) __ticket_unlock_slowpath(lock, prev); which is *exactly* the kind of things you cannot do with spinlocks, because after you've done the "add_smp()" and released the spinlock for the fast-path, you can't access the spinlock any more. Exactly because a fast-path lock might come in, and release the whole data structure. Linus suggested that we should not do any writes to lock after unlock(), and we can move slowpath clearing to fastpath lock. So this patch implements the fix with: 1. Moving slowpath flag to head (Oleg): Unlocked locks don't care about the slowpath flag; therefore we can keep it set after the last unlock, and clear it again on the first (try)lock. -- this removes the write after unlock. note that keeping slowpath flag would result in unnecessary kicks. By moving the slowpath flag from the tail to the head ticket we also avoid the need to access both the head and tail tickets on unlock. 2. use xadd to avoid read/write after unlock that checks the need for unlock_kick (Linus): We further avoid the need for a read-after-release by using xadd; the prev head value will include the slowpath flag and indicate if we need to do PV kicking of suspended spinners -- on modern chips xadd isn't (much) more expensive than an add + load. Result: setup: 16core (32 cpu +ht sandy bridge 8GB 16vcpu guest) benchmark overcommit %improve kernbench 1x -0.13 kernbench 2x 0.02 dbench 1x -1.77 dbench 2x -0.63 [Jeremy: Hinted missing TICKET_LOCK_INC for kick] [Oleg: Moved slowpath flag to head, ticket_equals idea] [PeterZ: Added detailed changelog] Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Reported-by: Sasha Levin <sasha.levin@oracle.com> Tested-by: Sasha Levin <sasha.levin@oracle.com> Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Cc: Andrew Jones <drjones@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Lameter <cl@linux.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Jones <davej@redhat.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Fernando Luis Vázquez Cao <fernando_b1@lab.ntt.co.jp> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Ulrich Obergfell <uobergfe@redhat.com> Cc: Waiman Long <Waiman.Long@hp.com> Cc: a.ryabinin@samsung.com Cc: dave@stgolabs.net Cc: hpa@zytor.com Cc: jasowang@redhat.com Cc: jeremy@goop.org Cc: paul.gortmaker@windriver.com Cc: riel@redhat.com Cc: tglx@linutronix.de Cc: waiman.long@hp.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20150215173043.GA7471@linux.vnet.ibm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/include/asm/spinlock.h94
-rw-r--r--arch/x86/kernel/kvm.c13
-rw-r--r--arch/x86/xen/spinlock.c13
3 files changed, 64 insertions, 56 deletions
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 625660f8a2fc..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
46 46
47static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) 47static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
48{ 48{
49 set_bit(0, (volatile unsigned long *)&lock->tickets.tail); 49 set_bit(0, (volatile unsigned long *)&lock->tickets.head);
50} 50}
51 51
52#else /* !CONFIG_PARAVIRT_SPINLOCKS */ 52#else /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
60} 60}
61 61
62#endif /* CONFIG_PARAVIRT_SPINLOCKS */ 62#endif /* CONFIG_PARAVIRT_SPINLOCKS */
63static inline int __tickets_equal(__ticket_t one, __ticket_t two)
64{
65 return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
66}
67
68static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
69 __ticket_t head)
70{
71 if (head & TICKET_SLOWPATH_FLAG) {
72 arch_spinlock_t old, new;
73
74 old.tickets.head = head;
75 new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
76 old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
77 new.tickets.tail = old.tickets.tail;
78
79 /* try to clear slowpath flag when there are no contenders */
80 cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
81 }
82}
63 83
64static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) 84static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
65{ 85{
66 return lock.tickets.head == lock.tickets.tail; 86 return __tickets_equal(lock.tickets.head, lock.tickets.tail);
67} 87}
68 88
69/* 89/*
@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
87 if (likely(inc.head == inc.tail)) 107 if (likely(inc.head == inc.tail))
88 goto out; 108 goto out;
89 109
90 inc.tail &= ~TICKET_SLOWPATH_FLAG;
91 for (;;) { 110 for (;;) {
92 unsigned count = SPIN_THRESHOLD; 111 unsigned count = SPIN_THRESHOLD;
93 112
94 do { 113 do {
95 if (READ_ONCE(lock->tickets.head) == inc.tail) 114 inc.head = READ_ONCE(lock->tickets.head);
96 goto out; 115 if (__tickets_equal(inc.head, inc.tail))
116 goto clear_slowpath;
97 cpu_relax(); 117 cpu_relax();
98 } while (--count); 118 } while (--count);
99 __ticket_lock_spinning(lock, inc.tail); 119 __ticket_lock_spinning(lock, inc.tail);
100 } 120 }
101out: barrier(); /* make sure nothing creeps before the lock is taken */ 121clear_slowpath:
122 __ticket_check_and_clear_slowpath(lock, inc.head);
123out:
124 barrier(); /* make sure nothing creeps before the lock is taken */
102} 125}
103 126
104static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) 127static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
106 arch_spinlock_t old, new; 129 arch_spinlock_t old, new;
107 130
108 old.tickets = READ_ONCE(lock->tickets); 131 old.tickets = READ_ONCE(lock->tickets);
109 if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) 132 if (!__tickets_equal(old.tickets.head, old.tickets.tail))
110 return 0; 133 return 0;
111 134
112 new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); 135 new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
136 new.head_tail &= ~TICKET_SLOWPATH_FLAG;
113 137
114 /* cmpxchg is a full barrier, so nothing can move before it */ 138 /* cmpxchg is a full barrier, so nothing can move before it */
115 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; 139 return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
116} 140}
117 141
118static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
119 arch_spinlock_t old)
120{
121 arch_spinlock_t new;
122
123 BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
124
125 /* Perform the unlock on the "before" copy */
126 old.tickets.head += TICKET_LOCK_INC;
127
128 /* Clear the slowpath flag */
129 new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
130
131 /*
132 * If the lock is uncontended, clear the flag - use cmpxchg in
133 * case it changes behind our back though.
134 */
135 if (new.tickets.head != new.tickets.tail ||
136 cmpxchg(&lock->head_tail, old.head_tail,
137 new.head_tail) != old.head_tail) {
138 /*
139 * Lock still has someone queued for it, so wake up an
140 * appropriate waiter.
141 */
142 __ticket_unlock_kick(lock, old.tickets.head);
143 }
144}
145
146static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) 142static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
147{ 143{
148 if (TICKET_SLOWPATH_FLAG && 144 if (TICKET_SLOWPATH_FLAG &&
149 static_key_false(&paravirt_ticketlocks_enabled)) { 145 static_key_false(&paravirt_ticketlocks_enabled)) {
150 arch_spinlock_t prev; 146 __ticket_t head;
151 147
152 prev = *lock; 148 BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
153 add_smp(&lock->tickets.head, TICKET_LOCK_INC);
154 149
155 /* add_smp() is a full mb() */ 150 head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
156 151
157 if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) 152 if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
158 __ticket_unlock_slowpath(lock, prev); 153 head &= ~TICKET_SLOWPATH_FLAG;
154 __ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
155 }
159 } else 156 } else
160 __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); 157 __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
161} 158}
@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
164{ 161{
165 struct __raw_tickets tmp = READ_ONCE(lock->tickets); 162 struct __raw_tickets tmp = READ_ONCE(lock->tickets);
166 163
167 return tmp.tail != tmp.head; 164 return !__tickets_equal(tmp.tail, tmp.head);
168} 165}
169 166
170static inline int arch_spin_is_contended(arch_spinlock_t *lock) 167static inline int arch_spin_is_contended(arch_spinlock_t *lock)
171{ 168{
172 struct __raw_tickets tmp = READ_ONCE(lock->tickets); 169 struct __raw_tickets tmp = READ_ONCE(lock->tickets);
173 170
174 return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; 171 tmp.head &= ~TICKET_SLOWPATH_FLAG;
172 return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
175} 173}
176#define arch_spin_is_contended arch_spin_is_contended 174#define arch_spin_is_contended arch_spin_is_contended
177 175
@@ -183,16 +181,16 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
183 181
184static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 182static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
185{ 183{
186 __ticket_t head = ACCESS_ONCE(lock->tickets.head); 184 __ticket_t head = READ_ONCE(lock->tickets.head);
187 185
188 for (;;) { 186 for (;;) {
189 struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); 187 struct __raw_tickets tmp = READ_ONCE(lock->tickets);
190 /* 188 /*
191 * We need to check "unlocked" in a loop, tmp.head == head 189 * We need to check "unlocked" in a loop, tmp.head == head
192 * can be false positive because of overflow. 190 * can be false positive because of overflow.
193 */ 191 */
194 if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) || 192 if (__tickets_equal(tmp.head, tmp.tail) ||
195 tmp.head != head) 193 !__tickets_equal(tmp.head, head))
196 break; 194 break;
197 195
198 cpu_relax(); 196 cpu_relax();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 94f643484300..e354cc6446ab 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -609,7 +609,7 @@ static inline void check_zero(void)
609 u8 ret; 609 u8 ret;
610 u8 old; 610 u8 old;
611 611
612 old = ACCESS_ONCE(zero_stats); 612 old = READ_ONCE(zero_stats);
613 if (unlikely(old)) { 613 if (unlikely(old)) {
614 ret = cmpxchg(&zero_stats, old, 0); 614 ret = cmpxchg(&zero_stats, old, 0);
615 /* This ensures only one fellow resets the stat */ 615 /* This ensures only one fellow resets the stat */
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
727 int cpu; 727 int cpu;
728 u64 start; 728 u64 start;
729 unsigned long flags; 729 unsigned long flags;
730 __ticket_t head;
730 731
731 if (in_nmi()) 732 if (in_nmi())
732 return; 733 return;
@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
768 */ 769 */
769 __ticket_enter_slowpath(lock); 770 __ticket_enter_slowpath(lock);
770 771
772 /* make sure enter_slowpath, which is atomic does not cross the read */
773 smp_mb__after_atomic();
774
771 /* 775 /*
772 * check again make sure it didn't become free while 776 * check again make sure it didn't become free while
773 * we weren't looking. 777 * we weren't looking.
774 */ 778 */
775 if (ACCESS_ONCE(lock->tickets.head) == want) { 779 head = READ_ONCE(lock->tickets.head);
780 if (__tickets_equal(head, want)) {
776 add_stats(TAKEN_SLOW_PICKUP, 1); 781 add_stats(TAKEN_SLOW_PICKUP, 1);
777 goto out; 782 goto out;
778 } 783 }
@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
803 add_stats(RELEASED_SLOW, 1); 808 add_stats(RELEASED_SLOW, 1);
804 for_each_cpu(cpu, &waiting_cpus) { 809 for_each_cpu(cpu, &waiting_cpus) {
805 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); 810 const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
806 if (ACCESS_ONCE(w->lock) == lock && 811 if (READ_ONCE(w->lock) == lock &&
807 ACCESS_ONCE(w->want) == ticket) { 812 READ_ONCE(w->want) == ticket) {
808 add_stats(RELEASED_SLOW_KICKED, 1); 813 add_stats(RELEASED_SLOW_KICKED, 1);
809 kvm_kick_cpu(cpu); 814 kvm_kick_cpu(cpu);
810 break; 815 break;
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 23b45eb9a89c..956374c1edbc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -41,7 +41,7 @@ static u8 zero_stats;
41static inline void check_zero(void) 41static inline void check_zero(void)
42{ 42{
43 u8 ret; 43 u8 ret;
44 u8 old = ACCESS_ONCE(zero_stats); 44 u8 old = READ_ONCE(zero_stats);
45 if (unlikely(old)) { 45 if (unlikely(old)) {
46 ret = cmpxchg(&zero_stats, old, 0); 46 ret = cmpxchg(&zero_stats, old, 0);
47 /* This ensures only one fellow resets the stat */ 47 /* This ensures only one fellow resets the stat */
@@ -112,6 +112,7 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
112 struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); 112 struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
113 int cpu = smp_processor_id(); 113 int cpu = smp_processor_id();
114 u64 start; 114 u64 start;
115 __ticket_t head;
115 unsigned long flags; 116 unsigned long flags;
116 117
117 /* If kicker interrupts not initialized yet, just spin */ 118 /* If kicker interrupts not initialized yet, just spin */
@@ -159,11 +160,15 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
159 */ 160 */
160 __ticket_enter_slowpath(lock); 161 __ticket_enter_slowpath(lock);
161 162
163 /* make sure enter_slowpath, which is atomic does not cross the read */
164 smp_mb__after_atomic();
165
162 /* 166 /*
163 * check again make sure it didn't become free while 167 * check again make sure it didn't become free while
164 * we weren't looking 168 * we weren't looking
165 */ 169 */
166 if (ACCESS_ONCE(lock->tickets.head) == want) { 170 head = READ_ONCE(lock->tickets.head);
171 if (__tickets_equal(head, want)) {
167 add_stats(TAKEN_SLOW_PICKUP, 1); 172 add_stats(TAKEN_SLOW_PICKUP, 1);
168 goto out; 173 goto out;
169 } 174 }
@@ -204,8 +209,8 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
204 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); 209 const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
205 210
206 /* Make sure we read lock before want */ 211 /* Make sure we read lock before want */
207 if (ACCESS_ONCE(w->lock) == lock && 212 if (READ_ONCE(w->lock) == lock &&
208 ACCESS_ONCE(w->want) == next) { 213 READ_ONCE(w->want) == next) {
209 add_stats(RELEASED_SLOW_KICKED, 1); 214 add_stats(RELEASED_SLOW_KICKED, 1);
210 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); 215 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
211 break; 216 break;