author     Chris Metcalf <cmetcalf@tilera.com>    2011-03-01 13:30:15 -0500
committer  Chris Metcalf <cmetcalf@tilera.com>    2011-03-10 16:10:41 -0500
commit     3c5ead52ed68406c0ee789024c4ae581be8bcee4 (patch)
tree       cd634aba3710115640b372b4fc49fee5ead75acf
parent     5c7707554858eca8903706b6df7cba5c0f802244 (diff)
arch/tile: fix deadlock bugs in rwlock implementation
The first issue fixed in this patch is that pending rwlock write locks could lock out new readers; this could cause a deadlock if a read lock was held on cpu 1, a write lock was then attempted on cpu 2 and was pending, and cpu 1 was interrupted and attempted to re-acquire a read lock. The write lock code was modified to not lock out new readers.

The second issue fixed is that there was a narrow race window where a tns instruction had been issued (setting the lock value to "1") and the store instruction to reset the lock value correctly had not yet been issued. In this case, if an interrupt occurred and the same cpu then tried to manipulate the lock, it would find the lock value set to "1" and spin forever, assuming some other cpu was partway through updating it. The fix is to enforce an interrupt critical section around the tns/store pair.

In addition, this change now arranges to always validate that after a readlock we have not wrapped around the count of readers, which is only eight bits.

Since these changes make the rwlock "fast path" code heavier weight, I decided to move all the rwlock code out of line, leaving only the conventional spinlock code with fastpath inlines. Since the read_lock and read_trylock implementations ended up very similar, I just expressed read_lock in terms of read_trylock.

As part of this change I also eliminate support for the now-obsolete tns_atomic mode.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
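For readers unfamiliar with the tile "tns" (test-and-set) idiom, here is a minimal sketch (not part of the patch) of the guarded tns/store-back pattern the second fix relies on. It uses only the intrinsics that appear in the diff below; the lock word and helper name are hypothetical, purely for illustration.

	#include <arch/spr_def.h>	/* SPR_INTERRUPT_CRITICAL_SECTION */

	static int my_lock_word;	/* hypothetical lock word; tns leaves "1" behind */

	/*
	 * Hypothetical helper: atomically fetch the lock word and store back a
	 * new value, without ever letting an interrupt on this cpu observe the
	 * transient "1" that tns writes before the store below completes.
	 */
	static inline unsigned int guarded_tns_store(unsigned int new_val)
	{
		unsigned int old;

		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
		old = __insn_tns(&my_lock_word);	/* read old value, set word to 1 */
		my_lock_word = new_val;			/* store back the intended value */
		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
		return old;
	}

Without the critical section, an interrupt landing between the tns and the store sees the lock word stuck at "1" and spins forever waiting for a cpu that is, in fact, itself.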
-rw-r--r--  arch/tile/include/asm/spinlock_32.h |  83
-rw-r--r--  arch/tile/lib/spinlock_32.c         | 161
2 files changed, 103 insertions(+), 141 deletions(-)
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index 88efdde8dd2b..a8f2c6e31a87 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -78,13 +78,6 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #define _RD_COUNT_SHIFT 24
 #define _RD_COUNT_WIDTH 8
 
-/* Internal functions; do not use. */
-void arch_read_lock_slow(arch_rwlock_t *, u32);
-int arch_read_trylock_slow(arch_rwlock_t *);
-void arch_read_unlock_slow(arch_rwlock_t *);
-void arch_write_lock_slow(arch_rwlock_t *, u32);
-void arch_write_unlock_slow(arch_rwlock_t *, u32);
-
 /**
  * arch_read_can_lock() - would read_trylock() succeed?
  */
@@ -104,94 +97,32 @@ static inline int arch_write_can_lock(arch_rwlock_t *rwlock)
 /**
  * arch_read_lock() - acquire a read lock.
  */
-static inline void arch_read_lock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val << _RD_COUNT_WIDTH)) {
-		arch_read_lock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = val + (1 << _RD_COUNT_SHIFT);
-}
+void arch_read_lock(arch_rwlock_t *rwlock);
 
 /**
- * arch_read_lock() - acquire a write lock.
+ * arch_write_lock() - acquire a write lock.
  */
-static inline void arch_write_lock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val != 0)) {
-		arch_write_lock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = 1 << _WR_NEXT_SHIFT;
-}
+void arch_write_lock(arch_rwlock_t *rwlock);
 
 /**
  * arch_read_trylock() - try to acquire a read lock.
  */
-static inline int arch_read_trylock(arch_rwlock_t *rwlock)
-{
-	int locked;
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val & 1))
-		return arch_read_trylock_slow(rwlock);
-	locked = (val << _RD_COUNT_WIDTH) == 0;
-	rwlock->lock = val + (locked << _RD_COUNT_SHIFT);
-	return locked;
-}
+int arch_read_trylock(arch_rwlock_t *rwlock);
 
 /**
  * arch_write_trylock() - try to acquire a write lock.
  */
-static inline int arch_write_trylock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-
-	/*
-	 * If a tns is in progress, or there's a waiting or active locker,
-	 * or active readers, we can't take the lock, so give up.
-	 */
-	if (unlikely(val != 0)) {
-		if (!(val & 1))
-			rwlock->lock = val;
-		return 0;
-	}
-
-	/* Set the "next" field to mark it locked. */
-	rwlock->lock = 1 << _WR_NEXT_SHIFT;
-	return 1;
-}
+int arch_write_trylock(arch_rwlock_t *rwlock);
 
 /**
  * arch_read_unlock() - release a read lock.
  */
-static inline void arch_read_unlock(arch_rwlock_t *rwlock)
-{
-	u32 val;
-	mb();  /* guarantee anything modified under the lock is visible */
-	val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val & 1)) {
-		arch_read_unlock_slow(rwlock);
-		return;
-	}
-	rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
-}
+void arch_read_unlock(arch_rwlock_t *rwlock);
 
 /**
  * arch_write_unlock() - release a write lock.
  */
-static inline void arch_write_unlock(arch_rwlock_t *rwlock)
-{
-	u32 val;
-	mb();  /* guarantee anything modified under the lock is visible */
-	val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val != (1 << _WR_NEXT_SHIFT))) {
-		arch_write_unlock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = 0;
-}
+void arch_write_unlock(arch_rwlock_t *rwlock);
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 5cd1c4004eca..cb0999fb64b4 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <asm/processor.h>
+#include <arch/spr_def.h>
 
 #include "spinlock_common.h"
 
@@ -91,75 +92,75 @@ EXPORT_SYMBOL(arch_spin_unlock_wait);
 #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1)
 
 
-/* Lock the word, spinning until there are no tns-ers. */
-static inline u32 get_rwlock(arch_rwlock_t *rwlock)
-{
-	u32 iterations = 0;
-	for (;;) {
-		u32 val = __insn_tns((int *)&rwlock->lock);
-		if (unlikely(val & 1)) {
-			delay_backoff(iterations++);
-			continue;
-		}
-		return val;
-	}
-}
-
-int arch_read_trylock_slow(arch_rwlock_t *rwlock)
-{
-	u32 val = get_rwlock(rwlock);
-	int locked = (val << RD_COUNT_WIDTH) == 0;
-	rwlock->lock = val + (locked << RD_COUNT_SHIFT);
-	return locked;
-}
-EXPORT_SYMBOL(arch_read_trylock_slow);
-
-void arch_read_unlock_slow(arch_rwlock_t *rwlock)
-{
-	u32 val = get_rwlock(rwlock);
-	rwlock->lock = val - (1 << RD_COUNT_SHIFT);
-}
-EXPORT_SYMBOL(arch_read_unlock_slow);
-
-void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val)
+/*
+ * We can get the read lock if everything but the reader bits (which
+ * are in the high part of the word) is zero, i.e. no active or
+ * waiting writers, no tns.
+ *
+ * We guard the tns/store-back with an interrupt critical section to
+ * preserve the semantic that the same read lock can be acquired in an
+ * interrupt context.
+ */
+inline int arch_read_trylock(arch_rwlock_t *rwlock)
 {
-	u32 eq, mask = 1 << WR_CURR_SHIFT;
-	while (unlikely(val & 1)) {
-		/* Limited backoff since we are the highest-priority task. */
-		relax(4);
-		val = __insn_tns((int *)&rwlock->lock);
+	u32 val;
+	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
+	val = __insn_tns((int *)&rwlock->lock);
+	if (likely((val << _RD_COUNT_WIDTH) == 0)) {
+		val += 1 << RD_COUNT_SHIFT;
+		rwlock->lock = val;
+		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+		BUG_ON(val == 0); /* we don't expect wraparound */
+		return 1;
 	}
-	val = __insn_addb(val, mask);
-	eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT));
-	val = __insn_mz(eq & mask, val);
-	rwlock->lock = val;
+	if ((val & 1) == 0)
+		rwlock->lock = val;
+	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+	return 0;
 }
-EXPORT_SYMBOL(arch_write_unlock_slow);
+EXPORT_SYMBOL(arch_read_trylock);
 
 /*
- * We spin until everything but the reader bits (which are in the high
- * part of the word) are zero, i.e. no active or waiting writers, no tns.
- *
+ * Spin doing arch_read_trylock() until we acquire the lock.
  * ISSUE: This approach can permanently starve readers.  A reader who sees
  * a writer could instead take a ticket lock (just like a writer would),
  * and atomically enter read mode (with 1 reader) when it gets the ticket.
- * This way both readers and writers will always make forward progress
+ * This way both readers and writers would always make forward progress
  * in a finite time.
  */
-void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val)
+void arch_read_lock(arch_rwlock_t *rwlock)
 {
 	u32 iterations = 0;
-	do {
-		if (!(val & 1))
-			rwlock->lock = val;
+	while (unlikely(!arch_read_trylock(rwlock)))
 		delay_backoff(iterations++);
+}
+EXPORT_SYMBOL(arch_read_lock);
+
+void arch_read_unlock(arch_rwlock_t *rwlock)
+{
+	u32 val, iterations = 0;
+
+	mb(); /* guarantee anything modified under the lock is visible */
+	for (;;) {
+		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
 		val = __insn_tns((int *)&rwlock->lock);
-	} while ((val << RD_COUNT_WIDTH) != 0);
-	rwlock->lock = val + (1 << RD_COUNT_SHIFT);
+		if (likely(val & 1) == 0) {
+			rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
+			__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+			break;
+		}
+		__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+		delay_backoff(iterations++);
+	}
 }
-EXPORT_SYMBOL(arch_read_lock_slow);
+EXPORT_SYMBOL(arch_read_unlock);
 
-void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
+/*
+ * We don't need an interrupt critical section here (unlike for
+ * arch_read_lock) since we should never use a bare write lock where
+ * it could be interrupted by code that could try to re-acquire it.
+ */
+void arch_write_lock(arch_rwlock_t *rwlock)
 {
 	/*
 	 * The trailing underscore on this variable (and curr_ below)
@@ -168,6 +169,12 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
 	 */
 	u32 my_ticket_;
 	u32 iterations = 0;
+	u32 val = __insn_tns((int *)&rwlock->lock);
+
+	if (likely(val == 0)) {
+		rwlock->lock = 1 << _WR_NEXT_SHIFT;
+		return;
+	}
 
 	/*
 	 * Wait until there are no readers, then bump up the next
@@ -206,23 +213,47 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
 		relax(4);
 	}
 }
-EXPORT_SYMBOL(arch_write_lock_slow);
+EXPORT_SYMBOL(arch_write_lock);
 
-int __tns_atomic_acquire(atomic_t *lock)
+int arch_write_trylock(arch_rwlock_t *rwlock)
 {
-	int ret;
-	u32 iterations = 0;
+	u32 val = __insn_tns((int *)&rwlock->lock);
 
-	BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION));
-	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
+	/*
+	 * If a tns is in progress, or there's a waiting or active locker,
+	 * or active readers, we can't take the lock, so give up.
+	 */
+	if (unlikely(val != 0)) {
+		if (!(val & 1))
+			rwlock->lock = val;
+		return 0;
+	}
 
-	while ((ret = __insn_tns((void *)&lock->counter)) == 1)
-		delay_backoff(iterations++);
-	return ret;
+	/* Set the "next" field to mark it locked. */
+	rwlock->lock = 1 << _WR_NEXT_SHIFT;
+	return 1;
 }
+EXPORT_SYMBOL(arch_write_trylock);
 
-void __tns_atomic_release(atomic_t *p, int v)
+void arch_write_unlock(arch_rwlock_t *rwlock)
 {
-	p->counter = v;
-	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
+	u32 val, eq, mask;
+
+	mb(); /* guarantee anything modified under the lock is visible */
+	val = __insn_tns((int *)&rwlock->lock);
+	if (likely(val == (1 << _WR_NEXT_SHIFT))) {
+		rwlock->lock = 0;
+		return;
+	}
+	while (unlikely(val & 1)) {
+		/* Limited backoff since we are the highest-priority task. */
+		relax(4);
+		val = __insn_tns((int *)&rwlock->lock);
+	}
+	mask = 1 << WR_CURR_SHIFT;
+	val = __insn_addb(val, mask);
+	eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT));
+	val = __insn_mz(eq & mask, val);
+	rwlock->lock = val;
 }
+EXPORT_SYMBOL(arch_write_unlock);
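As a closing illustration (not from the patch), this hedged sketch shows the kind of caller the first fix protects: a hypothetical driver whose interrupt handler takes the same read lock that the interrupted code may already hold. Linux rwlocks are expected to allow this nested read acquisition, which is why a pending writer must not lock out new readers. All identifiers here are invented for the example; only the generic rwlock and irq APIs are real.

	#include <linux/spinlock.h>
	#include <linux/interrupt.h>

	static DEFINE_RWLOCK(my_rwlock);	/* hypothetical lock protecting shared state */

	/*
	 * Interrupt handler: re-acquires the read lock, possibly on the same cpu
	 * that already holds it in my_reader_path() below.  Before this patch, a
	 * writer spinning on another cpu could lock this nested reader out and
	 * deadlock the cpu; with the fix, new readers are still admitted.
	 */
	static irqreturn_t my_irq_handler(int irq, void *dev_id)
	{
		read_lock(&my_rwlock);
		/* ... read the shared state ... */
		read_unlock(&my_rwlock);
		return IRQ_HANDLED;
	}

	static void my_reader_path(void)
	{
		read_lock(&my_rwlock);	/* may be interrupted while held */
		/* ... read the shared state ... */
		read_unlock(&my_rwlock);
	}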