diff options
author | Chris Metcalf <cmetcalf@tilera.com> | 2011-03-01 13:30:15 -0500 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2011-03-10 16:10:41 -0500 |
commit | 3c5ead52ed68406c0ee789024c4ae581be8bcee4 (patch) | |
tree | cd634aba3710115640b372b4fc49fee5ead75acf /arch/tile/lib | |
parent | 5c7707554858eca8903706b6df7cba5c0f802244 (diff) |
arch/tile: fix deadlock bugs in rwlock implementation
The first issue fixed in this patch is that pending rwlock write locks
could lock out new readers; this could cause a deadlock if a read lock was
held on cpu 1, a write lock was then attempted on cpu 2 and was pending,
and cpu 1 was interrupted and attempted to re-acquire a read lock.
The write lock code was modified to not lock out new readers.
The second issue fixed is that there was a narrow race window where a tns
instruction had been issued (setting the lock value to "1") and the store
instruction to reset the lock value correctly had not yet been issued.
In this case, if an interrupt occurred and the same cpu then tried to
manipulate the lock, it would find the lock value set to "1" and spin
forever, assuming some other cpu was partway through updating it. The fix
is to enforce an interrupt critical section around the tns/store pair.
In addition, this change now arranges to always validate that after
a readlock we have not wrapped around the count of readers, which
is only eight bits.
Since these changes make the rwlock "fast path" code heavier weight,
I decided to move all the rwlock code out of line, leaving only the
conventional spinlock code with fastpath inlines. Since the read_lock
and read_trylock implementations ended up very similar, I just expressed
read_lock in terms of read_trylock.
As part of this change I also eliminate support for the now-obsolete
tns_atomic mode.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/lib')
-rw-r--r-- | arch/tile/lib/spinlock_32.c | 161 |
1 files changed, 96 insertions, 65 deletions
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 5cd1c4004eca..cb0999fb64b4 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
18 | #include <arch/spr_def.h> | ||
18 | 19 | ||
19 | #include "spinlock_common.h" | 20 | #include "spinlock_common.h" |
20 | 21 | ||
@@ -91,75 +92,75 @@ EXPORT_SYMBOL(arch_spin_unlock_wait); | |||
91 | #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) | 92 | #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) |
92 | 93 | ||
93 | 94 | ||
94 | /* Lock the word, spinning until there are no tns-ers. */ | 95 | /* |
95 | static inline u32 get_rwlock(arch_rwlock_t *rwlock) | 96 | * We can get the read lock if everything but the reader bits (which |
96 | { | 97 | * are in the high part of the word) is zero, i.e. no active or |
97 | u32 iterations = 0; | 98 | * waiting writers, no tns. |
98 | for (;;) { | 99 | * |
99 | u32 val = __insn_tns((int *)&rwlock->lock); | 100 | * We guard the tns/store-back with an interrupt critical section to |
100 | if (unlikely(val & 1)) { | 101 | * preserve the semantic that the same read lock can be acquired in an |
101 | delay_backoff(iterations++); | 102 | * interrupt context. |
102 | continue; | 103 | */ |
103 | } | 104 | inline int arch_read_trylock(arch_rwlock_t *rwlock) |
104 | return val; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | int arch_read_trylock_slow(arch_rwlock_t *rwlock) | ||
109 | { | ||
110 | u32 val = get_rwlock(rwlock); | ||
111 | int locked = (val << RD_COUNT_WIDTH) == 0; | ||
112 | rwlock->lock = val + (locked << RD_COUNT_SHIFT); | ||
113 | return locked; | ||
114 | } | ||
115 | EXPORT_SYMBOL(arch_read_trylock_slow); | ||
116 | |||
117 | void arch_read_unlock_slow(arch_rwlock_t *rwlock) | ||
118 | { | ||
119 | u32 val = get_rwlock(rwlock); | ||
120 | rwlock->lock = val - (1 << RD_COUNT_SHIFT); | ||
121 | } | ||
122 | EXPORT_SYMBOL(arch_read_unlock_slow); | ||
123 | |||
124 | void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val) | ||
125 | { | 105 | { |
126 | u32 eq, mask = 1 << WR_CURR_SHIFT; | 106 | u32 val; |
127 | while (unlikely(val & 1)) { | 107 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); |
128 | /* Limited backoff since we are the highest-priority task. */ | 108 | val = __insn_tns((int *)&rwlock->lock); |
129 | relax(4); | 109 | if (likely((val << _RD_COUNT_WIDTH) == 0)) { |
130 | val = __insn_tns((int *)&rwlock->lock); | 110 | val += 1 << RD_COUNT_SHIFT; |
111 | rwlock->lock = val; | ||
112 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
113 | BUG_ON(val == 0); /* we don't expect wraparound */ | ||
114 | return 1; | ||
131 | } | 115 | } |
132 | val = __insn_addb(val, mask); | 116 | if ((val & 1) == 0) |
133 | eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); | 117 | rwlock->lock = val; |
134 | val = __insn_mz(eq & mask, val); | 118 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); |
135 | rwlock->lock = val; | 119 | return 0; |
136 | } | 120 | } |
137 | EXPORT_SYMBOL(arch_write_unlock_slow); | 121 | EXPORT_SYMBOL(arch_read_trylock); |
138 | 122 | ||
139 | /* | 123 | /* |
140 | * We spin until everything but the reader bits (which are in the high | 124 | * Spin doing arch_read_trylock() until we acquire the lock. |
141 | * part of the word) are zero, i.e. no active or waiting writers, no tns. | ||
142 | * | ||
143 | * ISSUE: This approach can permanently starve readers. A reader who sees | 125 | * ISSUE: This approach can permanently starve readers. A reader who sees |
144 | * a writer could instead take a ticket lock (just like a writer would), | 126 | * a writer could instead take a ticket lock (just like a writer would), |
145 | * and atomically enter read mode (with 1 reader) when it gets the ticket. | 127 | * and atomically enter read mode (with 1 reader) when it gets the ticket. |
146 | * This way both readers and writers will always make forward progress | 128 | * This way both readers and writers would always make forward progress |
147 | * in a finite time. | 129 | * in a finite time. |
148 | */ | 130 | */ |
149 | void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val) | 131 | void arch_read_lock(arch_rwlock_t *rwlock) |
150 | { | 132 | { |
151 | u32 iterations = 0; | 133 | u32 iterations = 0; |
152 | do { | 134 | while (unlikely(!arch_read_trylock(rwlock))) |
153 | if (!(val & 1)) | ||
154 | rwlock->lock = val; | ||
155 | delay_backoff(iterations++); | 135 | delay_backoff(iterations++); |
136 | } | ||
137 | EXPORT_SYMBOL(arch_read_lock); | ||
138 | |||
139 | void arch_read_unlock(arch_rwlock_t *rwlock) | ||
140 | { | ||
141 | u32 val, iterations = 0; | ||
142 | |||
143 | mb(); /* guarantee anything modified under the lock is visible */ | ||
144 | for (;;) { | ||
145 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | ||
156 | val = __insn_tns((int *)&rwlock->lock); | 146 | val = __insn_tns((int *)&rwlock->lock); |
157 | } while ((val << RD_COUNT_WIDTH) != 0); | 147 | if (likely(val & 1) == 0) { |
158 | rwlock->lock = val + (1 << RD_COUNT_SHIFT); | 148 | rwlock->lock = val - (1 << _RD_COUNT_SHIFT); |
149 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
150 | break; | ||
151 | } | ||
152 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
153 | delay_backoff(iterations++); | ||
154 | } | ||
159 | } | 155 | } |
160 | EXPORT_SYMBOL(arch_read_lock_slow); | 156 | EXPORT_SYMBOL(arch_read_unlock); |
161 | 157 | ||
162 | void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | 158 | /* |
159 | * We don't need an interrupt critical section here (unlike for | ||
160 | * arch_read_lock) since we should never use a bare write lock where | ||
161 | * it could be interrupted by code that could try to re-acquire it. | ||
162 | */ | ||
163 | void arch_write_lock(arch_rwlock_t *rwlock) | ||
163 | { | 164 | { |
164 | /* | 165 | /* |
165 | * The trailing underscore on this variable (and curr_ below) | 166 | * The trailing underscore on this variable (and curr_ below) |
@@ -168,6 +169,12 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
168 | */ | 169 | */ |
169 | u32 my_ticket_; | 170 | u32 my_ticket_; |
170 | u32 iterations = 0; | 171 | u32 iterations = 0; |
172 | u32 val = __insn_tns((int *)&rwlock->lock); | ||
173 | |||
174 | if (likely(val == 0)) { | ||
175 | rwlock->lock = 1 << _WR_NEXT_SHIFT; | ||
176 | return; | ||
177 | } | ||
171 | 178 | ||
172 | /* | 179 | /* |
173 | * Wait until there are no readers, then bump up the next | 180 | * Wait until there are no readers, then bump up the next |
@@ -206,23 +213,47 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
206 | relax(4); | 213 | relax(4); |
207 | } | 214 | } |
208 | } | 215 | } |
209 | EXPORT_SYMBOL(arch_write_lock_slow); | 216 | EXPORT_SYMBOL(arch_write_lock); |
210 | 217 | ||
211 | int __tns_atomic_acquire(atomic_t *lock) | 218 | int arch_write_trylock(arch_rwlock_t *rwlock) |
212 | { | 219 | { |
213 | int ret; | 220 | u32 val = __insn_tns((int *)&rwlock->lock); |
214 | u32 iterations = 0; | ||
215 | 221 | ||
216 | BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION)); | 222 | /* |
217 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | 223 | * If a tns is in progress, or there's a waiting or active locker, |
224 | * or active readers, we can't take the lock, so give up. | ||
225 | */ | ||
226 | if (unlikely(val != 0)) { | ||
227 | if (!(val & 1)) | ||
228 | rwlock->lock = val; | ||
229 | return 0; | ||
230 | } | ||
218 | 231 | ||
219 | while ((ret = __insn_tns((void *)&lock->counter)) == 1) | 232 | /* Set the "next" field to mark it locked. */ |
220 | delay_backoff(iterations++); | 233 | rwlock->lock = 1 << _WR_NEXT_SHIFT; |
221 | return ret; | 234 | return 1; |
222 | } | 235 | } |
236 | EXPORT_SYMBOL(arch_write_trylock); | ||
223 | 237 | ||
224 | void __tns_atomic_release(atomic_t *p, int v) | 238 | void arch_write_unlock(arch_rwlock_t *rwlock) |
225 | { | 239 | { |
226 | p->counter = v; | 240 | u32 val, eq, mask; |
227 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | 241 | |
242 | mb(); /* guarantee anything modified under the lock is visible */ | ||
243 | val = __insn_tns((int *)&rwlock->lock); | ||
244 | if (likely(val == (1 << _WR_NEXT_SHIFT))) { | ||
245 | rwlock->lock = 0; | ||
246 | return; | ||
247 | } | ||
248 | while (unlikely(val & 1)) { | ||
249 | /* Limited backoff since we are the highest-priority task. */ | ||
250 | relax(4); | ||
251 | val = __insn_tns((int *)&rwlock->lock); | ||
252 | } | ||
253 | mask = 1 << WR_CURR_SHIFT; | ||
254 | val = __insn_addb(val, mask); | ||
255 | eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); | ||
256 | val = __insn_mz(eq & mask, val); | ||
257 | rwlock->lock = val; | ||
228 | } | 258 | } |
259 | EXPORT_SYMBOL(arch_write_unlock); | ||