diff options
Diffstat (limited to 'lib/rwsem.c')
-rw-r--r-- | lib/rwsem.c | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/lib/rwsem.c b/lib/rwsem.c new file mode 100644 index 000000000000..7644089ec8fa --- /dev/null +++ b/lib/rwsem.c | |||
@@ -0,0 +1,268 @@ | |||
1 | /* rwsem.c: R/W semaphores: contention handling functions | ||
2 | * | ||
3 | * Written by David Howells (dhowells@redhat.com). | ||
4 | * Derived from arch/i386/kernel/semaphore.c | ||
5 | */ | ||
6 | #include <linux/rwsem.h> | ||
7 | #include <linux/sched.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/module.h> | ||
10 | |||
11 | struct rwsem_waiter { | ||
12 | struct list_head list; | ||
13 | struct task_struct *task; | ||
14 | unsigned int flags; | ||
15 | #define RWSEM_WAITING_FOR_READ 0x00000001 | ||
16 | #define RWSEM_WAITING_FOR_WRITE 0x00000002 | ||
17 | }; | ||
18 | |||
#if RWSEM_DEBUG
#undef rwsemtrace
/*
 * debugging aid: log the semaphore's address and count
 * - if sem->debug is set, also log the current pid, the caller's message
 *   and the count
 */
void rwsemtrace(struct rw_semaphore *sem, const char *str)
{
	printk("sem=%p\n", sem);
	printk("(sem)=%08lx\n", sem->count);

	/* the per-call message is only wanted for semaphores marked debug */
	if (!sem->debug)
		return;

	printk("[%d] %s({%08lx})\n", current->pid, str, sem->count);
}
#endif
29 | |||
30 | /* | ||
31 | * handle the lock release when processes blocked on it that can now run | ||
32 | * - if we come here from up_xxxx(), then: | ||
33 | * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) | ||
34 | * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) | ||
35 | * - there must be someone on the queue | ||
36 | * - the spinlock must be held by the caller | ||
37 | * - woken process blocks are discarded from the list after having task zeroed | ||
38 | * - writers are only woken if downgrading is false | ||
39 | */ | ||
40 | static inline struct rw_semaphore * | ||
41 | __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) | ||
42 | { | ||
43 | struct rwsem_waiter *waiter; | ||
44 | struct task_struct *tsk; | ||
45 | struct list_head *next; | ||
46 | signed long oldcount, woken, loop; | ||
47 | |||
48 | rwsemtrace(sem, "Entering __rwsem_do_wake"); | ||
49 | |||
50 | if (downgrading) | ||
51 | goto dont_wake_writers; | ||
52 | |||
53 | /* if we came through an up_xxxx() call, we only only wake someone up | ||
54 | * if we can transition the active part of the count from 0 -> 1 | ||
55 | */ | ||
56 | try_again: | ||
57 | oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) | ||
58 | - RWSEM_ACTIVE_BIAS; | ||
59 | if (oldcount & RWSEM_ACTIVE_MASK) | ||
60 | goto undo; | ||
61 | |||
62 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
63 | |||
64 | /* try to grant a single write lock if there's a writer at the front | ||
65 | * of the queue - note we leave the 'active part' of the count | ||
66 | * incremented by 1 and the waiting part incremented by 0x00010000 | ||
67 | */ | ||
68 | if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) | ||
69 | goto readers_only; | ||
70 | |||
71 | /* We must be careful not to touch 'waiter' after we set ->task = NULL. | ||
72 | * It is an allocated on the waiter's stack and may become invalid at | ||
73 | * any time after that point (due to a wakeup from another source). | ||
74 | */ | ||
75 | list_del(&waiter->list); | ||
76 | tsk = waiter->task; | ||
77 | mb(); | ||
78 | waiter->task = NULL; | ||
79 | wake_up_process(tsk); | ||
80 | put_task_struct(tsk); | ||
81 | goto out; | ||
82 | |||
83 | /* don't want to wake any writers */ | ||
84 | dont_wake_writers: | ||
85 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
86 | if (waiter->flags & RWSEM_WAITING_FOR_WRITE) | ||
87 | goto out; | ||
88 | |||
89 | /* grant an infinite number of read locks to the readers at the front | ||
90 | * of the queue | ||
91 | * - note we increment the 'active part' of the count by the number of | ||
92 | * readers before waking any processes up | ||
93 | */ | ||
94 | readers_only: | ||
95 | woken = 0; | ||
96 | do { | ||
97 | woken++; | ||
98 | |||
99 | if (waiter->list.next == &sem->wait_list) | ||
100 | break; | ||
101 | |||
102 | waiter = list_entry(waiter->list.next, | ||
103 | struct rwsem_waiter, list); | ||
104 | |||
105 | } while (waiter->flags & RWSEM_WAITING_FOR_READ); | ||
106 | |||
107 | loop = woken; | ||
108 | woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; | ||
109 | if (!downgrading) | ||
110 | /* we'd already done one increment earlier */ | ||
111 | woken -= RWSEM_ACTIVE_BIAS; | ||
112 | |||
113 | rwsem_atomic_add(woken, sem); | ||
114 | |||
115 | next = sem->wait_list.next; | ||
116 | for (; loop > 0; loop--) { | ||
117 | waiter = list_entry(next, struct rwsem_waiter, list); | ||
118 | next = waiter->list.next; | ||
119 | tsk = waiter->task; | ||
120 | mb(); | ||
121 | waiter->task = NULL; | ||
122 | wake_up_process(tsk); | ||
123 | put_task_struct(tsk); | ||
124 | } | ||
125 | |||
126 | sem->wait_list.next = next; | ||
127 | next->prev = &sem->wait_list; | ||
128 | |||
129 | out: | ||
130 | rwsemtrace(sem, "Leaving __rwsem_do_wake"); | ||
131 | return sem; | ||
132 | |||
133 | /* undo the change to count, but check for a transition 1->0 */ | ||
134 | undo: | ||
135 | if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) | ||
136 | goto out; | ||
137 | goto try_again; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * wait for a lock to be granted | ||
142 | */ | ||
143 | static inline struct rw_semaphore * | ||
144 | rwsem_down_failed_common(struct rw_semaphore *sem, | ||
145 | struct rwsem_waiter *waiter, signed long adjustment) | ||
146 | { | ||
147 | struct task_struct *tsk = current; | ||
148 | signed long count; | ||
149 | |||
150 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
151 | |||
152 | /* set up my own style of waitqueue */ | ||
153 | spin_lock_irq(&sem->wait_lock); | ||
154 | waiter->task = tsk; | ||
155 | get_task_struct(tsk); | ||
156 | |||
157 | list_add_tail(&waiter->list, &sem->wait_list); | ||
158 | |||
159 | /* we're now waiting on the lock, but no longer actively read-locking */ | ||
160 | count = rwsem_atomic_update(adjustment, sem); | ||
161 | |||
162 | /* if there are no active locks, wake the front queued process(es) up */ | ||
163 | if (!(count & RWSEM_ACTIVE_MASK)) | ||
164 | sem = __rwsem_do_wake(sem, 0); | ||
165 | |||
166 | spin_unlock_irq(&sem->wait_lock); | ||
167 | |||
168 | /* wait to be given the lock */ | ||
169 | for (;;) { | ||
170 | if (!waiter->task) | ||
171 | break; | ||
172 | schedule(); | ||
173 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
174 | } | ||
175 | |||
176 | tsk->state = TASK_RUNNING; | ||
177 | |||
178 | return sem; | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * wait for the read lock to be granted | ||
183 | */ | ||
184 | struct rw_semaphore fastcall __sched * | ||
185 | rwsem_down_read_failed(struct rw_semaphore *sem) | ||
186 | { | ||
187 | struct rwsem_waiter waiter; | ||
188 | |||
189 | rwsemtrace(sem, "Entering rwsem_down_read_failed"); | ||
190 | |||
191 | waiter.flags = RWSEM_WAITING_FOR_READ; | ||
192 | rwsem_down_failed_common(sem, &waiter, | ||
193 | RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); | ||
194 | |||
195 | rwsemtrace(sem, "Leaving rwsem_down_read_failed"); | ||
196 | return sem; | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * wait for the write lock to be granted | ||
201 | */ | ||
202 | struct rw_semaphore fastcall __sched * | ||
203 | rwsem_down_write_failed(struct rw_semaphore *sem) | ||
204 | { | ||
205 | struct rwsem_waiter waiter; | ||
206 | |||
207 | rwsemtrace(sem, "Entering rwsem_down_write_failed"); | ||
208 | |||
209 | waiter.flags = RWSEM_WAITING_FOR_WRITE; | ||
210 | rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); | ||
211 | |||
212 | rwsemtrace(sem, "Leaving rwsem_down_write_failed"); | ||
213 | return sem; | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * handle waking up a waiter on the semaphore | ||
218 | * - up_read/up_write has decremented the active part of count if we come here | ||
219 | */ | ||
220 | struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem) | ||
221 | { | ||
222 | unsigned long flags; | ||
223 | |||
224 | rwsemtrace(sem, "Entering rwsem_wake"); | ||
225 | |||
226 | spin_lock_irqsave(&sem->wait_lock, flags); | ||
227 | |||
228 | /* do nothing if list empty */ | ||
229 | if (!list_empty(&sem->wait_list)) | ||
230 | sem = __rwsem_do_wake(sem, 0); | ||
231 | |||
232 | spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
233 | |||
234 | rwsemtrace(sem, "Leaving rwsem_wake"); | ||
235 | |||
236 | return sem; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * downgrade a write lock into a read lock | ||
241 | * - caller incremented waiting part of count and discovered it still negative | ||
242 | * - just wake up any readers at the front of the queue | ||
243 | */ | ||
244 | struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem) | ||
245 | { | ||
246 | unsigned long flags; | ||
247 | |||
248 | rwsemtrace(sem, "Entering rwsem_downgrade_wake"); | ||
249 | |||
250 | spin_lock_irqsave(&sem->wait_lock, flags); | ||
251 | |||
252 | /* do nothing if list empty */ | ||
253 | if (!list_empty(&sem->wait_list)) | ||
254 | sem = __rwsem_do_wake(sem, 1); | ||
255 | |||
256 | spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
257 | |||
258 | rwsemtrace(sem, "Leaving rwsem_downgrade_wake"); | ||
259 | return sem; | ||
260 | } | ||
261 | |||
/* contention-handling entry points defined in this file */
EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);
/* the trace helper only exists in RWSEM_DEBUG builds */
#if RWSEM_DEBUG
EXPORT_SYMBOL(rwsemtrace);
#endif