aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2013-05-10 14:50:26 -0400
committerDavid Howells <dhowells@redhat.com>2013-05-15 08:50:38 -0400
commitcb65537ee1134d3cc55c1fa83952bc8eb1212833 (patch)
tree7dff1f14ef4f0121c67f85e37093eff759c17ed3
parentb973425cbb51e08301b34fecdfd476a44507d8cf (diff)
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on atomic_t types. This uses the bit-wake waitqueue table. The key is set to a value outside of the number of bits in a long so that wait_on_bit() won't be woken up accidentally. What I'm using this for is: in a following patch I add a counter to struct fscache_cookie to count the number of outstanding operations that need access to netfs data. The way this works is: (1) When a cookie is allocated, the counter is initialised to 1. (2) When an operation wants to access netfs data, it calls atomic_inc_unless() to increment the counter before it does so. If it was 0, then the counter isn't incremented, the operation isn't permitted to access the netfs data (which might by this point no longer exist) and the operation aborts in some appropriate manner. (3) When an operation finishes with the netfs data, it decrements the counter and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being that it was the last blocker. (4) When a cookie is released, the counter is decremented and the releaser uses wait_on_atomic_t() to wait for the counter to become 0 - which should indicate no one is using the netfs data any longer. The netfs data can then be destroyed. There are some alternatives that I have thought of and that have been suggested by Tejun Heo: (A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work because if that bit happens to be 0 then the wait won't happen - even if the counter is non-zero. (B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the counter reaches 0. Such a flag would be redundant and would add complexity. (C) Adding a waitqueue to fscache_cookie - this would expand that struct by several words for an event that happens just once in each cookie's lifetime. Further, cookies are generally per-file so there are likely to be a lot of them. 
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of a waitqueue. This would add a single word per cookie and so would be less of an expansion - but still an expansion. (E) Adding a static waitqueue to the fscache module. Generally this would be fine, but under certain circumstances many cookies will all get added at the same time (e.g. NFS umount, cache withdrawal) thereby presenting scaling issues. Note that the wait may be significant as disk I/O may be in progress. So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't make much use of the waitqueue I need on a per-cookie basis, but sometimes I have a huge flood of the cookies to deal with. I also don't want to add a whole new set of global waitqueue tables specifically for the dec-to-0 event if I can reuse the bit tables. Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Milosz Tanski <milosz@adfin.com> Acked-by: Jeff Layton <jlayton@redhat.com>
-rw-r--r--include/linux/wait.h24
-rw-r--r--kernel/wait.c88
2 files changed, 112 insertions, 0 deletions
diff --git a/include/linux/wait.h b/include/linux/wait.h
index ac38be2692d8..5bacfc4b336d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -23,6 +23,7 @@ struct __wait_queue {
/*
 * Key identifying what a waiter on the bit-wait waitqueue table is waiting for.
 * @flags:  address of the word (or atomic_t) being waited on
 * @bit_nr: bit number being waited on, or WAIT_ATOMIC_T_BIT_NR when the key
 *          was built for an atomic_t
 */
struct wait_bit_key {
	void			*flags;
	int			bit_nr;
/* Parenthesized so the negative constant is safe in any expression context. */
#define WAIT_ATOMIC_T_BIT_NR	(-1)
};
27 28
28struct wait_bit_queue { 29struct wait_bit_queue {
@@ -60,6 +61,9 @@ struct task_struct;
60#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ 61#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
61 { .flags = word, .bit_nr = bit, } 62 { .flags = word, .bit_nr = bit, }
62 63
/*
 * Initializer for a struct wait_bit_key that refers to the atomic_t at @p.
 * The bit number is fixed at WAIT_ATOMIC_T_BIT_NR.
 */
#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)				\
	{ .flags = (p), .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
66
63extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *); 67extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
64 68
65#define init_waitqueue_head(q) \ 69#define init_waitqueue_head(q) \
@@ -146,8 +150,10 @@ void __wake_up_bit(wait_queue_head_t *, void *, int);
146int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); 150int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
147int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); 151int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
148void wake_up_bit(void *, int); 152void wake_up_bit(void *, int);
153void wake_up_atomic_t(atomic_t *);
149int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned); 154int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned);
150int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned); 155int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned);
156int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
151wait_queue_head_t *bit_waitqueue(void *, int); 157wait_queue_head_t *bit_waitqueue(void *, int);
152 158
153#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) 159#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
@@ -896,5 +902,23 @@ static inline int wait_on_bit_lock(void *word, int bit,
896 return 0; 902 return 0;
897 return out_of_line_wait_on_bit_lock(word, bit, action, mode); 903 return out_of_line_wait_on_bit_lock(word, bit, action, mode);
898} 904}
905
906/**
907 * wait_on_atomic_t - Wait for an atomic_t to become 0
908 * @val: The atomic value being waited on, a kernel virtual address
909 * @action: the function used to sleep, which may take special actions
910 * @mode: the task state to sleep in
911 *
912 * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for
913 * the purpose of getting a waitqueue, but we set the key to a bit number
914 * outside of the target 'word'.
915 */
916static inline
917int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
918{
919 if (atomic_read(val) == 0)
920 return 0;
921 return out_of_line_wait_on_atomic_t(val, action, mode);
922}
899 923
900#endif 924#endif
diff --git a/kernel/wait.c b/kernel/wait.c
index 6698e0c04ead..ce0daa320a26 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -287,3 +287,91 @@ wait_queue_head_t *bit_waitqueue(void *word, int bit)
287 return &zone->wait_table[hash_long(val, zone->wait_table_bits)]; 287 return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
288} 288}
289EXPORT_SYMBOL(bit_waitqueue); 289EXPORT_SYMBOL(bit_waitqueue);
290
291/*
292 * Manipulate the atomic_t address to produce a better bit waitqueue table hash
293 * index (we're keying off bit -1, but that would produce a horrible hash
294 * value).
295 */
296static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
297{
298 if (BITS_PER_LONG == 64) {
299 unsigned long q = (unsigned long)p;
300 return bit_waitqueue((void *)(q & ~1), q & 1);
301 }
302 return bit_waitqueue(p, 0);
303}
304
305static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
306 void *arg)
307{
308 struct wait_bit_key *key = arg;
309 struct wait_bit_queue *wait_bit
310 = container_of(wait, struct wait_bit_queue, wait);
311 atomic_t *val = key->flags;
312
313 if (wait_bit->key.flags != key->flags ||
314 wait_bit->key.bit_nr != key->bit_nr ||
315 atomic_read(val) != 0)
316 return 0;
317 return autoremove_wake_function(wait, mode, sync, key);
318}
319
320/*
321 * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
322 * the actions of __wait_on_atomic_t() are permitted return codes. Nonzero
323 * return codes halt waiting and return.
324 */
325static __sched
326int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
327 int (*action)(atomic_t *), unsigned mode)
328{
329 atomic_t *val;
330 int ret = 0;
331
332 do {
333 prepare_to_wait(wq, &q->wait, mode);
334 val = q->key.flags;
335 if (atomic_read(val) == 0)
336 ret = (*action)(val);
337 } while (!ret && atomic_read(val) != 0);
338 finish_wait(wq, &q->wait);
339 return ret;
340}
341
/*
 * Declare a struct wait_bit_queue called @name for the current task, keyed on
 * the atomic_t at @p and using wake_atomic_t_function() as its wake function.
 */
#define DEFINE_WAIT_ATOMIC_T(name, p)					\
	struct wait_bit_queue name = {					\
		.wait = {						\
			.func = wake_atomic_t_function,			\
			.private = current,				\
			.task_list =					\
				LIST_HEAD_INIT((name).wait.task_list),	\
		},							\
		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
	}
352
353__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
354 unsigned mode)
355{
356 wait_queue_head_t *wq = atomic_t_waitqueue(p);
357 DEFINE_WAIT_ATOMIC_T(wait, p);
358
359 return __wait_on_atomic_t(wq, &wait, action, mode);
360}
361EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
362
363/**
364 * wake_up_atomic_t - Wake up a waiter on a atomic_t
365 * @word: The word being waited on, a kernel virtual address
366 * @bit: The bit of the word being waited on
367 *
368 * Wake up anyone waiting for the atomic_t to go to zero.
369 *
370 * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
371 * check is done by the waiter's wake function, not the by the waker itself).
372 */
373void wake_up_atomic_t(atomic_t *p)
374{
375 __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
376}
377EXPORT_SYMBOL(wake_up_atomic_t);