author		Davide Libenzi <davidel@xmailserver.org>	2009-03-31 18:24:20 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-01 11:59:20 -0400
commit		4ede816ac36e027db5fe0051ad9c73f76db63772 (patch)
tree		6c79eae26067c2355a2d4c7e65bca84208a76c40
parent		bcd0b235bf3808dec5115c381cd55568f63b85f0 (diff)
epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key()
This patchset introduces wakeup hints for some of the most popular (from epoll's point of view) devices, so that the epoll code can avoid spurious wakeups on its waiters.

The problem with epoll is that the callback-based wakeups do not, at the moment, carry any information about the events the wakeup relates to. So the only choice epoll has (not being able to call f_op->poll() from inside the callback) is to add the file* to a ready-list and resolve the real events later on, at epoll_wait() (or its own f_op->poll()) time. This can cause spurious wakeups, since the wake_up() itself might be for an event the caller is not interested in. The rate of these spurious wakeups can be pretty high when many network sockets are being monitored.

By allowing devices to report which events the wakeups refer to (at least the two major classes - POLLIN/POLLOUT), we are able to spare useless wakeups by proper handling inside epoll's poll callback. Epoll will in any case have to call f_op->poll() on the file* later on, since the change needed to deliver the full event set via wakeup is too invasive for the way our f_op->poll() system works (the full event set is calculated inside the poll function - there are too many of them to even start considering the change - and poll/select would need changing too). Epoll is changed so that both devices which send event hints and the ones that don't are handled correctly; the former simply gain some efficiency.

As a general rule, devices should attach an event mask, by using the key-aware wakeup macros, when waking up their poll wait queues. I tested this (together with the epoll poll fix patch Andrew has in -mm) and wakeups for the supported devices are correctly filtered. Test program available here:

http://www.xmailserver.org/epoll_test.c

This patch:

Nothing revolutionary here - just using the "key" that our wakeup core already supports. The __wake_up_locked_key() case was a no-brainer, since both __wake_up_locked() and __wake_up_locked_key() are thin wrappers around __wake_up_common(). The __wake_up_sync() function had a body, so the choice was between borrowing the body for __wake_up_sync_key() and calling it from __wake_up_sync(), or making an inline and calling it from both. I chose the former, since on most archs it all resolves to "mov $0, REG; jmp ADDR".

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: David Miller <davem@davemloft.net>
Cc: William Lee Irwin III <wli@movementarian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
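As an illustration of the key-aware wakeup rule described above, here is a minimal waker-side sketch. It is hypothetical and not part of this patch: mydev, read_wait and mydev_data_ready are invented names, and the only assumption taken from the patch is the convention that the key encodes the POLLIN/POLLOUT class the wakeup refers to:

    #include <linux/wait.h>
    #include <linux/poll.h>
    #include <linux/sched.h>

    struct mydev {                       /* illustrative device state */
            wait_queue_head_t read_wait; /* init_waitqueue_head() at setup */
    };

    /* Called when new data has been queued for readers. */
    static void mydev_data_ready(struct mydev *dev)
    {
            /*
             * Pass POLLIN as the wakeup key: key-aware waiters (such as
             * epoll's callback) can then drop wakeups for event classes
             * they did not subscribe to, while key-less waiters see no
             * behavioral change.
             */
            __wake_up(&dev->read_wait, TASK_INTERRUPTIBLE, 1,
                      (void *)(unsigned long)POLLIN);
    }

A waker that knows it is about to schedule away would instead use the __wake_up_sync_key() entry point added by this patch, with the same key convention.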
-rw-r--r--	include/linux/wait.h	7
-rw-r--r--	kernel/sched.c	23
2 files changed, 24 insertions, 6 deletions
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a210ede73b56..0d2eeb03a718 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -135,8 +135,11 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
 void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, int sync, void *key);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
-extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
+			void *key);
+void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_bit(wait_queue_head_t *, void *, int);
 int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
 int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
diff --git a/kernel/sched.c b/kernel/sched.c
index 196d48babbef..73513f4e19df 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5196,11 +5196,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
 	__wake_up_common(q, mode, 1, 0, NULL);
 }
 
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+{
+	__wake_up_common(q, mode, 1, 0, key);
+}
+
 /**
- * __wake_up_sync - wake up threads blocked on a waitqueue.
+ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
  * @q: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
+ * @key: opaque value to be passed to wakeup targets
  *
  * The sync wakeup differs that the waker knows that it will schedule
  * away soon, so while the target thread will be woken up, it will not
@@ -5209,8 +5215,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
  *
  * On UP it can prevent extra preemption.
  */
-void
-__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 	int sync = 1;
@@ -5222,9 +5228,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 		sync = 0;
 
 	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, sync, NULL);
+	__wake_up_common(q, mode, nr_exclusive, sync, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
+EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+
+/*
+ * __wake_up_sync - see __wake_up_sync_key()
+ */
+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+{
+	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
+}
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
 /**
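On the waiter side, __wake_up_common() simply hands this key to each wait queue entry's callback, which is what makes the filtering possible. A hedged sketch of a key-aware callback follows; my_waiter and its fields are hypothetical names, while epoll's real ep_poll_callback() gains the equivalent check later in this series:

    #include <linux/kernel.h>
    #include <linux/wait.h>
    #include <linux/poll.h>

    struct my_waiter {                /* hypothetical waiter state */
            wait_queue_t wait;        /* embedded wait queue entry */
            unsigned long events;     /* e.g. POLLIN | POLLOUT */
    };

    static int my_wake_function(wait_queue_t *wait, unsigned mode,
                                int sync, void *key)
    {
            struct my_waiter *w = container_of(wait, struct my_waiter, wait);

            /*
             * Filter only when the waker supplied an event hint. A NULL
             * key carries no hint and must never be filtered, which is
             * how key-less wakers keep working unchanged.
             */
            if (key && !((unsigned long)key & w->events))
                    return 0;   /* wakeup is for an event class we ignore */

            return autoremove_wake_function(wait, mode, sync, key);
    }

Returning 0 from the callback means this waiter was not woken, so __wake_up_common() does not count it against nr_exclusive, and the spurious wakeup is avoided entirely.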