aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Oleg Nesterov <oleg@redhat.com> 2012-02-24 14:07:11 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-02-24 14:42:50 -0500
commit d80e731ecab420ddcb79ee9d0ac427acbc187b4b (patch)
tree e96a660b75b5bee8ae2c315878ec917b9c0da446
parent 855a85f704026d5fe7de94fb1b765fe03404507f (diff)
epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()
This patch is intentionally incomplete to simplify the review. It ignores ep_unregister_pollwait(), which plays with the same wqh. See the next change. epoll assumes that the EPOLL_CTL_ADD'ed file controls everything f_op->poll() needs. In particular, it assumes that the wait queue can't go away until eventpoll_release(). This is not true in the case of signalfd: the task which does EPOLL_CTL_ADD uses its ->sighand, which is not connected to the file. This patch adds the special event, POLLFREE, currently only for epoll. It expects that the init_poll_funcptr()'ed hook should do the necessary cleanup. Perhaps it should be defined as EPOLLFREE in eventpoll. __cleanup_sighand() is changed to do wake_up_poll(POLLFREE) if ->signalfd_wqh is not empty; we add the new signalfd_cleanup() helper for this. ep_poll_callback(POLLFREE) simply does list_del_init(task_list). This makes this poll entry inconsistent, but we don't care. If you share an epoll fd which contains our sigfd with another process, you should blame yourself. signalfd is "really special". I simply do not know how we can define the "right" semantics if it is used with epoll. The main problem is that epoll calls signalfd_poll() once to establish the connection with the wait queue; after that, signalfd_poll(NULL) returns different/inconsistent results depending on who does EPOLL_CTL_MOD/signalfd_read/etc. IOW: apart from sigmask, signalfd has nothing to do with the file; it works with the current thread. In short: this patch is a hack which tries to fix the symptoms. It also assumes that nobody can take tasklist_lock under epoll locks; this seems to be true. Note: - we do not have wake_up_all_poll(), but wake_up_poll() is fine because poll/epoll doesn't use WQ_FLAG_EXCLUSIVE. - signalfd_cleanup() uses POLLHUP along with POLLFREE; we need a couple of simple changes in eventpoll.c to make sure it can't be "lost".
Reported-by: Maxime Bizon <mbizon@freebox.fr> Cc: <stable@kernel.org> Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/eventpoll.c 4
-rw-r--r-- fs/signalfd.c 11
-rw-r--r-- include/asm-generic/poll.h 2
-rw-r--r-- include/linux/signalfd.h 5
-rw-r--r-- kernel/fork.c 5
5 files changed, 25 insertions, 2 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf24..34bbfc6dd8dc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -842,6 +842,10 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
842 struct epitem *epi = ep_item_from_wait(wait); 842 struct epitem *epi = ep_item_from_wait(wait);
843 struct eventpoll *ep = epi->ep; 843 struct eventpoll *ep = epi->ep;
844 844
845 /* the caller holds eppoll_entry->whead->lock */
846 if ((unsigned long)key & POLLFREE)
847 list_del_init(&wait->task_list);
848
845 spin_lock_irqsave(&ep->lock, flags); 849 spin_lock_irqsave(&ep->lock, flags);
846 850
847 /* 851 /*
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451dd..79c1eea98a3a 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,17 @@
30#include <linux/signalfd.h> 30#include <linux/signalfd.h>
31#include <linux/syscalls.h> 31#include <linux/syscalls.h>
32 32
33void signalfd_cleanup(struct sighand_struct *sighand)
34{
35 wait_queue_head_t *wqh = &sighand->signalfd_wqh;
36
37 if (likely(!waitqueue_active(wqh)))
38 return;
39
40 /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
41 wake_up_poll(wqh, POLLHUP | POLLFREE);
42}
43
33struct signalfd_ctx { 44struct signalfd_ctx {
34 sigset_t sigmask; 45 sigset_t sigmask;
35}; 46};
diff --git a/include/asm-generic/poll.h b/include/asm-generic/poll.h
index 44bce836d350..9ce7f44aebd2 100644
--- a/include/asm-generic/poll.h
+++ b/include/asm-generic/poll.h
@@ -28,6 +28,8 @@
28#define POLLRDHUP 0x2000 28#define POLLRDHUP 0x2000
29#endif 29#endif
30 30
31#define POLLFREE 0x4000 /* currently only for epoll */
32
31struct pollfd { 33struct pollfd {
32 int fd; 34 int fd;
33 short events; 35 short events;
diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h
index 3ff4961da9b5..247399b2979a 100644
--- a/include/linux/signalfd.h
+++ b/include/linux/signalfd.h
@@ -61,13 +61,16 @@ static inline void signalfd_notify(struct task_struct *tsk, int sig)
61 wake_up(&tsk->sighand->signalfd_wqh); 61 wake_up(&tsk->sighand->signalfd_wqh);
62} 62}
63 63
64extern void signalfd_cleanup(struct sighand_struct *sighand);
65
64#else /* CONFIG_SIGNALFD */ 66#else /* CONFIG_SIGNALFD */
65 67
66static inline void signalfd_notify(struct task_struct *tsk, int sig) { } 68static inline void signalfd_notify(struct task_struct *tsk, int sig) { }
67 69
70static inline void signalfd_cleanup(struct sighand_struct *sighand) { }
71
68#endif /* CONFIG_SIGNALFD */ 72#endif /* CONFIG_SIGNALFD */
69 73
70#endif /* __KERNEL__ */ 74#endif /* __KERNEL__ */
71 75
72#endif /* _LINUX_SIGNALFD_H */ 76#endif /* _LINUX_SIGNALFD_H */
73
diff --git a/kernel/fork.c b/kernel/fork.c
index b77fd559c78e..e2cd3e2a5ae8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
66#include <linux/user-return-notifier.h> 66#include <linux/user-return-notifier.h>
67#include <linux/oom.h> 67#include <linux/oom.h>
68#include <linux/khugepaged.h> 68#include <linux/khugepaged.h>
69#include <linux/signalfd.h>
69 70
70#include <asm/pgtable.h> 71#include <asm/pgtable.h>
71#include <asm/pgalloc.h> 72#include <asm/pgalloc.h>
@@ -935,8 +936,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
935 936
936void __cleanup_sighand(struct sighand_struct *sighand) 937void __cleanup_sighand(struct sighand_struct *sighand)
937{ 938{
938 if (atomic_dec_and_test(&sighand->count)) 939 if (atomic_dec_and_test(&sighand->count)) {
940 signalfd_cleanup(sighand);
939 kmem_cache_free(sighand_cachep, sighand); 941 kmem_cache_free(sighand_cachep, sighand);
942 }
940} 943}
941 944
942 945