diff options
Diffstat (limited to 'fs/signalfd.c')
-rw-r--r-- | fs/signalfd.c | 349 |
1 files changed, 349 insertions, 0 deletions
diff --git a/fs/signalfd.c b/fs/signalfd.c new file mode 100644 index 000000000000..7cfeab412b45 --- /dev/null +++ b/fs/signalfd.c | |||
@@ -0,0 +1,349 @@ | |||
1 | /* | ||
2 | * fs/signalfd.c | ||
3 | * | ||
4 | * Copyright (C) 2003 Linus Torvalds | ||
5 | * | ||
6 | * Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org> | ||
7 | * Changed ->read() to return a siginfo structure instead of signal number. | ||
8 | * Fixed locking in ->poll(). | ||
9 | * Added sighand-detach notification. | ||
10 | * Added fd re-use in sys_signalfd() syscall. | ||
11 | * Now using anonymous inode source. | ||
12 | * Thanks to Oleg Nesterov for useful code review and suggestions. | ||
13 | * More comments and suggestions from Arnd Bergmann. | ||
14 | */ | ||
15 | |||
16 | #include <linux/file.h> | ||
17 | #include <linux/poll.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/signal.h> | ||
23 | #include <linux/list.h> | ||
24 | #include <linux/anon_inodes.h> | ||
25 | #include <linux/signalfd.h> | ||
26 | |||
27 | struct signalfd_ctx { | ||
28 | struct list_head lnk; | ||
29 | wait_queue_head_t wqh; | ||
30 | sigset_t sigmask; | ||
31 | struct task_struct *tsk; | ||
32 | }; | ||
33 | |||
/*
 * State carried from signalfd_lock() to the matching signalfd_unlock().
 */
struct signalfd_lockctx {
	/* Snapshot of ctx->tsk whose sighand lock was taken. */
	struct task_struct *tsk;
	/* Filled in by lock_task_sighand(), handed back to unlock_task_sighand(). */
	unsigned long flags;
};
38 | |||
39 | /* | ||
40 | * Tries to acquire the sighand lock. We do not increment the sighand | ||
41 | * use count, and we do not even pin the task struct, so we need to | ||
42 | * do it inside an RCU read lock, and we must be prepared for the | ||
43 | * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand | ||
44 | * being detached. We return 0 if the sighand has been detached, or | ||
45 | * 1 if we were able to pin the sighand lock. | ||
46 | */ | ||
47 | static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) | ||
48 | { | ||
49 | struct sighand_struct *sighand = NULL; | ||
50 | |||
51 | rcu_read_lock(); | ||
52 | lk->tsk = rcu_dereference(ctx->tsk); | ||
53 | if (likely(lk->tsk != NULL)) | ||
54 | sighand = lock_task_sighand(lk->tsk, &lk->flags); | ||
55 | rcu_read_unlock(); | ||
56 | |||
57 | if (sighand && !ctx->tsk) { | ||
58 | unlock_task_sighand(lk->tsk, &lk->flags); | ||
59 | sighand = NULL; | ||
60 | } | ||
61 | |||
62 | return sighand != NULL; | ||
63 | } | ||
64 | |||
65 | static void signalfd_unlock(struct signalfd_lockctx *lk) | ||
66 | { | ||
67 | unlock_task_sighand(lk->tsk, &lk->flags); | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * This must be called with the sighand lock held. | ||
72 | */ | ||
73 | void signalfd_deliver(struct task_struct *tsk, int sig) | ||
74 | { | ||
75 | struct sighand_struct *sighand = tsk->sighand; | ||
76 | struct signalfd_ctx *ctx, *tmp; | ||
77 | |||
78 | BUG_ON(!sig); | ||
79 | list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { | ||
80 | /* | ||
81 | * We use a negative signal value as a way to broadcast that the | ||
82 | * sighand has been orphaned, so that we can notify all the | ||
83 | * listeners about this. Remember the ctx->sigmask is inverted, | ||
84 | * so if the user is interested in a signal, that corresponding | ||
85 | * bit will be zero. | ||
86 | */ | ||
87 | if (sig < 0) { | ||
88 | if (ctx->tsk == tsk) { | ||
89 | ctx->tsk = NULL; | ||
90 | list_del_init(&ctx->lnk); | ||
91 | wake_up(&ctx->wqh); | ||
92 | } | ||
93 | } else { | ||
94 | if (!sigismember(&ctx->sigmask, sig)) | ||
95 | wake_up(&ctx->wqh); | ||
96 | } | ||
97 | } | ||
98 | } | ||
99 | |||
100 | static void signalfd_cleanup(struct signalfd_ctx *ctx) | ||
101 | { | ||
102 | struct signalfd_lockctx lk; | ||
103 | |||
104 | /* | ||
105 | * This is tricky. If the sighand is gone, we do not need to remove | ||
106 | * context from the list, the list itself won't be there anymore. | ||
107 | */ | ||
108 | if (signalfd_lock(ctx, &lk)) { | ||
109 | list_del(&ctx->lnk); | ||
110 | signalfd_unlock(&lk); | ||
111 | } | ||
112 | kfree(ctx); | ||
113 | } | ||
114 | |||
115 | static int signalfd_release(struct inode *inode, struct file *file) | ||
116 | { | ||
117 | signalfd_cleanup(file->private_data); | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static unsigned int signalfd_poll(struct file *file, poll_table *wait) | ||
122 | { | ||
123 | struct signalfd_ctx *ctx = file->private_data; | ||
124 | unsigned int events = 0; | ||
125 | struct signalfd_lockctx lk; | ||
126 | |||
127 | poll_wait(file, &ctx->wqh, wait); | ||
128 | |||
129 | /* | ||
130 | * Let the caller get a POLLIN in this case, ala socket recv() when | ||
131 | * the peer disconnects. | ||
132 | */ | ||
133 | if (signalfd_lock(ctx, &lk)) { | ||
134 | if (next_signal(&lk.tsk->pending, &ctx->sigmask) > 0 || | ||
135 | next_signal(&lk.tsk->signal->shared_pending, | ||
136 | &ctx->sigmask) > 0) | ||
137 | events |= POLLIN; | ||
138 | signalfd_unlock(&lk); | ||
139 | } else | ||
140 | events |= POLLIN; | ||
141 | |||
142 | return events; | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * Copied from copy_siginfo_to_user() in kernel/signal.c | ||
147 | */ | ||
148 | static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | ||
149 | siginfo_t const *kinfo) | ||
150 | { | ||
151 | long err; | ||
152 | |||
153 | BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128); | ||
154 | |||
155 | /* | ||
156 | * Unused memebers should be zero ... | ||
157 | */ | ||
158 | err = __clear_user(uinfo, sizeof(*uinfo)); | ||
159 | |||
160 | /* | ||
161 | * If you change siginfo_t structure, please be sure | ||
162 | * this code is fixed accordingly. | ||
163 | */ | ||
164 | err |= __put_user(kinfo->si_signo, &uinfo->signo); | ||
165 | err |= __put_user(kinfo->si_errno, &uinfo->err); | ||
166 | err |= __put_user((short)kinfo->si_code, &uinfo->code); | ||
167 | switch (kinfo->si_code & __SI_MASK) { | ||
168 | case __SI_KILL: | ||
169 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | ||
170 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | ||
171 | break; | ||
172 | case __SI_TIMER: | ||
173 | err |= __put_user(kinfo->si_tid, &uinfo->tid); | ||
174 | err |= __put_user(kinfo->si_overrun, &uinfo->overrun); | ||
175 | err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); | ||
176 | break; | ||
177 | case __SI_POLL: | ||
178 | err |= __put_user(kinfo->si_band, &uinfo->band); | ||
179 | err |= __put_user(kinfo->si_fd, &uinfo->fd); | ||
180 | break; | ||
181 | case __SI_FAULT: | ||
182 | err |= __put_user((long)kinfo->si_addr, &uinfo->addr); | ||
183 | #ifdef __ARCH_SI_TRAPNO | ||
184 | err |= __put_user(kinfo->si_trapno, &uinfo->trapno); | ||
185 | #endif | ||
186 | break; | ||
187 | case __SI_CHLD: | ||
188 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | ||
189 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | ||
190 | err |= __put_user(kinfo->si_status, &uinfo->status); | ||
191 | err |= __put_user(kinfo->si_utime, &uinfo->utime); | ||
192 | err |= __put_user(kinfo->si_stime, &uinfo->stime); | ||
193 | break; | ||
194 | case __SI_RT: /* This is not generated by the kernel as of now. */ | ||
195 | case __SI_MESGQ: /* But this is */ | ||
196 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | ||
197 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | ||
198 | err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); | ||
199 | break; | ||
200 | default: /* this is just in case for now ... */ | ||
201 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | ||
202 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | ||
203 | break; | ||
204 | } | ||
205 | |||
206 | return err ? -EFAULT: sizeof(*uinfo); | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * Returns either the size of a "struct signalfd_siginfo", or zero if the | ||
211 | * sighand we are attached to, has been orphaned. The "count" parameter | ||
212 | * must be at least the size of a "struct signalfd_siginfo". | ||
213 | */ | ||
214 | static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, | ||
215 | loff_t *ppos) | ||
216 | { | ||
217 | struct signalfd_ctx *ctx = file->private_data; | ||
218 | ssize_t res = 0; | ||
219 | int locked, signo; | ||
220 | siginfo_t info; | ||
221 | struct signalfd_lockctx lk; | ||
222 | DECLARE_WAITQUEUE(wait, current); | ||
223 | |||
224 | if (count < sizeof(struct signalfd_siginfo)) | ||
225 | return -EINVAL; | ||
226 | locked = signalfd_lock(ctx, &lk); | ||
227 | if (!locked) | ||
228 | return 0; | ||
229 | res = -EAGAIN; | ||
230 | signo = dequeue_signal(lk.tsk, &ctx->sigmask, &info); | ||
231 | if (signo == 0 && !(file->f_flags & O_NONBLOCK)) { | ||
232 | add_wait_queue(&ctx->wqh, &wait); | ||
233 | for (;;) { | ||
234 | set_current_state(TASK_INTERRUPTIBLE); | ||
235 | signo = dequeue_signal(lk.tsk, &ctx->sigmask, &info); | ||
236 | if (signo != 0) | ||
237 | break; | ||
238 | if (signal_pending(current)) { | ||
239 | res = -ERESTARTSYS; | ||
240 | break; | ||
241 | } | ||
242 | signalfd_unlock(&lk); | ||
243 | schedule(); | ||
244 | locked = signalfd_lock(ctx, &lk); | ||
245 | if (unlikely(!locked)) { | ||
246 | /* | ||
247 | * Let the caller read zero byte, ala socket | ||
248 | * recv() when the peer disconnect. This test | ||
249 | * must be done before doing a dequeue_signal(), | ||
250 | * because if the sighand has been orphaned, | ||
251 | * the dequeue_signal() call is going to crash. | ||
252 | */ | ||
253 | res = 0; | ||
254 | break; | ||
255 | } | ||
256 | } | ||
257 | remove_wait_queue(&ctx->wqh, &wait); | ||
258 | __set_current_state(TASK_RUNNING); | ||
259 | } | ||
260 | if (likely(locked)) | ||
261 | signalfd_unlock(&lk); | ||
262 | if (likely(signo)) | ||
263 | res = signalfd_copyinfo((struct signalfd_siginfo __user *) buf, | ||
264 | &info); | ||
265 | |||
266 | return res; | ||
267 | } | ||
268 | |||
269 | static const struct file_operations signalfd_fops = { | ||
270 | .release = signalfd_release, | ||
271 | .poll = signalfd_poll, | ||
272 | .read = signalfd_read, | ||
273 | }; | ||
274 | |||
275 | /* | ||
276 | * Create a file descriptor that is associated with our signal | ||
277 | * state. We can pass it around to others if we want to, but | ||
278 | * it will always be _our_ signal state. | ||
279 | */ | ||
280 | asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) | ||
281 | { | ||
282 | int error; | ||
283 | sigset_t sigmask; | ||
284 | struct signalfd_ctx *ctx; | ||
285 | struct sighand_struct *sighand; | ||
286 | struct file *file; | ||
287 | struct inode *inode; | ||
288 | struct signalfd_lockctx lk; | ||
289 | |||
290 | if (sizemask != sizeof(sigset_t) || | ||
291 | copy_from_user(&sigmask, user_mask, sizeof(sigmask))) | ||
292 | return error = -EINVAL; | ||
293 | sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
294 | signotset(&sigmask); | ||
295 | |||
296 | if (ufd == -1) { | ||
297 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
298 | if (!ctx) | ||
299 | return -ENOMEM; | ||
300 | |||
301 | init_waitqueue_head(&ctx->wqh); | ||
302 | ctx->sigmask = sigmask; | ||
303 | ctx->tsk = current; | ||
304 | |||
305 | sighand = current->sighand; | ||
306 | /* | ||
307 | * Add this fd to the list of signal listeners. | ||
308 | */ | ||
309 | spin_lock_irq(&sighand->siglock); | ||
310 | list_add_tail(&ctx->lnk, &sighand->signalfd_list); | ||
311 | spin_unlock_irq(&sighand->siglock); | ||
312 | |||
313 | /* | ||
314 | * When we call this, the initialization must be complete, since | ||
315 | * anon_inode_getfd() will install the fd. | ||
316 | */ | ||
317 | error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", | ||
318 | &signalfd_fops, ctx); | ||
319 | if (error) | ||
320 | goto err_fdalloc; | ||
321 | } else { | ||
322 | file = fget(ufd); | ||
323 | if (!file) | ||
324 | return -EBADF; | ||
325 | ctx = file->private_data; | ||
326 | if (file->f_op != &signalfd_fops) { | ||
327 | fput(file); | ||
328 | return -EINVAL; | ||
329 | } | ||
330 | /* | ||
331 | * We need to be prepared of the fact that the sighand this fd | ||
332 | * is attached to, has been detched. In that case signalfd_lock() | ||
333 | * will return 0, and we'll just skip setting the new mask. | ||
334 | */ | ||
335 | if (signalfd_lock(ctx, &lk)) { | ||
336 | ctx->sigmask = sigmask; | ||
337 | signalfd_unlock(&lk); | ||
338 | } | ||
339 | wake_up(&ctx->wqh); | ||
340 | fput(file); | ||
341 | } | ||
342 | |||
343 | return ufd; | ||
344 | |||
345 | err_fdalloc: | ||
346 | signalfd_cleanup(ctx); | ||
347 | return error; | ||
348 | } | ||
349 | |||