aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavide Libenzi <davidel@xmailserver.org>2009-06-30 14:41:11 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-06-30 21:55:58 -0400
commit133890103b9de08904f909995973e4b5c08a780e (patch)
tree0cda85a58dafafa0a197cf1a789124203f1e7a88 /fs
parentf7c2df9b55212d5ec94169a4de11e44c683e0af4 (diff)
eventfd: revised interface and cleanups
Change the eventfd interface to de-couple the eventfd memory context, from the file pointer instance. Without such change, there is no clean way to racely free handle the POLLHUP event sent when the last instance of the file* goes away. Also, now the internal eventfd APIs are using the eventfd context instead of the file*. This patch is required by KVM's IRQfd code, which is still under development. Signed-off-by: Davide Libenzi <davidel@xmailserver.org> Cc: Gregory Haskins <ghaskins@novell.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Benjamin LaHaise <bcrl@kvack.org> Cc: Avi Kivity <avi@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c24
-rw-r--r--fs/eventfd.c122
2 files changed, 117 insertions, 29 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 76da12537956..d065b2c3273e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
485{ 485{
486 assert_spin_locked(&ctx->ctx_lock); 486 assert_spin_locked(&ctx->ctx_lock);
487 487
488 if (req->ki_eventfd != NULL)
489 eventfd_ctx_put(req->ki_eventfd);
488 if (req->ki_dtor) 490 if (req->ki_dtor)
489 req->ki_dtor(req); 491 req->ki_dtor(req);
490 if (req->ki_iovec != &req->ki_inline_vec) 492 if (req->ki_iovec != &req->ki_inline_vec)
@@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data)
509 /* Complete the fput(s) */ 511 /* Complete the fput(s) */
510 if (req->ki_filp != NULL) 512 if (req->ki_filp != NULL)
511 __fput(req->ki_filp); 513 __fput(req->ki_filp);
512 if (req->ki_eventfd != NULL)
513 __fput(req->ki_eventfd);
514 514
515 /* Link the iocb into the context's free list */ 515 /* Link the iocb into the context's free list */
516 spin_lock_irq(&ctx->ctx_lock); 516 spin_lock_irq(&ctx->ctx_lock);
@@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data)
528 */ 528 */
529static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) 529static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
530{ 530{
531 int schedule_putreq = 0;
532
533 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", 531 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
534 req, atomic_long_read(&req->ki_filp->f_count)); 532 req, atomic_long_read(&req->ki_filp->f_count));
535 533
@@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
549 * we would not be holding the last reference to the file*, so 547 * we would not be holding the last reference to the file*, so
550 * this function will be executed w/out any aio kthread wakeup. 548 * this function will be executed w/out any aio kthread wakeup.
551 */ 549 */
552 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) 550 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
553 schedule_putreq++;
554 else
555 req->ki_filp = NULL;
556 if (req->ki_eventfd != NULL) {
557 if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
558 schedule_putreq++;
559 else
560 req->ki_eventfd = NULL;
561 }
562 if (unlikely(schedule_putreq)) {
563 get_ioctx(ctx); 551 get_ioctx(ctx);
564 spin_lock(&fput_lock); 552 spin_lock(&fput_lock);
565 list_add(&req->ki_list, &fput_head); 553 list_add(&req->ki_list, &fput_head);
566 spin_unlock(&fput_lock); 554 spin_unlock(&fput_lock);
567 queue_work(aio_wq, &fput_work); 555 queue_work(aio_wq, &fput_work);
568 } else 556 } else {
557 req->ki_filp = NULL;
569 really_put_req(ctx, req); 558 really_put_req(ctx, req);
559 }
570 return 1; 560 return 1;
571} 561}
572 562
@@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1622 * an eventfd() fd, and will be signaled for each completed 1612 * an eventfd() fd, and will be signaled for each completed
1623 * event using the eventfd_signal() function. 1613 * event using the eventfd_signal() function.
1624 */ 1614 */
1625 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1615 req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
1626 if (IS_ERR(req->ki_eventfd)) { 1616 if (IS_ERR(req->ki_eventfd)) {
1627 ret = PTR_ERR(req->ki_eventfd); 1617 ret = PTR_ERR(req->ki_eventfd);
1628 req->ki_eventfd = NULL; 1618 req->ki_eventfd = NULL;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3f0e1974abdc..31d12de83a2a 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -14,35 +14,44 @@
14#include <linux/list.h> 14#include <linux/list.h>
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/anon_inodes.h> 16#include <linux/anon_inodes.h>
17#include <linux/eventfd.h>
18#include <linux/syscalls.h> 17#include <linux/syscalls.h>
19#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/kref.h>
20#include <linux/eventfd.h>
20 21
21struct eventfd_ctx { 22struct eventfd_ctx {
23 struct kref kref;
22 wait_queue_head_t wqh; 24 wait_queue_head_t wqh;
23 /* 25 /*
24 * Every time that a write(2) is performed on an eventfd, the 26 * Every time that a write(2) is performed on an eventfd, the
25 * value of the __u64 being written is added to "count" and a 27 * value of the __u64 being written is added to "count" and a
26 * wakeup is performed on "wqh". A read(2) will return the "count" 28 * wakeup is performed on "wqh". A read(2) will return the "count"
27 * value to userspace, and will reset "count" to zero. The kernel 29 * value to userspace, and will reset "count" to zero. The kernel
28 * size eventfd_signal() also, adds to the "count" counter and 30 * side eventfd_signal() also, adds to the "count" counter and
29 * issue a wakeup. 31 * issue a wakeup.
30 */ 32 */
31 __u64 count; 33 __u64 count;
32 unsigned int flags; 34 unsigned int flags;
33}; 35};
34 36
35/* 37/**
36 * Adds "n" to the eventfd counter "count". Returns "n" in case of 38 * eventfd_signal - Adds @n to the eventfd counter.
37 * success, or a value lower then "n" in case of coutner overflow. 39 * @ctx: [in] Pointer to the eventfd context.
38 * This function is supposed to be called by the kernel in paths 40 * @n: [in] Value of the counter to be added to the eventfd internal counter.
39 * that do not allow sleeping. In this function we allow the counter 41 * The value cannot be negative.
40 * to reach the ULLONG_MAX value, and we signal this as overflow 42 *
41 * condition by returining a POLLERR to poll(2). 43 * This function is supposed to be called by the kernel in paths that do not
44 * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
45 * value, and we signal this as overflow condition by returining a POLLERR
46 * to poll(2).
47 *
48 * Returns @n in case of success, a non-negative number lower than @n in case
49 * of overflow, or the following error codes:
50 *
51 * -EINVAL : The value of @n is negative.
42 */ 52 */
43int eventfd_signal(struct file *file, int n) 53int eventfd_signal(struct eventfd_ctx *ctx, int n)
44{ 54{
45 struct eventfd_ctx *ctx = file->private_data;
46 unsigned long flags; 55 unsigned long flags;
47 56
48 if (n < 0) 57 if (n < 0)
@@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n)
59} 68}
60EXPORT_SYMBOL_GPL(eventfd_signal); 69EXPORT_SYMBOL_GPL(eventfd_signal);
61 70
71static void eventfd_free(struct kref *kref)
72{
73 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
74
75 kfree(ctx);
76}
77
78/**
79 * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
80 * @ctx: [in] Pointer to the eventfd context.
81 *
82 * Returns: In case of success, returns a pointer to the eventfd context.
83 */
84struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
85{
86 kref_get(&ctx->kref);
87 return ctx;
88}
89EXPORT_SYMBOL_GPL(eventfd_ctx_get);
90
91/**
92 * eventfd_ctx_put - Releases a reference to the internal eventfd context.
93 * @ctx: [in] Pointer to eventfd context.
94 *
95 * The eventfd context reference must have been previously acquired either
96 * with eventfd_ctx_get() or eventfd_ctx_fdget()).
97 */
98void eventfd_ctx_put(struct eventfd_ctx *ctx)
99{
100 kref_put(&ctx->kref, eventfd_free);
101}
102EXPORT_SYMBOL_GPL(eventfd_ctx_put);
103
62static int eventfd_release(struct inode *inode, struct file *file) 104static int eventfd_release(struct inode *inode, struct file *file)
63{ 105{
64 kfree(file->private_data); 106 struct eventfd_ctx *ctx = file->private_data;
107
108 wake_up_poll(&ctx->wqh, POLLHUP);
109 eventfd_ctx_put(ctx);
65 return 0; 110 return 0;
66} 111}
67 112
@@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = {
185 .write = eventfd_write, 230 .write = eventfd_write,
186}; 231};
187 232
233/**
234 * eventfd_fget - Acquire a reference of an eventfd file descriptor.
235 * @fd: [in] Eventfd file descriptor.
236 *
237 * Returns a pointer to the eventfd file structure in case of success, or the
238 * following error pointer:
239 *
240 * -EBADF : Invalid @fd file descriptor.
241 * -EINVAL : The @fd file descriptor is not an eventfd file.
242 */
188struct file *eventfd_fget(int fd) 243struct file *eventfd_fget(int fd)
189{ 244{
190 struct file *file; 245 struct file *file;
@@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd)
201} 256}
202EXPORT_SYMBOL_GPL(eventfd_fget); 257EXPORT_SYMBOL_GPL(eventfd_fget);
203 258
259/**
260 * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
261 * @fd: [in] Eventfd file descriptor.
262 *
263 * Returns a pointer to the internal eventfd context, otherwise the error
264 * pointers returned by the following functions:
265 *
266 * eventfd_fget
267 */
268struct eventfd_ctx *eventfd_ctx_fdget(int fd)
269{
270 struct file *file;
271 struct eventfd_ctx *ctx;
272
273 file = eventfd_fget(fd);
274 if (IS_ERR(file))
275 return (struct eventfd_ctx *) file;
276 ctx = eventfd_ctx_get(file->private_data);
277 fput(file);
278
279 return ctx;
280}
281EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
282
283/**
284 * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
285 * @file: [in] Eventfd file pointer.
286 *
287 * Returns a pointer to the internal eventfd context, otherwise the error
288 * pointer:
289 *
290 * -EINVAL : The @fd file descriptor is not an eventfd file.
291 */
292struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
293{
294 if (file->f_op != &eventfd_fops)
295 return ERR_PTR(-EINVAL);
296
297 return eventfd_ctx_get(file->private_data);
298}
299EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
300
204SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) 301SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
205{ 302{
206 int fd; 303 int fd;
@@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
217 if (!ctx) 314 if (!ctx)
218 return -ENOMEM; 315 return -ENOMEM;
219 316
317 kref_init(&ctx->kref);
220 init_waitqueue_head(&ctx->wqh); 318 init_waitqueue_head(&ctx->wqh);
221 ctx->count = count; 319 ctx->count = count;
222 ctx->flags = flags; 320 ctx->flags = flags;