aboutsummaryrefslogtreecommitdiffstats
path: root/fs/eventfd.c
diff options
context:
space:
mode:
authorDavide Libenzi <davidel@xmailserver.org>2009-03-31 18:24:18 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-01 11:59:20 -0400
commitbcd0b235bf3808dec5115c381cd55568f63b85f0 (patch)
treed73c4aa83dcd5321d2c48e070020576098b9705e /fs/eventfd.c
parent4f0989dbfa8d18dd17c32120aac1eb3e906a62a2 (diff)
eventfd: improve support for semaphore-like behavior
People started using eventfd in a semaphore-like way where before they were using pipes. That is, counter-based resource access. Where a "wait()" returns immediately by decrementing the counter by one, if counter is greater than zero. Otherwise will wait. And where a "post(count)" will add count to the counter releasing the appropriate amount of waiters. If eventfd the "post" (write) part is fine, while the "wait" (read) does not dequeue 1, but the whole counter value. The problem with eventfd is that a read() on the fd returns and wipes the whole counter, making the use of it as semaphore a little bit more cumbersome. You can do a read() followed by a write() of COUNTER-1, but IMO it's pretty easy and cheap to make this work w/out extra steps. This patch introduces a new eventfd flag that tells eventfd to only dequeue 1 from the counter, allowing simple read/write to make it behave like a semaphore. Simple test here: http://www.xmailserver.org/eventfd-sem.c To be back-compatible with earlier kernels, userspace applications should probe for the availability of this feature via #ifdef EFD_SEMAPHORE fd = eventfd2 (CNT, EFD_SEMAPHORE); if (fd == -1 && errno == EINVAL) <fallback> #else <fallback> #endif Signed-off-by: Davide Libenzi <davidel@xmailserver.org> Cc: <linux-api@vger.kernel.org> Tested-by: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Ulrich Drepper <drepper@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/eventfd.c')
-rw-r--r--fs/eventfd.c20
1 files changed, 11 insertions, 9 deletions
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 5de2c2db3aa2..91c0829a7035 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -28,6 +28,7 @@ struct eventfd_ctx {
28 * issue a wakeup. 28 * issue a wakeup.
29 */ 29 */
30 __u64 count; 30 __u64 count;
31 unsigned int flags;
31}; 32};
32 33
33/* 34/*
@@ -87,22 +88,20 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
87{ 88{
88 struct eventfd_ctx *ctx = file->private_data; 89 struct eventfd_ctx *ctx = file->private_data;
89 ssize_t res; 90 ssize_t res;
90 __u64 ucnt; 91 __u64 ucnt = 0;
91 DECLARE_WAITQUEUE(wait, current); 92 DECLARE_WAITQUEUE(wait, current);
92 93
93 if (count < sizeof(ucnt)) 94 if (count < sizeof(ucnt))
94 return -EINVAL; 95 return -EINVAL;
95 spin_lock_irq(&ctx->wqh.lock); 96 spin_lock_irq(&ctx->wqh.lock);
96 res = -EAGAIN; 97 res = -EAGAIN;
97 ucnt = ctx->count; 98 if (ctx->count > 0)
98 if (ucnt > 0)
99 res = sizeof(ucnt); 99 res = sizeof(ucnt);
100 else if (!(file->f_flags & O_NONBLOCK)) { 100 else if (!(file->f_flags & O_NONBLOCK)) {
101 __add_wait_queue(&ctx->wqh, &wait); 101 __add_wait_queue(&ctx->wqh, &wait);
102 for (res = 0;;) { 102 for (res = 0;;) {
103 set_current_state(TASK_INTERRUPTIBLE); 103 set_current_state(TASK_INTERRUPTIBLE);
104 if (ctx->count > 0) { 104 if (ctx->count > 0) {
105 ucnt = ctx->count;
106 res = sizeof(ucnt); 105 res = sizeof(ucnt);
107 break; 106 break;
108 } 107 }
@@ -117,8 +116,9 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
117 __remove_wait_queue(&ctx->wqh, &wait); 116 __remove_wait_queue(&ctx->wqh, &wait);
118 __set_current_state(TASK_RUNNING); 117 __set_current_state(TASK_RUNNING);
119 } 118 }
120 if (res > 0) { 119 if (likely(res > 0)) {
121 ctx->count = 0; 120 ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
121 ctx->count -= ucnt;
122 if (waitqueue_active(&ctx->wqh)) 122 if (waitqueue_active(&ctx->wqh))
123 wake_up_locked(&ctx->wqh); 123 wake_up_locked(&ctx->wqh);
124 } 124 }
@@ -166,7 +166,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
166 __remove_wait_queue(&ctx->wqh, &wait); 166 __remove_wait_queue(&ctx->wqh, &wait);
167 __set_current_state(TASK_RUNNING); 167 __set_current_state(TASK_RUNNING);
168 } 168 }
169 if (res > 0) { 169 if (likely(res > 0)) {
170 ctx->count += ucnt; 170 ctx->count += ucnt;
171 if (waitqueue_active(&ctx->wqh)) 171 if (waitqueue_active(&ctx->wqh))
172 wake_up_locked(&ctx->wqh); 172 wake_up_locked(&ctx->wqh);
@@ -207,7 +207,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); 207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); 208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
209 209
210 if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK)) 210 if (flags & ~EFD_FLAGS_SET)
211 return -EINVAL; 211 return -EINVAL;
212 212
213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
@@ -216,13 +216,14 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
216 216
217 init_waitqueue_head(&ctx->wqh); 217 init_waitqueue_head(&ctx->wqh);
218 ctx->count = count; 218 ctx->count = count;
219 ctx->flags = flags;
219 220
220 /* 221 /*
221 * When we call this, the initialization must be complete, since 222 * When we call this, the initialization must be complete, since
222 * anon_inode_getfd() will install the fd. 223 * anon_inode_getfd() will install the fd.
223 */ 224 */
224 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 225 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
225 flags & (O_CLOEXEC | O_NONBLOCK)); 226 flags & EFD_SHARED_FCNTL_FLAGS);
226 if (fd < 0) 227 if (fd < 0)
227 kfree(ctx); 228 kfree(ctx);
228 return fd; 229 return fd;
@@ -232,3 +233,4 @@ SYSCALL_DEFINE1(eventfd, unsigned int, count)
232{ 233{
233 return sys_eventfd2(count, 0); 234 return sys_eventfd2(count, 0);
234} 235}
236