diff options
author | Davide Libenzi <davidel@xmailserver.org> | 2009-03-31 18:24:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-01 11:59:20 -0400 |
commit | bcd0b235bf3808dec5115c381cd55568f63b85f0 (patch) | |
tree | d73c4aa83dcd5321d2c48e070020576098b9705e | |
parent | 4f0989dbfa8d18dd17c32120aac1eb3e906a62a2 (diff) |
eventfd: improve support for semaphore-like behavior
People started using eventfd in a semaphore-like way where before they
were using pipes.
That is, counter-based resource access, where a "wait()" returns
immediately after decrementing the counter by one if the counter is greater
than zero, and otherwise waits; and where a "post(count)" adds count to
the counter, releasing the appropriate number of waiters. In eventfd the
"post" (write) part is fine, while the "wait" (read) does not dequeue 1,
but the whole counter value.
The problem with eventfd is that a read() on the fd returns and wipes the
whole counter, making its use as a semaphore a little more
cumbersome. You can do a read() followed by a write() of COUNTER-1, but
IMO it's pretty easy and cheap to make this work without extra steps. This
patch introduces a new eventfd flag that tells eventfd to only dequeue 1
from the counter, allowing simple read/write to make it behave like a
semaphore. Simple test here:
http://www.xmailserver.org/eventfd-sem.c
To remain backward-compatible with earlier kernels, userspace applications
should probe for the availability of this feature via:
#ifdef EFD_SEMAPHORE
fd = eventfd2 (CNT, EFD_SEMAPHORE);
if (fd == -1 && errno == EINVAL)
<fallback>
#else
<fallback>
#endif
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: <linux-api@vger.kernel.org>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/eventfd.c | 20 | ||||
-rw-r--r-- | include/linux/eventfd.h | 12 |
2 files changed, 22 insertions, 10 deletions
diff --git a/fs/eventfd.c b/fs/eventfd.c index 5de2c2db3aa2..91c0829a7035 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c | |||
@@ -28,6 +28,7 @@ struct eventfd_ctx { | |||
28 | * issue a wakeup. | 28 | * issue a wakeup. |
29 | */ | 29 | */ |
30 | __u64 count; | 30 | __u64 count; |
31 | unsigned int flags; | ||
31 | }; | 32 | }; |
32 | 33 | ||
33 | /* | 34 | /* |
@@ -87,22 +88,20 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, | |||
87 | { | 88 | { |
88 | struct eventfd_ctx *ctx = file->private_data; | 89 | struct eventfd_ctx *ctx = file->private_data; |
89 | ssize_t res; | 90 | ssize_t res; |
90 | __u64 ucnt; | 91 | __u64 ucnt = 0; |
91 | DECLARE_WAITQUEUE(wait, current); | 92 | DECLARE_WAITQUEUE(wait, current); |
92 | 93 | ||
93 | if (count < sizeof(ucnt)) | 94 | if (count < sizeof(ucnt)) |
94 | return -EINVAL; | 95 | return -EINVAL; |
95 | spin_lock_irq(&ctx->wqh.lock); | 96 | spin_lock_irq(&ctx->wqh.lock); |
96 | res = -EAGAIN; | 97 | res = -EAGAIN; |
97 | ucnt = ctx->count; | 98 | if (ctx->count > 0) |
98 | if (ucnt > 0) | ||
99 | res = sizeof(ucnt); | 99 | res = sizeof(ucnt); |
100 | else if (!(file->f_flags & O_NONBLOCK)) { | 100 | else if (!(file->f_flags & O_NONBLOCK)) { |
101 | __add_wait_queue(&ctx->wqh, &wait); | 101 | __add_wait_queue(&ctx->wqh, &wait); |
102 | for (res = 0;;) { | 102 | for (res = 0;;) { |
103 | set_current_state(TASK_INTERRUPTIBLE); | 103 | set_current_state(TASK_INTERRUPTIBLE); |
104 | if (ctx->count > 0) { | 104 | if (ctx->count > 0) { |
105 | ucnt = ctx->count; | ||
106 | res = sizeof(ucnt); | 105 | res = sizeof(ucnt); |
107 | break; | 106 | break; |
108 | } | 107 | } |
@@ -117,8 +116,9 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, | |||
117 | __remove_wait_queue(&ctx->wqh, &wait); | 116 | __remove_wait_queue(&ctx->wqh, &wait); |
118 | __set_current_state(TASK_RUNNING); | 117 | __set_current_state(TASK_RUNNING); |
119 | } | 118 | } |
120 | if (res > 0) { | 119 | if (likely(res > 0)) { |
121 | ctx->count = 0; | 120 | ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; |
121 | ctx->count -= ucnt; | ||
122 | if (waitqueue_active(&ctx->wqh)) | 122 | if (waitqueue_active(&ctx->wqh)) |
123 | wake_up_locked(&ctx->wqh); | 123 | wake_up_locked(&ctx->wqh); |
124 | } | 124 | } |
@@ -166,7 +166,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c | |||
166 | __remove_wait_queue(&ctx->wqh, &wait); | 166 | __remove_wait_queue(&ctx->wqh, &wait); |
167 | __set_current_state(TASK_RUNNING); | 167 | __set_current_state(TASK_RUNNING); |
168 | } | 168 | } |
169 | if (res > 0) { | 169 | if (likely(res > 0)) { |
170 | ctx->count += ucnt; | 170 | ctx->count += ucnt; |
171 | if (waitqueue_active(&ctx->wqh)) | 171 | if (waitqueue_active(&ctx->wqh)) |
172 | wake_up_locked(&ctx->wqh); | 172 | wake_up_locked(&ctx->wqh); |
@@ -207,7 +207,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) | |||
207 | BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); | 207 | BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); |
208 | BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); | 208 | BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); |
209 | 209 | ||
210 | if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK)) | 210 | if (flags & ~EFD_FLAGS_SET) |
211 | return -EINVAL; | 211 | return -EINVAL; |
212 | 212 | ||
213 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 213 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); |
@@ -216,13 +216,14 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) | |||
216 | 216 | ||
217 | init_waitqueue_head(&ctx->wqh); | 217 | init_waitqueue_head(&ctx->wqh); |
218 | ctx->count = count; | 218 | ctx->count = count; |
219 | ctx->flags = flags; | ||
219 | 220 | ||
220 | /* | 221 | /* |
221 | * When we call this, the initialization must be complete, since | 222 | * When we call this, the initialization must be complete, since |
222 | * anon_inode_getfd() will install the fd. | 223 | * anon_inode_getfd() will install the fd. |
223 | */ | 224 | */ |
224 | fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, | 225 | fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, |
225 | flags & (O_CLOEXEC | O_NONBLOCK)); | 226 | flags & EFD_SHARED_FCNTL_FLAGS); |
226 | if (fd < 0) | 227 | if (fd < 0) |
227 | kfree(ctx); | 228 | kfree(ctx); |
228 | return fd; | 229 | return fd; |
@@ -232,3 +233,4 @@ SYSCALL_DEFINE1(eventfd, unsigned int, count) | |||
232 | { | 233 | { |
233 | return sys_eventfd2(count, 0); | 234 | return sys_eventfd2(count, 0); |
234 | } | 235 | } |
236 | |||
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index a667637b54e3..f45a8ae5f828 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h | |||
@@ -13,10 +13,20 @@ | |||
13 | /* For O_CLOEXEC and O_NONBLOCK */ | 13 | /* For O_CLOEXEC and O_NONBLOCK */ |
14 | #include <linux/fcntl.h> | 14 | #include <linux/fcntl.h> |
15 | 15 | ||
16 | /* Flags for eventfd2. */ | 16 | /* |
17 | * CAREFUL: Check include/asm-generic/fcntl.h when defining | ||
18 | * new flags, since they might collide with O_* ones. We want | ||
19 | * to re-use O_* flags that couldn't possibly have a meaning | ||
20 | * from eventfd, in order to leave a free define-space for | ||
21 | * shared O_* flags. | ||
22 | */ | ||
23 | #define EFD_SEMAPHORE (1 << 0) | ||
17 | #define EFD_CLOEXEC O_CLOEXEC | 24 | #define EFD_CLOEXEC O_CLOEXEC |
18 | #define EFD_NONBLOCK O_NONBLOCK | 25 | #define EFD_NONBLOCK O_NONBLOCK |
19 | 26 | ||
27 | #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) | ||
28 | #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) | ||
29 | |||
20 | struct file *eventfd_fget(int fd); | 30 | struct file *eventfd_fget(int fd); |
21 | int eventfd_signal(struct file *file, int n); | 31 | int eventfd_signal(struct file *file, int n); |
22 | 32 | ||