aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/Makefile1
-rw-r--r--fs/eventfd.c228
-rw-r--r--include/linux/eventfd.h29
-rw-r--r--include/linux/syscalls.h1
-rw-r--r--init/Kconfig10
-rw-r--r--kernel/sys_ni.c1
6 files changed, 270 insertions, 0 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 39625da9e2d6..720c29d57a62 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_EPOLL) += eventpoll.o
25obj-$(CONFIG_ANON_INODES) += anon_inodes.o 25obj-$(CONFIG_ANON_INODES) += anon_inodes.o
26obj-$(CONFIG_SIGNALFD) += signalfd.o 26obj-$(CONFIG_SIGNALFD) += signalfd.o
27obj-$(CONFIG_TIMERFD) += timerfd.o 27obj-$(CONFIG_TIMERFD) += timerfd.o
28obj-$(CONFIG_EVENTFD) += eventfd.o
28obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 29obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
29 30
30nfsd-$(CONFIG_NFSD) := nfsctl.o 31nfsd-$(CONFIG_NFSD) := nfsctl.o
diff --git a/fs/eventfd.c b/fs/eventfd.c
new file mode 100644
index 000000000000..480e2b3c4166
--- /dev/null
+++ b/fs/eventfd.c
@@ -0,0 +1,228 @@
1/*
2 * fs/eventfd.c
3 *
4 * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
5 *
6 */
7
8#include <linux/file.h>
9#include <linux/poll.h>
10#include <linux/init.h>
11#include <linux/fs.h>
12#include <linux/sched.h>
13#include <linux/kernel.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <linux/anon_inodes.h>
17#include <linux/eventfd.h>
18
19struct eventfd_ctx {
20 spinlock_t lock;
21 wait_queue_head_t wqh;
22 /*
23 * Every time that a write(2) is performed on an eventfd, the
24 * value of the __u64 being written is added to "count" and a
25 * wakeup is performed on "wqh". A read(2) will return the "count"
26 * value to userspace, and will reset "count" to zero. The kernel
27 * size eventfd_signal() also, adds to the "count" counter and
28 * issue a wakeup.
29 */
30 __u64 count;
31};
32
33/*
34 * Adds "n" to the eventfd counter "count". Returns "n" in case of
35 * success, or a value lower then "n" in case of coutner overflow.
36 * This function is supposed to be called by the kernel in paths
37 * that do not allow sleeping. In this function we allow the counter
38 * to reach the ULLONG_MAX value, and we signal this as overflow
39 * condition by returining a POLLERR to poll(2).
40 */
41int eventfd_signal(struct file *file, int n)
42{
43 struct eventfd_ctx *ctx = file->private_data;
44 unsigned long flags;
45
46 if (n < 0)
47 return -EINVAL;
48 spin_lock_irqsave(&ctx->lock, flags);
49 if (ULLONG_MAX - ctx->count < n)
50 n = (int) (ULLONG_MAX - ctx->count);
51 ctx->count += n;
52 if (waitqueue_active(&ctx->wqh))
53 wake_up_locked(&ctx->wqh);
54 spin_unlock_irqrestore(&ctx->lock, flags);
55
56 return n;
57}
58
59static int eventfd_release(struct inode *inode, struct file *file)
60{
61 kfree(file->private_data);
62 return 0;
63}
64
65static unsigned int eventfd_poll(struct file *file, poll_table *wait)
66{
67 struct eventfd_ctx *ctx = file->private_data;
68 unsigned int events = 0;
69 unsigned long flags;
70
71 poll_wait(file, &ctx->wqh, wait);
72
73 spin_lock_irqsave(&ctx->lock, flags);
74 if (ctx->count > 0)
75 events |= POLLIN;
76 if (ctx->count == ULLONG_MAX)
77 events |= POLLERR;
78 if (ULLONG_MAX - 1 > ctx->count)
79 events |= POLLOUT;
80 spin_unlock_irqrestore(&ctx->lock, flags);
81
82 return events;
83}
84
85static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
86 loff_t *ppos)
87{
88 struct eventfd_ctx *ctx = file->private_data;
89 ssize_t res;
90 __u64 ucnt;
91 DECLARE_WAITQUEUE(wait, current);
92
93 if (count < sizeof(ucnt))
94 return -EINVAL;
95 spin_lock_irq(&ctx->lock);
96 res = -EAGAIN;
97 ucnt = ctx->count;
98 if (ucnt > 0)
99 res = sizeof(ucnt);
100 else if (!(file->f_flags & O_NONBLOCK)) {
101 __add_wait_queue(&ctx->wqh, &wait);
102 for (res = 0;;) {
103 set_current_state(TASK_INTERRUPTIBLE);
104 if (ctx->count > 0) {
105 ucnt = ctx->count;
106 res = sizeof(ucnt);
107 break;
108 }
109 if (signal_pending(current)) {
110 res = -ERESTARTSYS;
111 break;
112 }
113 spin_unlock_irq(&ctx->lock);
114 schedule();
115 spin_lock_irq(&ctx->lock);
116 }
117 __remove_wait_queue(&ctx->wqh, &wait);
118 __set_current_state(TASK_RUNNING);
119 }
120 if (res > 0) {
121 ctx->count = 0;
122 if (waitqueue_active(&ctx->wqh))
123 wake_up_locked(&ctx->wqh);
124 }
125 spin_unlock_irq(&ctx->lock);
126 if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
127 return -EFAULT;
128
129 return res;
130}
131
132static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
133 loff_t *ppos)
134{
135 struct eventfd_ctx *ctx = file->private_data;
136 ssize_t res;
137 __u64 ucnt;
138 DECLARE_WAITQUEUE(wait, current);
139
140 if (count < sizeof(ucnt))
141 return -EINVAL;
142 if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
143 return -EFAULT;
144 if (ucnt == ULLONG_MAX)
145 return -EINVAL;
146 spin_lock_irq(&ctx->lock);
147 res = -EAGAIN;
148 if (ULLONG_MAX - ctx->count > ucnt)
149 res = sizeof(ucnt);
150 else if (!(file->f_flags & O_NONBLOCK)) {
151 __add_wait_queue(&ctx->wqh, &wait);
152 for (res = 0;;) {
153 set_current_state(TASK_INTERRUPTIBLE);
154 if (ULLONG_MAX - ctx->count > ucnt) {
155 res = sizeof(ucnt);
156 break;
157 }
158 if (signal_pending(current)) {
159 res = -ERESTARTSYS;
160 break;
161 }
162 spin_unlock_irq(&ctx->lock);
163 schedule();
164 spin_lock_irq(&ctx->lock);
165 }
166 __remove_wait_queue(&ctx->wqh, &wait);
167 __set_current_state(TASK_RUNNING);
168 }
169 if (res > 0) {
170 ctx->count += ucnt;
171 if (waitqueue_active(&ctx->wqh))
172 wake_up_locked(&ctx->wqh);
173 }
174 spin_unlock_irq(&ctx->lock);
175
176 return res;
177}
178
179static const struct file_operations eventfd_fops = {
180 .release = eventfd_release,
181 .poll = eventfd_poll,
182 .read = eventfd_read,
183 .write = eventfd_write,
184};
185
186struct file *eventfd_fget(int fd)
187{
188 struct file *file;
189
190 file = fget(fd);
191 if (!file)
192 return ERR_PTR(-EBADF);
193 if (file->f_op != &eventfd_fops) {
194 fput(file);
195 return ERR_PTR(-EINVAL);
196 }
197
198 return file;
199}
200
201asmlinkage long sys_eventfd(unsigned int count)
202{
203 int error, fd;
204 struct eventfd_ctx *ctx;
205 struct file *file;
206 struct inode *inode;
207
208 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
209 if (!ctx)
210 return -ENOMEM;
211
212 init_waitqueue_head(&ctx->wqh);
213 spin_lock_init(&ctx->lock);
214 ctx->count = count;
215
216 /*
217 * When we call this, the initialization must be complete, since
218 * anon_inode_getfd() will install the fd.
219 */
220 error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]",
221 &eventfd_fops, ctx);
222 if (!error)
223 return fd;
224
225 kfree(ctx);
226 return error;
227}
228
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
new file mode 100644
index 000000000000..0d6ecc60b94d
--- /dev/null
+++ b/include/linux/eventfd.h
@@ -0,0 +1,29 @@
1/*
2 * include/linux/eventfd.h
3 *
4 * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
5 *
6 */
7
#ifndef _LINUX_EVENTFD_H
#define _LINUX_EVENTFD_H


#ifdef __KERNEL__

/* Only pointers to struct file are used here; no definition is needed. */
struct file;

#ifdef CONFIG_EVENTFD

/*
 * eventfd_fget() - returns the struct file for "fd" if it is an eventfd,
 * or an ERR_PTR() value otherwise.
 * eventfd_signal() - adds "n" to the eventfd counter and wakes up waiters;
 * returns the amount actually added, or a negative error code.
 */
struct file *eventfd_fget(int fd);
int eventfd_signal(struct file *file, int n);

#else /* CONFIG_EVENTFD */

/*
 * Stubs used when eventfd support is compiled out.
 *
 * eventfd_fget() stays a macro so that this header does not have to pull
 * in the definitions of ERR_PTR() and ENOSYS; callers that use it must
 * provide them.
 */
#define eventfd_fget(fd) ERR_PTR(-ENOSYS)

/*
 * An inline function rather than "#define eventfd_signal(f, n) 0": the
 * arguments are still type-checked and evaluated, and a call used as a
 * plain statement does not expand to a bare "0;".
 */
static inline int eventfd_signal(struct file *file, int n)
{
	return 0;
}

#endif /* CONFIG_EVENTFD */

#endif /* __KERNEL__ */

#endif /* _LINUX_EVENTFD_H */
29
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fc637be1d9cf..b02070eac422 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -607,6 +607,7 @@ asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct g
607asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask); 607asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask);
608asmlinkage long sys_timerfd(int ufd, int clockid, int flags, 608asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
609 const struct itimerspec __user *utmr); 609 const struct itimerspec __user *utmr);
610asmlinkage long sys_eventfd(unsigned int count);
610 611
611int kernel_execve(const char *filename, char *const argv[], char *const envp[]); 612int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
612 613
diff --git a/init/Kconfig b/init/Kconfig
index 02c167de9646..4e009fde4b69 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -512,6 +512,16 @@ config TIMERFD
512 512
513 If unsure, say Y. 513 If unsure, say Y.
514 514
515config EVENTFD
516 bool "Enable eventfd() system call" if EMBEDDED
517 depends on ANON_INODES
518 default y
519 help
520 Enable the eventfd() system call, which allows a process to receive
521 both kernel notifications (e.g. KAIO) and userspace notifications.
522
523 If unsure, say Y.
524
515config SHMEM 525config SHMEM
516 bool "Use full shmem filesystem" if EMBEDDED 526 bool "Use full shmem filesystem" if EMBEDDED
517 default y 527 default y
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index b18f62549515..b6d77a8a1ca9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -145,3 +145,4 @@ cond_syscall(sys_ioprio_get);
145/* New file descriptors */ 145/* New file descriptors */
146cond_syscall(sys_signalfd); 146cond_syscall(sys_signalfd);
147cond_syscall(sys_timerfd); 147cond_syscall(sys_timerfd);
148cond_syscall(sys_eventfd);