aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author Davide Libenzi <davidel@xmailserver.org> 2007-05-11 01:23:19 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-11 11:29:36 -0400
commit e1ad7468c77ddb94b0615d5f50fa255525fde0f0 (patch)
tree 856be1a028fece7e1fa10b7b585096839913fe2e /include/linux
parent 83f5d1266926c75890f1bc4678e49d79483cb573 (diff)
signal/timer/event: eventfd core
This is a very simple and light file descriptor that can be used as an event wait/dispatch mechanism by userspace (both wait and dispatch) and by the kernel (dispatch only). It can be used instead of pipe(2) in all cases where a pipe would simply be used to signal events. An eventfd's kernel overhead is much lower than a pipe's, and it does not consume two fds. When used in the kernel, it can offer an fd-bridge to enable, for example, functionalities like KAIO or syslets/threadlets to signal to an fd the completion of certain operations. More generally, an eventfd can be used by the kernel to signal readiness, in a POSIX poll/select way, of interfaces that would otherwise be incompatible with it. The API is: int eventfd(unsigned int count); The eventfd API accepts an initial "count" parameter, and returns an eventfd fd. It supports poll(2) (POLLIN, POLLOUT, POLLERR), read(2) and write(2). The POLLIN flag is raised when the internal counter is greater than zero. The POLLOUT flag is raised when at least a value of "1" can be written to the internal counter. The POLLERR flag is raised when an overflow in the counter value is detected. The write(2) operation can never overflow the counter, since it blocks (unless O_NONBLOCK is set, in which case -EAGAIN is returned). The eventfd_signal() function, however, can overflow it, since it is not supposed to sleep during its operation. The read(2) function reads the __u64 counter value, and resets the internal value to zero. If the value read is equal to (__u64) -1, an overflow happened on the internal counter (due to 2^64 eventfd_signal() posts that have never been retired - unlikely, but possible). The write(2) call writes a __u64 count value, and adds it to the current counter. The eventfd fd also supports O_NONBLOCK.
On the kernel side, we have: struct file *eventfd_fget(int fd); int eventfd_signal(struct file *file, int n); The eventfd_fget() function should be called to get a struct file* from an eventfd fd (this is an fget() + check of f_op being an eventfd fops pointer). The kernel can then call eventfd_signal() every time it wants to post an event to userspace. The eventfd_signal() function can be called from any context. A simple eventfd() test and benchmark is available here: http://www.xmailserver.org/eventfd-bench.c This is the eventfd-based version of pipetest-4 (pipe(2) based): http://www.xmailserver.org/pipetest-4.c Not that performance matters much in the eventfd case, but eventfd-bench shows almost double the performance of pipetest-4. [akpm@linux-foundation.org: fix i386 build] [akpm@linux-foundation.org: add sys_eventfd to sys_ni.c] Signed-off-by: Davide Libenzi <davidel@xmailserver.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/eventfd.h 29
-rw-r--r-- include/linux/syscalls.h 1
2 files changed, 30 insertions, 0 deletions
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
new file mode 100644
index 000000000000..0d6ecc60b94d
--- /dev/null
+++ b/include/linux/eventfd.h
@@ -0,0 +1,29 @@
1/*
2 * include/linux/eventfd.h
3 *
4 * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
5 *
6 */
7
8#ifndef _LINUX_EVENTFD_H
9#define _LINUX_EVENTFD_H
10
11
12#ifdef __KERNEL__
13
14#ifdef CONFIG_EVENTFD
15
16struct file *eventfd_fget(int fd);
17int eventfd_signal(struct file *file, int n);
18
19#else /* CONFIG_EVENTFD */
20
21#define eventfd_fget(fd) ERR_PTR(-ENOSYS)
22#define eventfd_signal(f, n) 0
23
24#endif /* CONFIG_EVENTFD */
25
26#endif /* __KERNEL__ */
27
28#endif /* _LINUX_EVENTFD_H */
29
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fc637be1d9cf..b02070eac422 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -607,6 +607,7 @@ asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct g
607asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask); 607asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask);
608asmlinkage long sys_timerfd(int ufd, int clockid, int flags, 608asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
609 const struct itimerspec __user *utmr); 609 const struct itimerspec __user *utmr);
610asmlinkage long sys_eventfd(unsigned int count);
610 611
611int kernel_execve(const char *filename, char *const argv[], char *const envp[]); 612int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
612 613